Commit 50852902 by Zhihong Ma

fix : new mzh : PTQ ResNet18/50/152 old mzh: saved & reconstructed

parent 62550290
@@ -3,15 +3,16 @@ import os
 # Extract the parameter and computation ratios from val.txt, the redirected output of get_param.py
-def extract_ratio():
-    fr = open('param_flops.txt','r')
+def extract_ratio(md='ResNet18'):
+    fr = open('param_flops_' + md + '.txt','r')
     lines = fr.readlines()
     layer = []
     par_ratio = []
     flop_ratio = []
     for line in lines:
-        if '(' in line and ')' in line:
-            layer.append(line.split(')')[0].split('(')[1])
+        # if '(' in line and ')' in line:
+        if 'Conv' in line or 'BatchNorm2d' in line or 'Linear' in line:
+            layer.append(line.split(':')[1].split('(')[0])
             r1 = line.split('%')[0].split(',')[-1]
             r1 = float(r1)
             par_ratio.append(r1)
@@ -24,6 +25,6 @@ def extract_ratio():
 if __name__ == "__main__":
     layer, par_ratio, flop_ratio = extract_ratio()
-    print(layer)
-    print(par_ratio)
-    print(flop_ratio)
+    print(len(layer))
+    print(len(par_ratio))
+    print(len(flop_ratio))
\ No newline at end of file
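# How the new filter parses one line of the ptflops per-layer report (the sample line
# is taken from the ResNet dump at the bottom of this commit; variable names here are illustrative):
line = "(conv1): Conv2d(448, 0.012% Params, 458.75 KMac, 0.185% MACs, 3, 16, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))"
if 'Conv' in line or 'BatchNorm2d' in line or 'Linear' in line:
    name = line.split(':')[1].split('(')[0]             # ' Conv2d'
    par_pct = float(line.split('%')[0].split(',')[-1])  # 0.012, i.e. this layer's share of total params in %
    print(name.strip(), par_pct)                        # Conv2d 0.012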
from model import *
import torch
from ptflops import get_model_complexity_info
import argparse
def get_children(model: torch.nn.Module):
# get children from the model!
# an nn.ModuleList could be used as the container so the parameters stay updatable later
# children = nn.ModuleList(model.children())
# print(children)
# which makes it easier to update the contained modules afterwards
# flatt_children = nn.ModuleList()
children = list(model.children())
# flatt_children = nn.ModuleList()
flatt_children = []
if len(children) == 0:
# if model has no children; model is last child! :O
return model
else:
# look for children from children... to the last child!
for child in children:
try:
flatt_children.extend(get_children(child))
except TypeError:
flatt_children.append(get_children(child))
# print(flatt_children)
return flatt_children
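# A small sanity check for get_children (the toy model and helper name below are illustrative,
# not part of this commit): nested containers are flattened so that only leaf modules remain.
def _demo_get_children():
    toy = torch.nn.Sequential(
        torch.nn.Conv2d(3, 16, 3),
        torch.nn.Sequential(torch.nn.BatchNorm2d(16), torch.nn.ReLU()),
    )
    leaves = get_children(toy)
    print([type(m).__name__ for m in leaves])   # ['Conv2d', 'BatchNorm2d', 'ReLU']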
if __name__ == "__main__":
parser = argparse.ArgumentParser(description='Model Analysis --- params & flops')
parser.add_argument('-m', '--model', metavar='MODEL ARCH', default='ResNet18') # default must match the ResNet18/50/152 checks below
args = parser.parse_args()
if args.model == 'ResNet18':
model = resnet18()
elif args.model == 'ResNet50':
model = resnet50()
elif args.model == 'ResNet152':
model = resnet152()
full_file = 'ckpt/cifar10_' + args.model + '.pt'
model.load_state_dict(torch.load(full_file))
# flat = get_children(model)
# print(flat)
# flat = get_children(model)
# new_model = nn.Sequential(*flat)
flops, params = get_model_complexity_info(model, (3, 32, 32), as_strings=True, print_per_layer_stat=True)
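# Assumed usage (this appears to be the get_param.py referenced by extract_ratio above):
# ptflops prints the per-layer table to stdout, and extract_ratio(md) expects it to be
# captured as param_flops_<model>.txt, e.g.
#     python get_param.py -m ResNet18 > param_flops_ResNet18.txt
print('Model: ' + args.model)
print('Computational complexity: ' + flops)
print('Number of parameters: ' + params)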
import torch
import torch.nn as nn
import torch.nn.functional as F
from module import *
import module
from global_var import GlobalVariables
# ResNet model definition
# intended for CIFAR-10
class ResNet(nn.Module):
def __init__(self, block, layers, num_classes=10): # the number of classes is set to 10 here
super(ResNet, self).__init__()
self.inplanes = 16 # CIFAR-10 images are small, so fewer channels are needed at the start
GlobalVariables.SELF_INPLANES = self.inplanes
# print('resnet init:'+ str(GlobalVariables.SELF_INPLANES))
# input layer
self.conv1 = nn.Conv2d(3, 16, kernel_size=3, stride=1, padding=1,
bias=True)
self.bn1 = nn.BatchNorm2d(16)
self.relu = nn.ReLU()
# residual layers (4 stages; each stage contains 6n+2 convolutional layers)
self.layer1 = MakeLayer(block, 16, layers[0])
self.layer2 = MakeLayer(block, 32, layers[1], stride=2)
self.layer3 = MakeLayer(block, 64, layers[2], stride=2)
self.layer4 = MakeLayer(block, 128, layers[3], stride=2)
# classification head
self.avgpool = nn.AdaptiveAvgPool2d((1, 1))
self.fc = nn.Linear(128 * block.expansion, num_classes)
# parameter initialization
for m in self.modules():
if isinstance(m, nn.Conv2d):
nn.init.kaiming_normal_(m.weight, mode='fan_out', nonlinearity='relu')
elif isinstance(m, nn.BatchNorm2d):
nn.init.constant_(m.weight, 1)
nn.init.constant_(m.bias, 0)
def forward(self, x):
# input layer
x = self.conv1(x)
x = self.bn1(x)
x = self.relu(x)
# compared with the ImageNet version, the maxpool is dropped here: CIFAR-10 images are already small, and pooling again would make them too small
# residual layers
x = self.layer1(x)
x = self.layer2(x)
x = self.layer3(x)
x = self.layer4(x)
# classification head
x = self.avgpool(x) # output shape is B,C,1,1
x = x.view(x.size(0), -1)
x = self.fc(x)
out = F.softmax(x,dim = 1) # the softmax is optional here; it has little effect
return out
def quantize(self, quant_type, num_bits=8, e_bits=3):
self.qconvbnrelu1 = QConvBNReLU(quant_type,self.conv1,self.bn1,qi=True,qo=True,num_bits=num_bits,e_bits=e_bits)
# num_bits is not passed in; needs to be fixed
self.layer1.quantize(quant_type=quant_type,num_bits=num_bits, e_bits=e_bits)
self.layer2.quantize(quant_type=quant_type,num_bits=num_bits, e_bits=e_bits)
self.layer3.quantize(quant_type=quant_type,num_bits=num_bits, e_bits=e_bits)
self.layer4.quantize(quant_type=quant_type,num_bits=num_bits, e_bits=e_bits)
self.qavgpool1 = QAdaptiveAvgPool2d(quant_type,qi=False,qo=True,num_bits=num_bits,e_bits=e_bits)
self.qfc1 = QLinear(quant_type, self.fc,qi=False,qo=True,num_bits=num_bits,e_bits=e_bits)
def quantize_forward(self, x):
# for _, layer in self.quantize_layers.items():
# x = layer(x)
# out = F.softmax(x, dim=1)
# return out
x = self.qconvbnrelu1(x)
x = self.layer1.quantize_forward(x)
x = self.layer2.quantize_forward(x)
x = self.layer3.quantize_forward(x)
x = self.layer4.quantize_forward(x)
x = self.qavgpool1(x)
x = x.view(x.size(0), -1)
x = self.qfc1(x)
out = F.softmax(x,dim = 1) # the softmax is optional here; it has little effect
return out
def freeze(self):
self.qconvbnrelu1.freeze() # as the first layer it already has a qi, so no qi needs to be supplied again when freezing
qo = self.layer1.freeze(qinput = self.qconvbnrelu1.qo)
qo = self.layer2.freeze(qinput = qo)
qo = self.layer3.freeze(qinput = qo)
qo = self.layer4.freeze(qinput = qo)
self.qavgpool1.freeze(qi=qo)
self.qfc1.freeze(qi=self.qavgpool1.qo)
def fakefreeze(self):
pass
def quantize_inference(self, x):
qx = self.qconvbnrelu1.qi.quantize_tensor(x)
qx = self.qconvbnrelu1.quantize_inference(qx)
qx = self.layer1.quantize_inference(qx)
qx = self.layer2.quantize_inference(qx)
qx = self.layer3.quantize_inference(qx)
qx = self.layer4.quantize_inference(qx)
qx = self.qavgpool1.quantize_inference(qx)
qx = qx.view(qx.size(0), -1)
qx = self.qfc1.quantize_inference(qx)
qx = self.qfc1.qo.dequantize_tensor(qx)
out = F.softmax(qx,dim = 1) # the softmax is optional here; it has little effect
return out
# BasicBlock class
class BasicBlock(nn.Module):
expansion = 1
def __init__(self, inplanes, planes, stride=1, downsample=None):
super(BasicBlock, self).__init__()
# first convolution
self.conv1 = nn.Conv2d(inplanes, planes, kernel_size=3, stride=stride,
padding=1, bias=True)
self.bn1 = nn.BatchNorm2d(planes)
# second convolution
self.conv2 = nn.Conv2d(planes, planes, kernel_size=3, stride=1,
padding=1, bias=True)
self.bn2 = nn.BatchNorm2d(planes)
# shortcut
self.relu = nn.ReLU()
self.downsample = downsample
self.stride = stride
def forward(self, x):
identity = x
out = self.conv1(x)
out = self.bn1(out)
out = self.relu(out)
out = self.conv2(out)
out = self.bn2(out)
if self.downsample is not None:
identity = self.downsample(identity)
out += identity
out = self.relu(out)
return out
def quantize(self, quant_type ,num_bits=8, e_bits=3):
self.qconvbnrelu1 = QConvBNReLU(quant_type,self.conv1,self.bn1,qi=False,qo=True,num_bits=num_bits,e_bits=e_bits)
self.qconvbn1 = QConvBN(quant_type,self.conv2,self.bn2,qi=False,qo=True,num_bits=num_bits,e_bits=e_bits)
if self.downsample is not None:
self.qconvbn2 = QConvBN(quant_type,self.downsample[0],self.downsample[1],qi=False,qo=True,num_bits=num_bits,e_bits=e_bits)
self.qelementadd = QElementwiseAdd(quant_type,qi0=False, qi1=False, qo=True,num_bits=num_bits,e_bits=e_bits)
self.qrelu1 = QReLU(quant_type,qi= False,num_bits=num_bits,e_bits=e_bits) # needs its own qi
def quantize_forward(self, x):
identity = x
out = self.qconvbnrelu1(x)
out = self.qconvbn1(out)
if self.downsample is not None:
identity = self.qconvbn2(identity)
# residual add
# out = identity + out # an elementwise-add transform is needed here; to be revised later
out = self.qelementadd(out,identity)
out = self.qrelu1(out)
return out
def freeze(self, qinput):
# qconvbnrelu1 could actually reuse the previous layer's qo, but passing it around felt awkward, so it is not done
# still needs careful checking
self.qconvbnrelu1.freeze(qi= qinput) # must take the last qo of the preceding module
self.qconvbn1.freeze(qi = self.qconvbnrelu1.qo)
if self.downsample is not None:
self.qconvbn2.freeze(qi = qinput) # the shortcut branch
self.qelementadd.freeze(qi0 = self.qconvbn1.qo, qi1 = self.qconvbn2.qo)
else:
self.qelementadd.freeze(qi0 = self.qconvbn1.qo, qi1 = qinput)
# an extra layer may be needed here to handle the elementwise add
self.qrelu1.freeze(qi = self.qelementadd.qo) # needs to collect its own qi statistics
return self.qrelu1.qi # the qo after relu can reuse the qi collected by relu
def quantize_inference(self, x):
# no initial quantize_tensor/dequantize_tensor should be needed here: this is not the first or last layer, and as long as every intermediate layer stays in the quantized domain no such handling is required.
identity = x
out = self.qconvbnrelu1.quantize_inference(x)
out = self.qconvbn1.quantize_inference(out)
if self.downsample is not None:
identity = self.qconvbn2.quantize_inference(identity)
# out = identity + out # an elementwise-add transform may be needed here; to be revised later
out = self.qelementadd.quantize_inference(out,identity)
out = self.qrelu1.quantize_inference(out)
return out
# Bottleneck class
class Bottleneck(nn.Module):
expansion = 4
def __init__(self, inplanes, planes, stride=1, downsample=None):
super(Bottleneck, self).__init__()
# 1x1 convolution
self.conv1 = nn.Conv2d(inplanes, planes, kernel_size=1, bias=True)
self.bn1 = nn.BatchNorm2d(planes)
# 3x3 convolution
self.conv2 = nn.Conv2d(planes, planes, kernel_size=3, stride=stride,
padding=1, bias=True)
self.bn2 = nn.BatchNorm2d(planes)
# 1x1 convolution
self.conv3 = nn.Conv2d(planes, planes * self.expansion, kernel_size=1,
bias=True)
self.bn3 = nn.BatchNorm2d(planes * self.expansion)
# shortcut
self.relu = nn.ReLU()
self.downsample = downsample
self.stride = stride
def forward(self, x):
identity = x
out = self.conv1(x)
out = self.bn1(out)
out = self.relu(out)
out = self.conv2(out)
out = self.bn2(out)
out = self.relu(out)
out = self.conv3(out)
out = self.bn3(out)
if self.downsample is not None:
identity = self.downsample(x)
out += identity # the addition is handled here
out = self.relu(out)
return out
def quantize(self, quant_type ,num_bits=8, e_bits=3):
self.qconvbnrelu1 = QConvBNReLU(quant_type,self.conv1,self.bn1,qi=False,qo=True,num_bits=num_bits,e_bits=e_bits)
self.qconvbnrelu2 = QConvBNReLU(quant_type,self.conv2,self.bn2,qi=False,qo=True,num_bits=num_bits,e_bits=e_bits)
self.qconvbn1 = QConvBN(quant_type,self.conv3,self.bn3,qi=False,qo=True,num_bits=num_bits,e_bits=e_bits)
if self.downsample is not None:
self.qconvbn2 = QConvBN(quant_type,self.downsample[0],self.downsample[1],qi=False,qo=True,num_bits=num_bits,e_bits=e_bits)
self.qelementadd = QElementwiseAdd(quant_type,qi0=False, qi1=False, qo=True,num_bits=num_bits,e_bits=e_bits)
self.qrelu1 = QReLU(quant_type,qi= False,num_bits=num_bits,e_bits=e_bits) # needs its own qi
def quantize_forward(self, x):
identity = x
out = self.qconvbnrelu1(x)
out = self.qconvbnrelu2(out)
out = self.qconvbn1(out)
if self.downsample is not None:
identity = self.qconvbn2(identity)
# residual add
# out = identity + out # an elementwise-add transform is needed here; to be revised later
out = self.qelementadd(out,identity)
out = self.qrelu1(out)
return out
def freeze(self, qinput):
# qconvbnrelu1 could actually reuse the previous layer's qo, but passing it around felt awkward, so it is not done
# still needs careful checking
self.qconvbnrelu1.freeze(qi= qinput) # must take the last qo of the preceding module
self.qconvbnrelu2.freeze(qi=self.qconvbnrelu1.qo)
self.qconvbn1.freeze(qi = self.qconvbnrelu2.qo)
if self.downsample is not None:
self.qconvbn2.freeze(qi = qinput) # the shortcut branch
self.qelementadd.freeze(qi0 = self.qconvbn1.qo, qi1 = self.qconvbn2.qo)
else:
self.qelementadd.freeze(qi0 = self.qconvbn1.qo, qi1 = qinput)
# an extra layer may be needed here to handle the elementwise add
self.qrelu1.freeze(qi = self.qelementadd.qo) # needs to collect its own qi statistics
return self.qrelu1.qi # the qo after relu can reuse the qi collected by relu
def quantize_inference(self, x):
# no initial quantize_tensor/dequantize_tensor should be needed here: this is not the first or last layer, and as long as every intermediate layer stays in the quantized domain no such handling is required.
identity = x
out = self.qconvbnrelu1.quantize_inference(x)
out = self.qconvbnrelu2.quantize_inference(out)
out = self.qconvbn1.quantize_inference(out)
if self.downsample is not None:
identity = self.qconvbn2.quantize_inference(identity)
# out = identity + out # an elementwise-add transform may be needed here; to be revised later
out = self.qelementadd.quantize_inference(out,identity)
out = self.qrelu1.quantize_inference(out)
return out
class MakeLayer(nn.Module):
def __init__(self, block, planes, blocks, stride=1):
super(MakeLayer, self).__init__()
# print('makelayer init:'+ str(GlobalVariables.SELF_INPLANES))
self.downsample = None
if stride != 1 or GlobalVariables.SELF_INPLANES != planes * block.expansion:
self.downsample = nn.Sequential(
nn.Conv2d(GlobalVariables.SELF_INPLANES, planes * block.expansion,kernel_size=1, stride=stride, bias=True),
nn.BatchNorm2d(planes * block.expansion)
)
self.blockdict = nn.ModuleDict()
self.blockdict['block1'] = block(inplanes=GlobalVariables.SELF_INPLANES, planes=planes, stride=stride, downsample=self.downsample)
GlobalVariables.SELF_INPLANES = planes * block.expansion
for i in range(1, blocks): # number of blocks; a dict is the only workable container here
self.blockdict['block' + str(i+1)] = block(inplanes=GlobalVariables.SELF_INPLANES, planes=planes) # the block is instantiated here
# def _make_layer(self, block, planes, blocks, stride=1):
# downsample = None
# # stride is the stride of the convolution, and self.inplanes is the number of input channels of the current residual block,
# # while planes * block.expansion is its number of output channels. So when stride != 1 or self.inplanes != planes * block.expansion, a downsampling step is required
# # Apart from the first residual block of this layer, every other block has equal input and output channel counts and the same stride (1 or 2). The input size of these convolutions stays fixed, while the output height/width shrinks gradually as residual blocks are stacked
# if stride != 1 or SELF_INPLANES != planes * block.expansion:
# downsample = nn.Sequential(
# nn.Conv2d(SELF_INPLANES, planes * block.expansion,
# kernel_size=1, stride=stride, bias=False),
# nn.BatchNorm2d(planes * block.expansion),
# )
# layers = []
# layers.append(block(SELF_INPLANES, planes, stride, downsample))
# SELF_INPLANES = planes * block.expansion
# for _ in range(1, blocks): # number of blocks
# layers.append(block(SELF_INPLANES, planes))
# return nn.Sequential(*layers)
def forward(self,x):
for _, layer in self.blockdict.items():
x = layer(x)
return x
def quantize(self, quant_type, num_bits=8, e_bits=3):
# needs checking
for _, layer in self.blockdict.items():
layer.quantize(quant_type=quant_type,num_bits=num_bits,e_bits=e_bits) # every entry is a block, and each block carries its own quantize strategy; n_exp and mode were already assigned in __init__
def quantize_forward(self, x):
for _, layer in self.blockdict.items():
x = layer.quantize_forward(x) # each block has its own quantize_forward
return x
def freeze(self, qinput): # must be passed in from the ResNet module's freeze
# qconvbnrelu1 could actually reuse the previous layer's qo, but passing it around felt awkward, so it is not done
# still needs careful checking
cnt = 0
for _, layer in self.blockdict.items():
if cnt == 0:
qo = layer.freeze(qinput = qinput)
cnt = 1
else:
qo = layer.freeze(qinput = qo) # each block has its own freeze
return qo # for the following layers to use
def quantize_inference(self, x):
# no initial quantize_tensor/dequantize_tensor should be needed here: this is not the first or last layer, and as long as every intermediate layer stays in the quantized domain no such handling is required.
for _, layer in self.blockdict.items():
x = layer.quantize_inference(x) # each block has its own quantize_inference
return x
# build the ResNet18 model
def resnet18(**kwargs):
model = ResNet(BasicBlock, [2, 2, 2, 2], **kwargs)
return model
# build the ResNet50 model
def resnet50(**kwargs):
model = ResNet(Bottleneck, [3, 4, 6, 3], **kwargs)
return model
# build the ResNet152 model
def resnet152(**kwargs):
model = ResNet(Bottleneck, [3, 8, 36, 3], **kwargs)
return model
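# A minimal post-training-quantization sketch built from the methods above. The calibration
# loader, the quant_type value and the function name are assumptions; quantize /
# quantize_forward / freeze / quantize_inference are the methods defined in this file.
def ptq_sketch(model, calib_loader, quant_type, num_bits=8, e_bits=3):
    model.eval()
    model.quantize(quant_type, num_bits=num_bits, e_bits=e_bits)
    with torch.no_grad():
        for x, _ in calib_loader:
            model.quantize_forward(x)            # collect qi/qo statistics layer by layer
    model.freeze()                               # chain the frozen qo's through every block
    with torch.no_grad():
        for x, _ in calib_loader:
            out = model.quantize_inference(x)    # inference entirely in the quantized domain
    return out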
@@ -625,10 +625,10 @@ class QModule_2(nn.Module):
         if qi1:
             self.qi1 = QParam(quant_type,num_bits, e_bits) # qi is already assigned num_bits and mode here
         if qo:
-            self.qo = QParam(quant_type,32, e_bits) # qo is already assigned num_bits and mode here
+            self.qo = QParam(quant_type,num_bits, e_bits) # qo is already assigned num_bits and mode here
         self.quant_type = quant_type
-        self.num_bits = 32
+        self.num_bits = num_bits
         self.e_bits = e_bits
         self.bias_qmax = bias_qmax(quant_type)
...
@@ -228,17 +228,17 @@ if __name__ == "__main__":
     print(f"test accuracy: {acc:.2f}%")
-    for name, module in model.named_modules():
-        print(f"{name}: {module}\n")
-    print('========================================================')
-    print('========================================================')
-    model.quantize()
-    for name , layer in model.quantize_layers.items():
-        print(f"Layer {name}: {layer} ") # enough to traverse them all
+    # for name, module in model.named_modules():
+    #     print(f"{name}: {module}\n")
+    # print('========================================================')
+    # print('========================================================')
+    # model.quantize()
+    # for name , layer in model.quantize_layers.items():
+    #     print(f"Layer {name}: {layer} ") # enough to traverse them all
...
ResNet(
3.86 M, 104.773% Params, 247.73 MMac, 100.000% MACs,
(conv1): Conv2d(448, 0.012% Params, 458.75 KMac, 0.185% MACs, 3, 16, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
(bn1): BatchNorm2d(32, 0.001% Params, 32.77 KMac, 0.013% MACs, 16, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
(relu): ReLU(0, 0.000% Params, 16.38 KMac, 0.007% MACs, )
(layer1): MakeLayer(
15.58 k, 0.423% Params, 16.25 MMac, 6.561% MACs,
(downsample): Sequential(
1.22 k, 0.033% Params, 1.25 MMac, 0.503% MACs,
(0): Conv2d(1.09 k, 0.030% Params, 1.11 MMac, 0.450% MACs, 16, 64, kernel_size=(1, 1), stride=(1, 1))
(1): BatchNorm2d(128, 0.003% Params, 131.07 KMac, 0.053% MACs, 64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
)
(blockdict): ModuleDict(
14.37 k, 0.390% Params, 15.01 MMac, 6.058% MACs,
(block1): Bottleneck(
5.09 k, 0.138% Params, 5.31 MMac, 2.143% MACs,
(conv1): Conv2d(272, 0.007% Params, 278.53 KMac, 0.112% MACs, 16, 16, kernel_size=(1, 1), stride=(1, 1))
(bn1): BatchNorm2d(32, 0.001% Params, 32.77 KMac, 0.013% MACs, 16, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
(conv2): Conv2d(2.32 k, 0.063% Params, 2.38 MMac, 0.959% MACs, 16, 16, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
(bn2): BatchNorm2d(32, 0.001% Params, 32.77 KMac, 0.013% MACs, 16, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
(conv3): Conv2d(1.09 k, 0.030% Params, 1.11 MMac, 0.450% MACs, 16, 64, kernel_size=(1, 1), stride=(1, 1))
(bn3): BatchNorm2d(128, 0.003% Params, 131.07 KMac, 0.053% MACs, 64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
(relu): ReLU(0, 0.000% Params, 98.3 KMac, 0.040% MACs, )
)
(block2): Bottleneck(
4.64 k, 0.126% Params, 4.85 MMac, 1.958% MACs,
(conv1): Conv2d(1.04 k, 0.028% Params, 1.06 MMac, 0.430% MACs, 64, 16, kernel_size=(1, 1), stride=(1, 1))
(bn1): BatchNorm2d(32, 0.001% Params, 32.77 KMac, 0.013% MACs, 16, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
(conv2): Conv2d(2.32 k, 0.063% Params, 2.38 MMac, 0.959% MACs, 16, 16, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
(bn2): BatchNorm2d(32, 0.001% Params, 32.77 KMac, 0.013% MACs, 16, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
(conv3): Conv2d(1.09 k, 0.030% Params, 1.11 MMac, 0.450% MACs, 16, 64, kernel_size=(1, 1), stride=(1, 1))
(bn3): BatchNorm2d(128, 0.003% Params, 131.07 KMac, 0.053% MACs, 64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
(relu): ReLU(0, 0.000% Params, 98.3 KMac, 0.040% MACs, )
)
(block3): Bottleneck(
4.64 k, 0.126% Params, 4.85 MMac, 1.958% MACs,
(conv1): Conv2d(1.04 k, 0.028% Params, 1.06 MMac, 0.430% MACs, 64, 16, kernel_size=(1, 1), stride=(1, 1))
(bn1): BatchNorm2d(32, 0.001% Params, 32.77 KMac, 0.013% MACs, 16, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
(conv2): Conv2d(2.32 k, 0.063% Params, 2.38 MMac, 0.959% MACs, 16, 16, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
(bn2): BatchNorm2d(32, 0.001% Params, 32.77 KMac, 0.013% MACs, 16, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
(conv3): Conv2d(1.09 k, 0.030% Params, 1.11 MMac, 0.450% MACs, 16, 64, kernel_size=(1, 1), stride=(1, 1))
(bn3): BatchNorm2d(128, 0.003% Params, 131.07 KMac, 0.053% MACs, 64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
(relu): ReLU(0, 0.000% Params, 98.3 KMac, 0.040% MACs, )
)
)
)
(layer2): MakeLayer(
158.98 k, 4.313% Params, 42.76 MMac, 17.262% MACs,
(downsample): Sequential(
8.58 k, 0.233% Params, 2.2 MMac, 0.886% MACs,
(0): Conv2d(8.32 k, 0.226% Params, 2.13 MMac, 0.860% MACs, 64, 128, kernel_size=(1, 1), stride=(2, 2))
(1): BatchNorm2d(256, 0.007% Params, 65.54 KMac, 0.026% MACs, 128, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
)
(blockdict): ModuleDict(
150.4 k, 4.080% Params, 40.57 MMac, 16.375% MACs,
(block1): Bottleneck(
24.51 k, 0.665% Params, 8.0 MMac, 3.227% MACs,
(conv1): Conv2d(2.08 k, 0.056% Params, 2.13 MMac, 0.860% MACs, 64, 32, kernel_size=(1, 1), stride=(1, 1))
(bn1): BatchNorm2d(64, 0.002% Params, 65.54 KMac, 0.026% MACs, 32, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
(conv2): Conv2d(9.25 k, 0.251% Params, 2.37 MMac, 0.956% MACs, 32, 32, kernel_size=(3, 3), stride=(2, 2), padding=(1, 1))
(bn2): BatchNorm2d(64, 0.002% Params, 16.38 KMac, 0.007% MACs, 32, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
(conv3): Conv2d(4.22 k, 0.115% Params, 1.08 MMac, 0.436% MACs, 32, 128, kernel_size=(1, 1), stride=(1, 1))
(bn3): BatchNorm2d(256, 0.007% Params, 65.54 KMac, 0.026% MACs, 128, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
(relu): ReLU(0, 0.000% Params, 73.73 KMac, 0.030% MACs, )
)
(block2): Bottleneck(
17.98 k, 0.488% Params, 4.65 MMac, 1.878% MACs,
(conv1): Conv2d(4.13 k, 0.112% Params, 1.06 MMac, 0.427% MACs, 128, 32, kernel_size=(1, 1), stride=(1, 1))
(bn1): BatchNorm2d(64, 0.002% Params, 16.38 KMac, 0.007% MACs, 32, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
(conv2): Conv2d(9.25 k, 0.251% Params, 2.37 MMac, 0.956% MACs, 32, 32, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
(bn2): BatchNorm2d(64, 0.002% Params, 16.38 KMac, 0.007% MACs, 32, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
(conv3): Conv2d(4.22 k, 0.115% Params, 1.08 MMac, 0.436% MACs, 32, 128, kernel_size=(1, 1), stride=(1, 1))
(bn3): BatchNorm2d(256, 0.007% Params, 65.54 KMac, 0.026% MACs, 128, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
(relu): ReLU(0, 0.000% Params, 49.15 KMac, 0.020% MACs, )
)
(block3): Bottleneck(
17.98 k, 0.488% Params, 4.65 MMac, 1.878% MACs,
(conv1): Conv2d(4.13 k, 0.112% Params, 1.06 MMac, 0.427% MACs, 128, 32, kernel_size=(1, 1), stride=(1, 1))
(bn1): BatchNorm2d(64, 0.002% Params, 16.38 KMac, 0.007% MACs, 32, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
(conv2): Conv2d(9.25 k, 0.251% Params, 2.37 MMac, 0.956% MACs, 32, 32, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
(bn2): BatchNorm2d(64, 0.002% Params, 16.38 KMac, 0.007% MACs, 32, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
(conv3): Conv2d(4.22 k, 0.115% Params, 1.08 MMac, 0.436% MACs, 32, 128, kernel_size=(1, 1), stride=(1, 1))
(bn3): BatchNorm2d(256, 0.007% Params, 65.54 KMac, 0.026% MACs, 128, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
(relu): ReLU(0, 0.000% Params, 49.15 KMac, 0.020% MACs, )
)
(block4): Bottleneck(
17.98 k, 0.488% Params, 4.65 MMac, 1.878% MACs,
(conv1): Conv2d(4.13 k, 0.112% Params, 1.06 MMac, 0.427% MACs, 128, 32, kernel_size=(1, 1), stride=(1, 1))
(bn1): BatchNorm2d(64, 0.002% Params, 16.38 KMac, 0.007% MACs, 32, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
(conv2): Conv2d(9.25 k, 0.251% Params, 2.37 MMac, 0.956% MACs, 32, 32, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
(bn2): BatchNorm2d(64, 0.002% Params, 16.38 KMac, 0.007% MACs, 32, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
(conv3): Conv2d(4.22 k, 0.115% Params, 1.08 MMac, 0.436% MACs, 32, 128, kernel_size=(1, 1), stride=(1, 1))
(bn3): BatchNorm2d(256, 0.007% Params, 65.54 KMac, 0.026% MACs, 128, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
(relu): ReLU(0, 0.000% Params, 49.15 KMac, 0.020% MACs, )
)
(block5): Bottleneck(
17.98 k, 0.488% Params, 4.65 MMac, 1.878% MACs,
(conv1): Conv2d(4.13 k, 0.112% Params, 1.06 MMac, 0.427% MACs, 128, 32, kernel_size=(1, 1), stride=(1, 1))
(bn1): BatchNorm2d(64, 0.002% Params, 16.38 KMac, 0.007% MACs, 32, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
(conv2): Conv2d(9.25 k, 0.251% Params, 2.37 MMac, 0.956% MACs, 32, 32, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
(bn2): BatchNorm2d(64, 0.002% Params, 16.38 KMac, 0.007% MACs, 32, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
(conv3): Conv2d(4.22 k, 0.115% Params, 1.08 MMac, 0.436% MACs, 32, 128, kernel_size=(1, 1), stride=(1, 1))
(bn3): BatchNorm2d(256, 0.007% Params, 65.54 KMac, 0.026% MACs, 128, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
(relu): ReLU(0, 0.000% Params, 49.15 KMac, 0.020% MACs, )
)
(block6): Bottleneck(
17.98 k, 0.488% Params, 4.65 MMac, 1.878% MACs,
(conv1): Conv2d(4.13 k, 0.112% Params, 1.06 MMac, 0.427% MACs, 128, 32, kernel_size=(1, 1), stride=(1, 1))
(bn1): BatchNorm2d(64, 0.002% Params, 16.38 KMac, 0.007% MACs, 32, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
(conv2): Conv2d(9.25 k, 0.251% Params, 2.37 MMac, 0.956% MACs, 32, 32, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
(bn2): BatchNorm2d(64, 0.002% Params, 16.38 KMac, 0.007% MACs, 32, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
(conv3): Conv2d(4.22 k, 0.115% Params, 1.08 MMac, 0.436% MACs, 32, 128, kernel_size=(1, 1), stride=(1, 1))
(bn3): BatchNorm2d(256, 0.007% Params, 65.54 KMac, 0.026% MACs, 128, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
(relu): ReLU(0, 0.000% Params, 49.15 KMac, 0.020% MACs, )
)
(block7): Bottleneck(
17.98 k, 0.488% Params, 4.65 MMac, 1.878% MACs,
(conv1): Conv2d(4.13 k, 0.112% Params, 1.06 MMac, 0.427% MACs, 128, 32, kernel_size=(1, 1), stride=(1, 1))
(bn1): BatchNorm2d(64, 0.002% Params, 16.38 KMac, 0.007% MACs, 32, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
(conv2): Conv2d(9.25 k, 0.251% Params, 2.37 MMac, 0.956% MACs, 32, 32, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
(bn2): BatchNorm2d(64, 0.002% Params, 16.38 KMac, 0.007% MACs, 32, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
(conv3): Conv2d(4.22 k, 0.115% Params, 1.08 MMac, 0.436% MACs, 32, 128, kernel_size=(1, 1), stride=(1, 1))
(bn3): BatchNorm2d(256, 0.007% Params, 65.54 KMac, 0.026% MACs, 128, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
(relu): ReLU(0, 0.000% Params, 49.15 KMac, 0.020% MACs, )
)
(block8): Bottleneck(
17.98 k, 0.488% Params, 4.65 MMac, 1.878% MACs,
(conv1): Conv2d(4.13 k, 0.112% Params, 1.06 MMac, 0.427% MACs, 128, 32, kernel_size=(1, 1), stride=(1, 1))
(bn1): BatchNorm2d(64, 0.002% Params, 16.38 KMac, 0.007% MACs, 32, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
(conv2): Conv2d(9.25 k, 0.251% Params, 2.37 MMac, 0.956% MACs, 32, 32, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
(bn2): BatchNorm2d(64, 0.002% Params, 16.38 KMac, 0.007% MACs, 32, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
(conv3): Conv2d(4.22 k, 0.115% Params, 1.08 MMac, 0.436% MACs, 32, 128, kernel_size=(1, 1), stride=(1, 1))
(bn3): BatchNorm2d(256, 0.007% Params, 65.54 KMac, 0.026% MACs, 128, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
(relu): ReLU(0, 0.000% Params, 49.15 KMac, 0.020% MACs, )
)
)
)
(layer3): MakeLayer(
2.61 M, 70.724% Params, 169.36 MMac, 68.365% MACs,
(downsample): Sequential(
33.54 k, 0.910% Params, 2.15 MMac, 0.866% MACs,
(0): Conv2d(33.02 k, 0.896% Params, 2.11 MMac, 0.853% MACs, 128, 256, kernel_size=(1, 1), stride=(2, 2))
(1): BatchNorm2d(512, 0.014% Params, 32.77 KMac, 0.013% MACs, 256, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
)
(blockdict): ModuleDict(
2.57 M, 69.815% Params, 167.22 MMac, 67.499% MACs,
(block1): Bottleneck(
96.13 k, 2.608% Params, 7.8 MMac, 3.148% MACs,
(conv1): Conv2d(8.26 k, 0.224% Params, 2.11 MMac, 0.853% MACs, 128, 64, kernel_size=(1, 1), stride=(1, 1))
(bn1): BatchNorm2d(128, 0.003% Params, 32.77 KMac, 0.013% MACs, 64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
(conv2): Conv2d(36.93 k, 1.002% Params, 2.36 MMac, 0.954% MACs, 64, 64, kernel_size=(3, 3), stride=(2, 2), padding=(1, 1))
(bn2): BatchNorm2d(128, 0.003% Params, 8.19 KMac, 0.003% MACs, 64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
(conv3): Conv2d(16.64 k, 0.451% Params, 1.06 MMac, 0.430% MACs, 64, 256, kernel_size=(1, 1), stride=(1, 1))
(bn3): BatchNorm2d(512, 0.014% Params, 32.77 KMac, 0.013% MACs, 256, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
(relu): ReLU(0, 0.000% Params, 36.86 KMac, 0.015% MACs, )
)
(block2): Bottleneck(
70.78 k, 1.920% Params, 4.55 MMac, 1.839% MACs,
(conv1): Conv2d(16.45 k, 0.446% Params, 1.05 MMac, 0.425% MACs, 256, 64, kernel_size=(1, 1), stride=(1, 1))
(bn1): BatchNorm2d(128, 0.003% Params, 8.19 KMac, 0.003% MACs, 64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
(conv2): Conv2d(36.93 k, 1.002% Params, 2.36 MMac, 0.954% MACs, 64, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
(bn2): BatchNorm2d(128, 0.003% Params, 8.19 KMac, 0.003% MACs, 64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
(conv3): Conv2d(16.64 k, 0.451% Params, 1.06 MMac, 0.430% MACs, 64, 256, kernel_size=(1, 1), stride=(1, 1))
(bn3): BatchNorm2d(512, 0.014% Params, 32.77 KMac, 0.013% MACs, 256, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
(relu): ReLU(0, 0.000% Params, 24.58 KMac, 0.010% MACs, )
)
(block3): Bottleneck(
70.78 k, 1.920% Params, 4.55 MMac, 1.839% MACs,
(conv1): Conv2d(16.45 k, 0.446% Params, 1.05 MMac, 0.425% MACs, 256, 64, kernel_size=(1, 1), stride=(1, 1))
(bn1): BatchNorm2d(128, 0.003% Params, 8.19 KMac, 0.003% MACs, 64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
(conv2): Conv2d(36.93 k, 1.002% Params, 2.36 MMac, 0.954% MACs, 64, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
(bn2): BatchNorm2d(128, 0.003% Params, 8.19 KMac, 0.003% MACs, 64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
(conv3): Conv2d(16.64 k, 0.451% Params, 1.06 MMac, 0.430% MACs, 64, 256, kernel_size=(1, 1), stride=(1, 1))
(bn3): BatchNorm2d(512, 0.014% Params, 32.77 KMac, 0.013% MACs, 256, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
(relu): ReLU(0, 0.000% Params, 24.58 KMac, 0.010% MACs, )
)
(block4): Bottleneck(
70.78 k, 1.920% Params, 4.55 MMac, 1.839% MACs,
(conv1): Conv2d(16.45 k, 0.446% Params, 1.05 MMac, 0.425% MACs, 256, 64, kernel_size=(1, 1), stride=(1, 1))
(bn1): BatchNorm2d(128, 0.003% Params, 8.19 KMac, 0.003% MACs, 64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
(conv2): Conv2d(36.93 k, 1.002% Params, 2.36 MMac, 0.954% MACs, 64, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
(bn2): BatchNorm2d(128, 0.003% Params, 8.19 KMac, 0.003% MACs, 64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
(conv3): Conv2d(16.64 k, 0.451% Params, 1.06 MMac, 0.430% MACs, 64, 256, kernel_size=(1, 1), stride=(1, 1))
(bn3): BatchNorm2d(512, 0.014% Params, 32.77 KMac, 0.013% MACs, 256, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
(relu): ReLU(0, 0.000% Params, 24.58 KMac, 0.010% MACs, )
)
(block5): Bottleneck(
70.78 k, 1.920% Params, 4.55 MMac, 1.839% MACs,
(conv1): Conv2d(16.45 k, 0.446% Params, 1.05 MMac, 0.425% MACs, 256, 64, kernel_size=(1, 1), stride=(1, 1))
(bn1): BatchNorm2d(128, 0.003% Params, 8.19 KMac, 0.003% MACs, 64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
(conv2): Conv2d(36.93 k, 1.002% Params, 2.36 MMac, 0.954% MACs, 64, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
(bn2): BatchNorm2d(128, 0.003% Params, 8.19 KMac, 0.003% MACs, 64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
(conv3): Conv2d(16.64 k, 0.451% Params, 1.06 MMac, 0.430% MACs, 64, 256, kernel_size=(1, 1), stride=(1, 1))
(bn3): BatchNorm2d(512, 0.014% Params, 32.77 KMac, 0.013% MACs, 256, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
(relu): ReLU(0, 0.000% Params, 24.58 KMac, 0.010% MACs, )
)
(block6): Bottleneck(
70.78 k, 1.920% Params, 4.55 MMac, 1.839% MACs,
(conv1): Conv2d(16.45 k, 0.446% Params, 1.05 MMac, 0.425% MACs, 256, 64, kernel_size=(1, 1), stride=(1, 1))
(bn1): BatchNorm2d(128, 0.003% Params, 8.19 KMac, 0.003% MACs, 64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
(conv2): Conv2d(36.93 k, 1.002% Params, 2.36 MMac, 0.954% MACs, 64, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
(bn2): BatchNorm2d(128, 0.003% Params, 8.19 KMac, 0.003% MACs, 64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
(conv3): Conv2d(16.64 k, 0.451% Params, 1.06 MMac, 0.430% MACs, 64, 256, kernel_size=(1, 1), stride=(1, 1))
(bn3): BatchNorm2d(512, 0.014% Params, 32.77 KMac, 0.013% MACs, 256, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
(relu): ReLU(0, 0.000% Params, 24.58 KMac, 0.010% MACs, )
)
(block7): Bottleneck(
70.78 k, 1.920% Params, 4.55 MMac, 1.839% MACs,
(conv1): Conv2d(16.45 k, 0.446% Params, 1.05 MMac, 0.425% MACs, 256, 64, kernel_size=(1, 1), stride=(1, 1))
(bn1): BatchNorm2d(128, 0.003% Params, 8.19 KMac, 0.003% MACs, 64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
(conv2): Conv2d(36.93 k, 1.002% Params, 2.36 MMac, 0.954% MACs, 64, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
(bn2): BatchNorm2d(128, 0.003% Params, 8.19 KMac, 0.003% MACs, 64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
(conv3): Conv2d(16.64 k, 0.451% Params, 1.06 MMac, 0.430% MACs, 64, 256, kernel_size=(1, 1), stride=(1, 1))
(bn3): BatchNorm2d(512, 0.014% Params, 32.77 KMac, 0.013% MACs, 256, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
(relu): ReLU(0, 0.000% Params, 24.58 KMac, 0.010% MACs, )
)
(block8): Bottleneck(
70.78 k, 1.920% Params, 4.55 MMac, 1.839% MACs,
(conv1): Conv2d(16.45 k, 0.446% Params, 1.05 MMac, 0.425% MACs, 256, 64, kernel_size=(1, 1), stride=(1, 1))
(bn1): BatchNorm2d(128, 0.003% Params, 8.19 KMac, 0.003% MACs, 64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
(conv2): Conv2d(36.93 k, 1.002% Params, 2.36 MMac, 0.954% MACs, 64, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
(bn2): BatchNorm2d(128, 0.003% Params, 8.19 KMac, 0.003% MACs, 64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
(conv3): Conv2d(16.64 k, 0.451% Params, 1.06 MMac, 0.430% MACs, 64, 256, kernel_size=(1, 1), stride=(1, 1))
(bn3): BatchNorm2d(512, 0.014% Params, 32.77 KMac, 0.013% MACs, 256, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
(relu): ReLU(0, 0.000% Params, 24.58 KMac, 0.010% MACs, )
)
(block9): Bottleneck(
70.78 k, 1.920% Params, 4.55 MMac, 1.839% MACs,
(conv1): Conv2d(16.45 k, 0.446% Params, 1.05 MMac, 0.425% MACs, 256, 64, kernel_size=(1, 1), stride=(1, 1))
(bn1): BatchNorm2d(128, 0.003% Params, 8.19 KMac, 0.003% MACs, 64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
(conv2): Conv2d(36.93 k, 1.002% Params, 2.36 MMac, 0.954% MACs, 64, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
(bn2): BatchNorm2d(128, 0.003% Params, 8.19 KMac, 0.003% MACs, 64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
(conv3): Conv2d(16.64 k, 0.451% Params, 1.06 MMac, 0.430% MACs, 64, 256, kernel_size=(1, 1), stride=(1, 1))
(bn3): BatchNorm2d(512, 0.014% Params, 32.77 KMac, 0.013% MACs, 256, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
(relu): ReLU(0, 0.000% Params, 24.58 KMac, 0.010% MACs, )
)
(block10): Bottleneck(
70.78 k, 1.920% Params, 4.55 MMac, 1.839% MACs,
(conv1): Conv2d(16.45 k, 0.446% Params, 1.05 MMac, 0.425% MACs, 256, 64, kernel_size=(1, 1), stride=(1, 1))
(bn1): BatchNorm2d(128, 0.003% Params, 8.19 KMac, 0.003% MACs, 64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
(conv2): Conv2d(36.93 k, 1.002% Params, 2.36 MMac, 0.954% MACs, 64, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
(bn2): BatchNorm2d(128, 0.003% Params, 8.19 KMac, 0.003% MACs, 64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
(conv3): Conv2d(16.64 k, 0.451% Params, 1.06 MMac, 0.430% MACs, 64, 256, kernel_size=(1, 1), stride=(1, 1))
(bn3): BatchNorm2d(512, 0.014% Params, 32.77 KMac, 0.013% MACs, 256, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
(relu): ReLU(0, 0.000% Params, 24.58 KMac, 0.010% MACs, )
)
(block11): Bottleneck(
70.78 k, 1.920% Params, 4.55 MMac, 1.839% MACs,
(conv1): Conv2d(16.45 k, 0.446% Params, 1.05 MMac, 0.425% MACs, 256, 64, kernel_size=(1, 1), stride=(1, 1))
(bn1): BatchNorm2d(128, 0.003% Params, 8.19 KMac, 0.003% MACs, 64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
(conv2): Conv2d(36.93 k, 1.002% Params, 2.36 MMac, 0.954% MACs, 64, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
(bn2): BatchNorm2d(128, 0.003% Params, 8.19 KMac, 0.003% MACs, 64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
(conv3): Conv2d(16.64 k, 0.451% Params, 1.06 MMac, 0.430% MACs, 64, 256, kernel_size=(1, 1), stride=(1, 1))
(bn3): BatchNorm2d(512, 0.014% Params, 32.77 KMac, 0.013% MACs, 256, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
(relu): ReLU(0, 0.000% Params, 24.58 KMac, 0.010% MACs, )
)
(block12): Bottleneck(
70.78 k, 1.920% Params, 4.55 MMac, 1.839% MACs,
(conv1): Conv2d(16.45 k, 0.446% Params, 1.05 MMac, 0.425% MACs, 256, 64, kernel_size=(1, 1), stride=(1, 1))
(bn1): BatchNorm2d(128, 0.003% Params, 8.19 KMac, 0.003% MACs, 64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
(conv2): Conv2d(36.93 k, 1.002% Params, 2.36 MMac, 0.954% MACs, 64, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
(bn2): BatchNorm2d(128, 0.003% Params, 8.19 KMac, 0.003% MACs, 64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
(conv3): Conv2d(16.64 k, 0.451% Params, 1.06 MMac, 0.430% MACs, 64, 256, kernel_size=(1, 1), stride=(1, 1))
(bn3): BatchNorm2d(512, 0.014% Params, 32.77 KMac, 0.013% MACs, 256, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
(relu): ReLU(0, 0.000% Params, 24.58 KMac, 0.010% MACs, )
)
(block13): Bottleneck(
70.78 k, 1.920% Params, 4.55 MMac, 1.839% MACs,
(conv1): Conv2d(16.45 k, 0.446% Params, 1.05 MMac, 0.425% MACs, 256, 64, kernel_size=(1, 1), stride=(1, 1))
(bn1): BatchNorm2d(128, 0.003% Params, 8.19 KMac, 0.003% MACs, 64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
(conv2): Conv2d(36.93 k, 1.002% Params, 2.36 MMac, 0.954% MACs, 64, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
(bn2): BatchNorm2d(128, 0.003% Params, 8.19 KMac, 0.003% MACs, 64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
(conv3): Conv2d(16.64 k, 0.451% Params, 1.06 MMac, 0.430% MACs, 64, 256, kernel_size=(1, 1), stride=(1, 1))
(bn3): BatchNorm2d(512, 0.014% Params, 32.77 KMac, 0.013% MACs, 256, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
(relu): ReLU(0, 0.000% Params, 24.58 KMac, 0.010% MACs, )
)
(block14): Bottleneck(
70.78 k, 1.920% Params, 4.55 MMac, 1.839% MACs,
(conv1): Conv2d(16.45 k, 0.446% Params, 1.05 MMac, 0.425% MACs, 256, 64, kernel_size=(1, 1), stride=(1, 1))
(bn1): BatchNorm2d(128, 0.003% Params, 8.19 KMac, 0.003% MACs, 64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
(conv2): Conv2d(36.93 k, 1.002% Params, 2.36 MMac, 0.954% MACs, 64, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
(bn2): BatchNorm2d(128, 0.003% Params, 8.19 KMac, 0.003% MACs, 64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
(conv3): Conv2d(16.64 k, 0.451% Params, 1.06 MMac, 0.430% MACs, 64, 256, kernel_size=(1, 1), stride=(1, 1))
(bn3): BatchNorm2d(512, 0.014% Params, 32.77 KMac, 0.013% MACs, 256, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
(relu): ReLU(0, 0.000% Params, 24.58 KMac, 0.010% MACs, )
)
(block15): Bottleneck(
70.78 k, 1.920% Params, 4.55 MMac, 1.839% MACs,
(conv1): Conv2d(16.45 k, 0.446% Params, 1.05 MMac, 0.425% MACs, 256, 64, kernel_size=(1, 1), stride=(1, 1))
(bn1): BatchNorm2d(128, 0.003% Params, 8.19 KMac, 0.003% MACs, 64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
(conv2): Conv2d(36.93 k, 1.002% Params, 2.36 MMac, 0.954% MACs, 64, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
(bn2): BatchNorm2d(128, 0.003% Params, 8.19 KMac, 0.003% MACs, 64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
(conv3): Conv2d(16.64 k, 0.451% Params, 1.06 MMac, 0.430% MACs, 64, 256, kernel_size=(1, 1), stride=(1, 1))
(bn3): BatchNorm2d(512, 0.014% Params, 32.77 KMac, 0.013% MACs, 256, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
(relu): ReLU(0, 0.000% Params, 24.58 KMac, 0.010% MACs, )
)
(block16): Bottleneck(
70.78 k, 1.920% Params, 4.55 MMac, 1.839% MACs,
(conv1): Conv2d(16.45 k, 0.446% Params, 1.05 MMac, 0.425% MACs, 256, 64, kernel_size=(1, 1), stride=(1, 1))
(bn1): BatchNorm2d(128, 0.003% Params, 8.19 KMac, 0.003% MACs, 64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
(conv2): Conv2d(36.93 k, 1.002% Params, 2.36 MMac, 0.954% MACs, 64, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
(bn2): BatchNorm2d(128, 0.003% Params, 8.19 KMac, 0.003% MACs, 64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
(conv3): Conv2d(16.64 k, 0.451% Params, 1.06 MMac, 0.430% MACs, 64, 256, kernel_size=(1, 1), stride=(1, 1))
(bn3): BatchNorm2d(512, 0.014% Params, 32.77 KMac, 0.013% MACs, 256, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
(relu): ReLU(0, 0.000% Params, 24.58 KMac, 0.010% MACs, )
)
(block17): Bottleneck(
70.78 k, 1.920% Params, 4.55 MMac, 1.839% MACs,
(conv1): Conv2d(16.45 k, 0.446% Params, 1.05 MMac, 0.425% MACs, 256, 64, kernel_size=(1, 1), stride=(1, 1))
(bn1): BatchNorm2d(128, 0.003% Params, 8.19 KMac, 0.003% MACs, 64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
(conv2): Conv2d(36.93 k, 1.002% Params, 2.36 MMac, 0.954% MACs, 64, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
(bn2): BatchNorm2d(128, 0.003% Params, 8.19 KMac, 0.003% MACs, 64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
(conv3): Conv2d(16.64 k, 0.451% Params, 1.06 MMac, 0.430% MACs, 64, 256, kernel_size=(1, 1), stride=(1, 1))
(bn3): BatchNorm2d(512, 0.014% Params, 32.77 KMac, 0.013% MACs, 256, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
(relu): ReLU(0, 0.000% Params, 24.58 KMac, 0.010% MACs, )
)
(block18): Bottleneck(
70.78 k, 1.920% Params, 4.55 MMac, 1.839% MACs,
(conv1): Conv2d(16.45 k, 0.446% Params, 1.05 MMac, 0.425% MACs, 256, 64, kernel_size=(1, 1), stride=(1, 1))
(bn1): BatchNorm2d(128, 0.003% Params, 8.19 KMac, 0.003% MACs, 64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
(conv2): Conv2d(36.93 k, 1.002% Params, 2.36 MMac, 0.954% MACs, 64, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
(bn2): BatchNorm2d(128, 0.003% Params, 8.19 KMac, 0.003% MACs, 64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
(conv3): Conv2d(16.64 k, 0.451% Params, 1.06 MMac, 0.430% MACs, 64, 256, kernel_size=(1, 1), stride=(1, 1))
(bn3): BatchNorm2d(512, 0.014% Params, 32.77 KMac, 0.013% MACs, 256, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
(relu): ReLU(0, 0.000% Params, 24.58 KMac, 0.010% MACs, )
)
(block19): Bottleneck(
70.78 k, 1.920% Params, 4.55 MMac, 1.839% MACs,
(conv1): Conv2d(16.45 k, 0.446% Params, 1.05 MMac, 0.425% MACs, 256, 64, kernel_size=(1, 1), stride=(1, 1))
(bn1): BatchNorm2d(128, 0.003% Params, 8.19 KMac, 0.003% MACs, 64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
(conv2): Conv2d(36.93 k, 1.002% Params, 2.36 MMac, 0.954% MACs, 64, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
(bn2): BatchNorm2d(128, 0.003% Params, 8.19 KMac, 0.003% MACs, 64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
(conv3): Conv2d(16.64 k, 0.451% Params, 1.06 MMac, 0.430% MACs, 64, 256, kernel_size=(1, 1), stride=(1, 1))
(bn3): BatchNorm2d(512, 0.014% Params, 32.77 KMac, 0.013% MACs, 256, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
(relu): ReLU(0, 0.000% Params, 24.58 KMac, 0.010% MACs, )
)
(block20): Bottleneck(
70.78 k, 1.920% Params, 4.55 MMac, 1.839% MACs,
(conv1): Conv2d(16.45 k, 0.446% Params, 1.05 MMac, 0.425% MACs, 256, 64, kernel_size=(1, 1), stride=(1, 1))
(bn1): BatchNorm2d(128, 0.003% Params, 8.19 KMac, 0.003% MACs, 64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
(conv2): Conv2d(36.93 k, 1.002% Params, 2.36 MMac, 0.954% MACs, 64, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
(bn2): BatchNorm2d(128, 0.003% Params, 8.19 KMac, 0.003% MACs, 64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
(conv3): Conv2d(16.64 k, 0.451% Params, 1.06 MMac, 0.430% MACs, 64, 256, kernel_size=(1, 1), stride=(1, 1))
(bn3): BatchNorm2d(512, 0.014% Params, 32.77 KMac, 0.013% MACs, 256, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
(relu): ReLU(0, 0.000% Params, 24.58 KMac, 0.010% MACs, )
)
(block21): Bottleneck(
70.78 k, 1.920% Params, 4.55 MMac, 1.839% MACs,
(conv1): Conv2d(16.45 k, 0.446% Params, 1.05 MMac, 0.425% MACs, 256, 64, kernel_size=(1, 1), stride=(1, 1))
(bn1): BatchNorm2d(128, 0.003% Params, 8.19 KMac, 0.003% MACs, 64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
(conv2): Conv2d(36.93 k, 1.002% Params, 2.36 MMac, 0.954% MACs, 64, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
(bn2): BatchNorm2d(128, 0.003% Params, 8.19 KMac, 0.003% MACs, 64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
(conv3): Conv2d(16.64 k, 0.451% Params, 1.06 MMac, 0.430% MACs, 64, 256, kernel_size=(1, 1), stride=(1, 1))
(bn3): BatchNorm2d(512, 0.014% Params, 32.77 KMac, 0.013% MACs, 256, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
(relu): ReLU(0, 0.000% Params, 24.58 KMac, 0.010% MACs, )
)
(block22): Bottleneck(
70.78 k, 1.920% Params, 4.55 MMac, 1.839% MACs,
(conv1): Conv2d(16.45 k, 0.446% Params, 1.05 MMac, 0.425% MACs, 256, 64, kernel_size=(1, 1), stride=(1, 1))
(bn1): BatchNorm2d(128, 0.003% Params, 8.19 KMac, 0.003% MACs, 64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
(conv2): Conv2d(36.93 k, 1.002% Params, 2.36 MMac, 0.954% MACs, 64, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
(bn2): BatchNorm2d(128, 0.003% Params, 8.19 KMac, 0.003% MACs, 64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
(conv3): Conv2d(16.64 k, 0.451% Params, 1.06 MMac, 0.430% MACs, 64, 256, kernel_size=(1, 1), stride=(1, 1))
(bn3): BatchNorm2d(512, 0.014% Params, 32.77 KMac, 0.013% MACs, 256, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
(relu): ReLU(0, 0.000% Params, 24.58 KMac, 0.010% MACs, )
)
(block23): Bottleneck(
70.78 k, 1.920% Params, 4.55 MMac, 1.839% MACs,
(conv1): Conv2d(16.45 k, 0.446% Params, 1.05 MMac, 0.425% MACs, 256, 64, kernel_size=(1, 1), stride=(1, 1))
(bn1): BatchNorm2d(128, 0.003% Params, 8.19 KMac, 0.003% MACs, 64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
(conv2): Conv2d(36.93 k, 1.002% Params, 2.36 MMac, 0.954% MACs, 64, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
(bn2): BatchNorm2d(128, 0.003% Params, 8.19 KMac, 0.003% MACs, 64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
(conv3): Conv2d(16.64 k, 0.451% Params, 1.06 MMac, 0.430% MACs, 64, 256, kernel_size=(1, 1), stride=(1, 1))
(bn3): BatchNorm2d(512, 0.014% Params, 32.77 KMac, 0.013% MACs, 256, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
(relu): ReLU(0, 0.000% Params, 24.58 KMac, 0.010% MACs, )
)
(block24): Bottleneck(
70.78 k, 1.920% Params, 4.55 MMac, 1.839% MACs,
(conv1): Conv2d(16.45 k, 0.446% Params, 1.05 MMac, 0.425% MACs, 256, 64, kernel_size=(1, 1), stride=(1, 1))
(bn1): BatchNorm2d(128, 0.003% Params, 8.19 KMac, 0.003% MACs, 64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
(conv2): Conv2d(36.93 k, 1.002% Params, 2.36 MMac, 0.954% MACs, 64, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
(bn2): BatchNorm2d(128, 0.003% Params, 8.19 KMac, 0.003% MACs, 64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
(conv3): Conv2d(16.64 k, 0.451% Params, 1.06 MMac, 0.430% MACs, 64, 256, kernel_size=(1, 1), stride=(1, 1))
(bn3): BatchNorm2d(512, 0.014% Params, 32.77 KMac, 0.013% MACs, 256, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
(relu): ReLU(0, 0.000% Params, 24.58 KMac, 0.010% MACs, )
)
(block25): Bottleneck(
70.78 k, 1.920% Params, 4.55 MMac, 1.839% MACs,
(conv1): Conv2d(16.45 k, 0.446% Params, 1.05 MMac, 0.425% MACs, 256, 64, kernel_size=(1, 1), stride=(1, 1))
(bn1): BatchNorm2d(128, 0.003% Params, 8.19 KMac, 0.003% MACs, 64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
(conv2): Conv2d(36.93 k, 1.002% Params, 2.36 MMac, 0.954% MACs, 64, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
(bn2): BatchNorm2d(128, 0.003% Params, 8.19 KMac, 0.003% MACs, 64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
(conv3): Conv2d(16.64 k, 0.451% Params, 1.06 MMac, 0.430% MACs, 64, 256, kernel_size=(1, 1), stride=(1, 1))
(bn3): BatchNorm2d(512, 0.014% Params, 32.77 KMac, 0.013% MACs, 256, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
(relu): ReLU(0, 0.000% Params, 24.58 KMac, 0.010% MACs, )
)
(block26): Bottleneck(
70.78 k, 1.920% Params, 4.55 MMac, 1.839% MACs,
(conv1): Conv2d(16.45 k, 0.446% Params, 1.05 MMac, 0.425% MACs, 256, 64, kernel_size=(1, 1), stride=(1, 1))
(bn1): BatchNorm2d(128, 0.003% Params, 8.19 KMac, 0.003% MACs, 64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
(conv2): Conv2d(36.93 k, 1.002% Params, 2.36 MMac, 0.954% MACs, 64, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
(bn2): BatchNorm2d(128, 0.003% Params, 8.19 KMac, 0.003% MACs, 64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
(conv3): Conv2d(16.64 k, 0.451% Params, 1.06 MMac, 0.430% MACs, 64, 256, kernel_size=(1, 1), stride=(1, 1))
(bn3): BatchNorm2d(512, 0.014% Params, 32.77 KMac, 0.013% MACs, 256, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
(relu): ReLU(0, 0.000% Params, 24.58 KMac, 0.010% MACs, )
)
(block27): Bottleneck(
70.78 k, 1.920% Params, 4.55 MMac, 1.839% MACs,
(conv1): Conv2d(16.45 k, 0.446% Params, 1.05 MMac, 0.425% MACs, 256, 64, kernel_size=(1, 1), stride=(1, 1))
(bn1): BatchNorm2d(128, 0.003% Params, 8.19 KMac, 0.003% MACs, 64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
(conv2): Conv2d(36.93 k, 1.002% Params, 2.36 MMac, 0.954% MACs, 64, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
(bn2): BatchNorm2d(128, 0.003% Params, 8.19 KMac, 0.003% MACs, 64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
(conv3): Conv2d(16.64 k, 0.451% Params, 1.06 MMac, 0.430% MACs, 64, 256, kernel_size=(1, 1), stride=(1, 1))
(bn3): BatchNorm2d(512, 0.014% Params, 32.77 KMac, 0.013% MACs, 256, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
(relu): ReLU(0, 0.000% Params, 24.58 KMac, 0.010% MACs, )
)
(block28): Bottleneck(
70.78 k, 1.920% Params, 4.55 MMac, 1.839% MACs,
(conv1): Conv2d(16.45 k, 0.446% Params, 1.05 MMac, 0.425% MACs, 256, 64, kernel_size=(1, 1), stride=(1, 1))
(bn1): BatchNorm2d(128, 0.003% Params, 8.19 KMac, 0.003% MACs, 64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
(conv2): Conv2d(36.93 k, 1.002% Params, 2.36 MMac, 0.954% MACs, 64, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
(bn2): BatchNorm2d(128, 0.003% Params, 8.19 KMac, 0.003% MACs, 64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
(conv3): Conv2d(16.64 k, 0.451% Params, 1.06 MMac, 0.430% MACs, 64, 256, kernel_size=(1, 1), stride=(1, 1))
(bn3): BatchNorm2d(512, 0.014% Params, 32.77 KMac, 0.013% MACs, 256, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
(relu): ReLU(0, 0.000% Params, 24.58 KMac, 0.010% MACs, )
)
(block29): Bottleneck(
70.78 k, 1.920% Params, 4.55 MMac, 1.839% MACs,
(conv1): Conv2d(16.45 k, 0.446% Params, 1.05 MMac, 0.425% MACs, 256, 64, kernel_size=(1, 1), stride=(1, 1))
(bn1): BatchNorm2d(128, 0.003% Params, 8.19 KMac, 0.003% MACs, 64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
(conv2): Conv2d(36.93 k, 1.002% Params, 2.36 MMac, 0.954% MACs, 64, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
(bn2): BatchNorm2d(128, 0.003% Params, 8.19 KMac, 0.003% MACs, 64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
(conv3): Conv2d(16.64 k, 0.451% Params, 1.06 MMac, 0.430% MACs, 64, 256, kernel_size=(1, 1), stride=(1, 1))
(bn3): BatchNorm2d(512, 0.014% Params, 32.77 KMac, 0.013% MACs, 256, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
(relu): ReLU(0, 0.000% Params, 24.58 KMac, 0.010% MACs, )
)
(block30): Bottleneck(
70.78 k, 1.920% Params, 4.55 MMac, 1.839% MACs,
(conv1): Conv2d(16.45 k, 0.446% Params, 1.05 MMac, 0.425% MACs, 256, 64, kernel_size=(1, 1), stride=(1, 1))
(bn1): BatchNorm2d(128, 0.003% Params, 8.19 KMac, 0.003% MACs, 64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
(conv2): Conv2d(36.93 k, 1.002% Params, 2.36 MMac, 0.954% MACs, 64, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
(bn2): BatchNorm2d(128, 0.003% Params, 8.19 KMac, 0.003% MACs, 64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
(conv3): Conv2d(16.64 k, 0.451% Params, 1.06 MMac, 0.430% MACs, 64, 256, kernel_size=(1, 1), stride=(1, 1))
(bn3): BatchNorm2d(512, 0.014% Params, 32.77 KMac, 0.013% MACs, 256, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
(relu): ReLU(0, 0.000% Params, 24.58 KMac, 0.010% MACs, )
)
(block31): Bottleneck(
70.78 k, 1.920% Params, 4.55 MMac, 1.839% MACs,
(conv1): Conv2d(16.45 k, 0.446% Params, 1.05 MMac, 0.425% MACs, 256, 64, kernel_size=(1, 1), stride=(1, 1))
(bn1): BatchNorm2d(128, 0.003% Params, 8.19 KMac, 0.003% MACs, 64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
(conv2): Conv2d(36.93 k, 1.002% Params, 2.36 MMac, 0.954% MACs, 64, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
(bn2): BatchNorm2d(128, 0.003% Params, 8.19 KMac, 0.003% MACs, 64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
(conv3): Conv2d(16.64 k, 0.451% Params, 1.06 MMac, 0.430% MACs, 64, 256, kernel_size=(1, 1), stride=(1, 1))
(bn3): BatchNorm2d(512, 0.014% Params, 32.77 KMac, 0.013% MACs, 256, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
(relu): ReLU(0, 0.000% Params, 24.58 KMac, 0.010% MACs, )
)
(block32): Bottleneck(
70.78 k, 1.920% Params, 4.55 MMac, 1.839% MACs,
(conv1): Conv2d(16.45 k, 0.446% Params, 1.05 MMac, 0.425% MACs, 256, 64, kernel_size=(1, 1), stride=(1, 1))
(bn1): BatchNorm2d(128, 0.003% Params, 8.19 KMac, 0.003% MACs, 64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
(conv2): Conv2d(36.93 k, 1.002% Params, 2.36 MMac, 0.954% MACs, 64, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
(bn2): BatchNorm2d(128, 0.003% Params, 8.19 KMac, 0.003% MACs, 64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
(conv3): Conv2d(16.64 k, 0.451% Params, 1.06 MMac, 0.430% MACs, 64, 256, kernel_size=(1, 1), stride=(1, 1))
(bn3): BatchNorm2d(512, 0.014% Params, 32.77 KMac, 0.013% MACs, 256, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
(relu): ReLU(0, 0.000% Params, 24.58 KMac, 0.010% MACs, )
)
(block33): Bottleneck(
70.78 k, 1.920% Params, 4.55 MMac, 1.839% MACs,
(conv1): Conv2d(16.45 k, 0.446% Params, 1.05 MMac, 0.425% MACs, 256, 64, kernel_size=(1, 1), stride=(1, 1))
(bn1): BatchNorm2d(128, 0.003% Params, 8.19 KMac, 0.003% MACs, 64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
(conv2): Conv2d(36.93 k, 1.002% Params, 2.36 MMac, 0.954% MACs, 64, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
(bn2): BatchNorm2d(128, 0.003% Params, 8.19 KMac, 0.003% MACs, 64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
(conv3): Conv2d(16.64 k, 0.451% Params, 1.06 MMac, 0.430% MACs, 64, 256, kernel_size=(1, 1), stride=(1, 1))
(bn3): BatchNorm2d(512, 0.014% Params, 32.77 KMac, 0.013% MACs, 256, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
(relu): ReLU(0, 0.000% Params, 24.58 KMac, 0.010% MACs, )
)
(block34): Bottleneck(
70.78 k, 1.920% Params, 4.55 MMac, 1.839% MACs,
(conv1): Conv2d(16.45 k, 0.446% Params, 1.05 MMac, 0.425% MACs, 256, 64, kernel_size=(1, 1), stride=(1, 1))
(bn1): BatchNorm2d(128, 0.003% Params, 8.19 KMac, 0.003% MACs, 64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
(conv2): Conv2d(36.93 k, 1.002% Params, 2.36 MMac, 0.954% MACs, 64, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
(bn2): BatchNorm2d(128, 0.003% Params, 8.19 KMac, 0.003% MACs, 64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
(conv3): Conv2d(16.64 k, 0.451% Params, 1.06 MMac, 0.430% MACs, 64, 256, kernel_size=(1, 1), stride=(1, 1))
(bn3): BatchNorm2d(512, 0.014% Params, 32.77 KMac, 0.013% MACs, 256, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
(relu): ReLU(0, 0.000% Params, 24.58 KMac, 0.010% MACs, )
)
(block35): Bottleneck(
70.78 k, 1.920% Params, 4.55 MMac, 1.839% MACs,
(conv1): Conv2d(16.45 k, 0.446% Params, 1.05 MMac, 0.425% MACs, 256, 64, kernel_size=(1, 1), stride=(1, 1))
(bn1): BatchNorm2d(128, 0.003% Params, 8.19 KMac, 0.003% MACs, 64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
(conv2): Conv2d(36.93 k, 1.002% Params, 2.36 MMac, 0.954% MACs, 64, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
(bn2): BatchNorm2d(128, 0.003% Params, 8.19 KMac, 0.003% MACs, 64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
(conv3): Conv2d(16.64 k, 0.451% Params, 1.06 MMac, 0.430% MACs, 64, 256, kernel_size=(1, 1), stride=(1, 1))
(bn3): BatchNorm2d(512, 0.014% Params, 32.77 KMac, 0.013% MACs, 256, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
(relu): ReLU(0, 0.000% Params, 24.58 KMac, 0.010% MACs, )
)
(block36): Bottleneck(
70.78 k, 1.920% Params, 4.55 MMac, 1.839% MACs,
(conv1): Conv2d(16.45 k, 0.446% Params, 1.05 MMac, 0.425% MACs, 256, 64, kernel_size=(1, 1), stride=(1, 1))
(bn1): BatchNorm2d(128, 0.003% Params, 8.19 KMac, 0.003% MACs, 64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
(conv2): Conv2d(36.93 k, 1.002% Params, 2.36 MMac, 0.954% MACs, 64, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
(bn2): BatchNorm2d(128, 0.003% Params, 8.19 KMac, 0.003% MACs, 64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
(conv3): Conv2d(16.64 k, 0.451% Params, 1.06 MMac, 0.430% MACs, 64, 256, kernel_size=(1, 1), stride=(1, 1))
(bn3): BatchNorm2d(512, 0.014% Params, 32.77 KMac, 0.013% MACs, 256, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
(relu): ReLU(0, 0.000% Params, 24.58 KMac, 0.010% MACs, )
)
)
)
(layer4): MakeLayer(
1.07 M, 29.161% Params, 18.83 MMac, 7.602% MACs,
(downsample): Sequential(
132.61 k, 3.597% Params, 2.12 MMac, 0.856% MACs,
(0): Conv2d(131.58 k, 3.570% Params, 2.11 MMac, 0.850% MACs, 256, 512, kernel_size=(1, 1), stride=(2, 2))
(1): BatchNorm2d(1.02 k, 0.028% Params, 16.38 KMac, 0.007% MACs, 512, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
)
(blockdict): ModuleDict(
942.34 k, 25.563% Params, 16.71 MMac, 6.746% MACs,
(block1): Bottleneck(
380.67 k, 10.327% Params, 7.7 MMac, 3.108% MACs,
(conv1): Conv2d(32.9 k, 0.892% Params, 2.11 MMac, 0.850% MACs, 256, 128, kernel_size=(1, 1), stride=(1, 1))
(bn1): BatchNorm2d(256, 0.007% Params, 16.38 KMac, 0.007% MACs, 128, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
(conv2): Conv2d(147.58 k, 4.004% Params, 2.36 MMac, 0.953% MACs, 128, 128, kernel_size=(3, 3), stride=(2, 2), padding=(1, 1))
(bn2): BatchNorm2d(256, 0.007% Params, 4.1 KMac, 0.002% MACs, 128, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
(conv3): Conv2d(66.05 k, 1.792% Params, 1.06 MMac, 0.427% MACs, 128, 512, kernel_size=(1, 1), stride=(1, 1))
(bn3): BatchNorm2d(1.02 k, 0.028% Params, 16.38 KMac, 0.007% MACs, 512, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
(relu): ReLU(0, 0.000% Params, 18.43 KMac, 0.007% MACs, )
)
(block2): Bottleneck(
280.83 k, 7.618% Params, 4.51 MMac, 1.819% MACs,
(conv1): Conv2d(65.66 k, 1.781% Params, 1.05 MMac, 0.424% MACs, 512, 128, kernel_size=(1, 1), stride=(1, 1))
(bn1): BatchNorm2d(256, 0.007% Params, 4.1 KMac, 0.002% MACs, 128, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
(conv2): Conv2d(147.58 k, 4.004% Params, 2.36 MMac, 0.953% MACs, 128, 128, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
(bn2): BatchNorm2d(256, 0.007% Params, 4.1 KMac, 0.002% MACs, 128, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
(conv3): Conv2d(66.05 k, 1.792% Params, 1.06 MMac, 0.427% MACs, 128, 512, kernel_size=(1, 1), stride=(1, 1))
(bn3): BatchNorm2d(1.02 k, 0.028% Params, 16.38 KMac, 0.007% MACs, 512, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
(relu): ReLU(0, 0.000% Params, 12.29 KMac, 0.005% MACs, )
)
(block3): Bottleneck(
280.83 k, 7.618% Params, 4.51 MMac, 1.819% MACs,
(conv1): Conv2d(65.66 k, 1.781% Params, 1.05 MMac, 0.424% MACs, 512, 128, kernel_size=(1, 1), stride=(1, 1))
(bn1): BatchNorm2d(256, 0.007% Params, 4.1 KMac, 0.002% MACs, 128, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
(conv2): Conv2d(147.58 k, 4.004% Params, 2.36 MMac, 0.953% MACs, 128, 128, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
(bn2): BatchNorm2d(256, 0.007% Params, 4.1 KMac, 0.002% MACs, 128, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
(conv3): Conv2d(66.05 k, 1.792% Params, 1.06 MMac, 0.427% MACs, 128, 512, kernel_size=(1, 1), stride=(1, 1))
(bn3): BatchNorm2d(1.02 k, 0.028% Params, 16.38 KMac, 0.007% MACs, 512, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
(relu): ReLU(0, 0.000% Params, 12.29 KMac, 0.005% MACs, )
)
)
)
(avgpool): AdaptiveAvgPool2d(0, 0.000% Params, 8.19 KMac, 0.003% MACs, output_size=(1, 1))
(fc): Linear(5.13 k, 0.139% Params, 5.13 KMac, 0.002% MACs, in_features=512, out_features=10, bias=True)
)
\ No newline at end of file
ResNet(
714.09 k, 101.626% Params, 36.09 MMac, 100.000% MACs,
(conv1): Conv2d(448, 0.064% Params, 458.75 KMac, 1.271% MACs, 3, 16, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
(bn1): BatchNorm2d(32, 0.005% Params, 32.77 KMac, 0.091% MACs, 16, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
(relu): ReLU(0, 0.000% Params, 16.38 KMac, 0.045% MACs, )
(layer1): MakeLayer(
9.41 k, 1.339% Params, 9.7 MMac, 26.879% MACs,
(blockdict): ModuleDict(
9.41 k, 1.339% Params, 9.7 MMac, 26.879% MACs,
(block1): BasicBlock(
4.7 k, 0.669% Params, 4.85 MMac, 13.440% MACs,
(conv1): Conv2d(2.32 k, 0.330% Params, 2.38 MMac, 6.584% MACs, 16, 16, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
(bn1): BatchNorm2d(32, 0.005% Params, 32.77 KMac, 0.091% MACs, 16, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
(conv2): Conv2d(2.32 k, 0.330% Params, 2.38 MMac, 6.584% MACs, 16, 16, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
(bn2): BatchNorm2d(32, 0.005% Params, 32.77 KMac, 0.091% MACs, 16, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
(relu): ReLU(0, 0.000% Params, 32.77 KMac, 0.091% MACs, )
)
(block2): BasicBlock(
4.7 k, 0.669% Params, 4.85 MMac, 13.440% MACs,
(conv1): Conv2d(2.32 k, 0.330% Params, 2.38 MMac, 6.584% MACs, 16, 16, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
(bn1): BatchNorm2d(32, 0.005% Params, 32.77 KMac, 0.091% MACs, 16, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
(conv2): Conv2d(2.32 k, 0.330% Params, 2.38 MMac, 6.584% MACs, 16, 16, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
(bn2): BatchNorm2d(32, 0.005% Params, 32.77 KMac, 0.091% MACs, 16, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
(relu): ReLU(0, 0.000% Params, 32.77 KMac, 0.091% MACs, )
)
)
)
(layer2): MakeLayer(
33.86 k, 4.818% Params, 8.7 MMac, 24.109% MACs,
(downsample): Sequential(
608, 0.087% Params, 155.65 KMac, 0.431% MACs,
(0): Conv2d(544, 0.077% Params, 139.26 KMac, 0.386% MACs, 16, 32, kernel_size=(1, 1), stride=(2, 2))
(1): BatchNorm2d(64, 0.009% Params, 16.38 KMac, 0.045% MACs, 32, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
)
(blockdict): ModuleDict(
33.25 k, 4.732% Params, 8.54 MMac, 23.678% MACs,
(block1): BasicBlock(
14.62 k, 2.081% Params, 3.76 MMac, 10.420% MACs,
(conv1): Conv2d(4.64 k, 0.660% Params, 1.19 MMac, 3.292% MACs, 16, 32, kernel_size=(3, 3), stride=(2, 2), padding=(1, 1))
(bn1): BatchNorm2d(64, 0.009% Params, 16.38 KMac, 0.045% MACs, 32, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
(conv2): Conv2d(9.25 k, 1.316% Params, 2.37 MMac, 6.561% MACs, 32, 32, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
(bn2): BatchNorm2d(64, 0.009% Params, 16.38 KMac, 0.045% MACs, 32, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
(relu): ReLU(0, 0.000% Params, 16.38 KMac, 0.045% MACs, )
)
(block2): BasicBlock(
18.62 k, 2.650% Params, 4.78 MMac, 13.258% MACs,
(conv1): Conv2d(9.25 k, 1.316% Params, 2.37 MMac, 6.561% MACs, 32, 32, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
(bn1): BatchNorm2d(64, 0.009% Params, 16.38 KMac, 0.045% MACs, 32, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
(conv2): Conv2d(9.25 k, 1.316% Params, 2.37 MMac, 6.561% MACs, 32, 32, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
(bn2): BatchNorm2d(64, 0.009% Params, 16.38 KMac, 0.045% MACs, 32, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
(relu): ReLU(0, 0.000% Params, 16.38 KMac, 0.045% MACs, )
)
)
)
(layer3): MakeLayer(
134.27 k, 19.109% Params, 8.61 MMac, 23.860% MACs,
(downsample): Sequential(
2.24 k, 0.319% Params, 143.36 KMac, 0.397% MACs,
(0): Conv2d(2.11 k, 0.301% Params, 135.17 KMac, 0.375% MACs, 32, 64, kernel_size=(1, 1), stride=(2, 2))
(1): BatchNorm2d(128, 0.018% Params, 8.19 KMac, 0.023% MACs, 64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
)
(blockdict): ModuleDict(
132.03 k, 18.790% Params, 8.47 MMac, 23.462% MACs,
(block1): BasicBlock(
57.92 k, 8.243% Params, 3.72 MMac, 10.295% MACs,
(conv1): Conv2d(18.5 k, 2.632% Params, 1.18 MMac, 3.280% MACs, 32, 64, kernel_size=(3, 3), stride=(2, 2), padding=(1, 1))
(bn1): BatchNorm2d(128, 0.018% Params, 8.19 KMac, 0.023% MACs, 64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
(conv2): Conv2d(36.93 k, 5.255% Params, 2.36 MMac, 6.550% MACs, 64, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
(bn2): BatchNorm2d(128, 0.018% Params, 8.19 KMac, 0.023% MACs, 64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
(relu): ReLU(0, 0.000% Params, 8.19 KMac, 0.023% MACs, )
)
(block2): BasicBlock(
74.11 k, 10.547% Params, 4.75 MMac, 13.167% MACs,
(conv1): Conv2d(36.93 k, 5.255% Params, 2.36 MMac, 6.550% MACs, 64, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
(bn1): BatchNorm2d(128, 0.018% Params, 8.19 KMac, 0.023% MACs, 64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
(conv2): Conv2d(36.93 k, 5.255% Params, 2.36 MMac, 6.550% MACs, 64, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
(bn2): BatchNorm2d(128, 0.018% Params, 8.19 KMac, 0.023% MACs, 64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
(relu): ReLU(0, 0.000% Params, 8.19 KMac, 0.023% MACs, )
)
)
)
(layer4): MakeLayer(
534.78 k, 76.108% Params, 8.56 MMac, 23.735% MACs,
(downsample): Sequential(
8.58 k, 1.220% Params, 137.22 KMac, 0.380% MACs,
(0): Conv2d(8.32 k, 1.184% Params, 133.12 KMac, 0.369% MACs, 64, 128, kernel_size=(1, 1), stride=(2, 2))
(1): BatchNorm2d(256, 0.036% Params, 4.1 KMac, 0.011% MACs, 128, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
)
(blockdict): ModuleDict(
526.21 k, 74.887% Params, 8.43 MMac, 23.355% MACs,
(block1): BasicBlock(
230.53 k, 32.808% Params, 3.69 MMac, 10.233% MACs,
(conv1): Conv2d(73.86 k, 10.511% Params, 1.18 MMac, 3.275% MACs, 64, 128, kernel_size=(3, 3), stride=(2, 2), padding=(1, 1))
(bn1): BatchNorm2d(256, 0.036% Params, 4.1 KMac, 0.011% MACs, 128, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
(conv2): Conv2d(147.58 k, 21.003% Params, 2.36 MMac, 6.544% MACs, 128, 128, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
(bn2): BatchNorm2d(256, 0.036% Params, 4.1 KMac, 0.011% MACs, 128, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
(relu): ReLU(0, 0.000% Params, 4.1 KMac, 0.011% MACs, )
)
(block2): BasicBlock(
295.68 k, 42.080% Params, 4.73 MMac, 13.122% MACs,
(conv1): Conv2d(147.58 k, 21.003% Params, 2.36 MMac, 6.544% MACs, 128, 128, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
(bn1): BatchNorm2d(256, 0.036% Params, 4.1 KMac, 0.011% MACs, 128, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
(conv2): Conv2d(147.58 k, 21.003% Params, 2.36 MMac, 6.544% MACs, 128, 128, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
(bn2): BatchNorm2d(256, 0.036% Params, 4.1 KMac, 0.011% MACs, 128, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
(relu): ReLU(0, 0.000% Params, 4.1 KMac, 0.011% MACs, )
)
)
)
(avgpool): AdaptiveAvgPool2d(0, 0.000% Params, 2.05 KMac, 0.006% MACs, output_size=(1, 1))
(fc): Linear(1.29 k, 0.184% Params, 1.29 KMac, 0.004% MACs, in_features=128, out_features=10, bias=True)
)
\ No newline at end of file
ResNet(
1.67 M, 111.801% Params, 92.48 MMac, 100.000% MACs,
(conv1): Conv2d(448, 0.030% Params, 458.75 KMac, 0.496% MACs, 3, 16, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
(bn1): BatchNorm2d(32, 0.002% Params, 32.77 KMac, 0.035% MACs, 16, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
(relu): ReLU(0, 0.000% Params, 16.38 KMac, 0.018% MACs, )
(layer1): MakeLayer(
15.58 k, 1.045% Params, 16.25 MMac, 17.575% MACs,
(downsample): Sequential(
1.22 k, 0.082% Params, 1.25 MMac, 1.346% MACs,
(0): Conv2d(1.09 k, 0.073% Params, 1.11 MMac, 1.205% MACs, 16, 64, kernel_size=(1, 1), stride=(1, 1))
(1): BatchNorm2d(128, 0.009% Params, 131.07 KMac, 0.142% MACs, 64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
)
(blockdict): ModuleDict(
14.37 k, 0.964% Params, 15.01 MMac, 16.229% MACs,
(block1): Bottleneck(
5.09 k, 0.341% Params, 5.31 MMac, 5.740% MACs,
(conv1): Conv2d(272, 0.018% Params, 278.53 KMac, 0.301% MACs, 16, 16, kernel_size=(1, 1), stride=(1, 1))
(bn1): BatchNorm2d(32, 0.002% Params, 32.77 KMac, 0.035% MACs, 16, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
(conv2): Conv2d(2.32 k, 0.156% Params, 2.38 MMac, 2.569% MACs, 16, 16, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
(bn2): BatchNorm2d(32, 0.002% Params, 32.77 KMac, 0.035% MACs, 16, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
(conv3): Conv2d(1.09 k, 0.073% Params, 1.11 MMac, 1.205% MACs, 16, 64, kernel_size=(1, 1), stride=(1, 1))
(bn3): BatchNorm2d(128, 0.009% Params, 131.07 KMac, 0.142% MACs, 64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
(relu): ReLU(0, 0.000% Params, 98.3 KMac, 0.106% MACs, )
)
(block2): Bottleneck(
4.64 k, 0.311% Params, 4.85 MMac, 5.244% MACs,
(conv1): Conv2d(1.04 k, 0.070% Params, 1.06 MMac, 1.152% MACs, 64, 16, kernel_size=(1, 1), stride=(1, 1))
(bn1): BatchNorm2d(32, 0.002% Params, 32.77 KMac, 0.035% MACs, 16, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
(conv2): Conv2d(2.32 k, 0.156% Params, 2.38 MMac, 2.569% MACs, 16, 16, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
(bn2): BatchNorm2d(32, 0.002% Params, 32.77 KMac, 0.035% MACs, 16, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
(conv3): Conv2d(1.09 k, 0.073% Params, 1.11 MMac, 1.205% MACs, 16, 64, kernel_size=(1, 1), stride=(1, 1))
(bn3): BatchNorm2d(128, 0.009% Params, 131.07 KMac, 0.142% MACs, 64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
(relu): ReLU(0, 0.000% Params, 98.3 KMac, 0.106% MACs, )
)
(block3): Bottleneck(
4.64 k, 0.311% Params, 4.85 MMac, 5.244% MACs,
(conv1): Conv2d(1.04 k, 0.070% Params, 1.06 MMac, 1.152% MACs, 64, 16, kernel_size=(1, 1), stride=(1, 1))
(bn1): BatchNorm2d(32, 0.002% Params, 32.77 KMac, 0.035% MACs, 16, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
(conv2): Conv2d(2.32 k, 0.156% Params, 2.38 MMac, 2.569% MACs, 16, 16, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
(bn2): BatchNorm2d(32, 0.002% Params, 32.77 KMac, 0.035% MACs, 16, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
(conv3): Conv2d(1.09 k, 0.073% Params, 1.11 MMac, 1.205% MACs, 16, 64, kernel_size=(1, 1), stride=(1, 1))
(bn3): BatchNorm2d(128, 0.009% Params, 131.07 KMac, 0.142% MACs, 64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
(relu): ReLU(0, 0.000% Params, 98.3 KMac, 0.106% MACs, )
)
)
)
(layer2): MakeLayer(
87.04 k, 5.838% Params, 24.15 MMac, 26.115% MACs,
(downsample): Sequential(
8.58 k, 0.575% Params, 2.2 MMac, 2.374% MACs,
(0): Conv2d(8.32 k, 0.558% Params, 2.13 MMac, 2.303% MACs, 64, 128, kernel_size=(1, 1), stride=(2, 2))
(1): BatchNorm2d(256, 0.017% Params, 65.54 KMac, 0.071% MACs, 128, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
)
(blockdict): ModuleDict(
78.46 k, 5.263% Params, 21.95 MMac, 23.741% MACs,
(block1): Bottleneck(
24.51 k, 1.644% Params, 8.0 MMac, 8.646% MACs,
(conv1): Conv2d(2.08 k, 0.140% Params, 2.13 MMac, 2.303% MACs, 64, 32, kernel_size=(1, 1), stride=(1, 1))
(bn1): BatchNorm2d(64, 0.004% Params, 65.54 KMac, 0.071% MACs, 32, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
(conv2): Conv2d(9.25 k, 0.620% Params, 2.37 MMac, 2.560% MACs, 32, 32, kernel_size=(3, 3), stride=(2, 2), padding=(1, 1))
(bn2): BatchNorm2d(64, 0.004% Params, 16.38 KMac, 0.018% MACs, 32, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
(conv3): Conv2d(4.22 k, 0.283% Params, 1.08 MMac, 1.169% MACs, 32, 128, kernel_size=(1, 1), stride=(1, 1))
(bn3): BatchNorm2d(256, 0.017% Params, 65.54 KMac, 0.071% MACs, 128, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
(relu): ReLU(0, 0.000% Params, 73.73 KMac, 0.080% MACs, )
)
(block2): Bottleneck(
17.98 k, 1.206% Params, 4.65 MMac, 5.032% MACs,
(conv1): Conv2d(4.13 k, 0.277% Params, 1.06 MMac, 1.143% MACs, 128, 32, kernel_size=(1, 1), stride=(1, 1))
(bn1): BatchNorm2d(64, 0.004% Params, 16.38 KMac, 0.018% MACs, 32, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
(conv2): Conv2d(9.25 k, 0.620% Params, 2.37 MMac, 2.560% MACs, 32, 32, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
(bn2): BatchNorm2d(64, 0.004% Params, 16.38 KMac, 0.018% MACs, 32, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
(conv3): Conv2d(4.22 k, 0.283% Params, 1.08 MMac, 1.169% MACs, 32, 128, kernel_size=(1, 1), stride=(1, 1))
(bn3): BatchNorm2d(256, 0.017% Params, 65.54 KMac, 0.071% MACs, 128, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
(relu): ReLU(0, 0.000% Params, 49.15 KMac, 0.053% MACs, )
)
(block3): Bottleneck(
17.98 k, 1.206% Params, 4.65 MMac, 5.032% MACs,
(conv1): Conv2d(4.13 k, 0.277% Params, 1.06 MMac, 1.143% MACs, 128, 32, kernel_size=(1, 1), stride=(1, 1))
(bn1): BatchNorm2d(64, 0.004% Params, 16.38 KMac, 0.018% MACs, 32, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
(conv2): Conv2d(9.25 k, 0.620% Params, 2.37 MMac, 2.560% MACs, 32, 32, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
(bn2): BatchNorm2d(64, 0.004% Params, 16.38 KMac, 0.018% MACs, 32, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
(conv3): Conv2d(4.22 k, 0.283% Params, 1.08 MMac, 1.169% MACs, 32, 128, kernel_size=(1, 1), stride=(1, 1))
(bn3): BatchNorm2d(256, 0.017% Params, 65.54 KMac, 0.071% MACs, 128, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
(relu): ReLU(0, 0.000% Params, 49.15 KMac, 0.053% MACs, )
)
(block4): Bottleneck(
17.98 k, 1.206% Params, 4.65 MMac, 5.032% MACs,
(conv1): Conv2d(4.13 k, 0.277% Params, 1.06 MMac, 1.143% MACs, 128, 32, kernel_size=(1, 1), stride=(1, 1))
(bn1): BatchNorm2d(64, 0.004% Params, 16.38 KMac, 0.018% MACs, 32, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
(conv2): Conv2d(9.25 k, 0.620% Params, 2.37 MMac, 2.560% MACs, 32, 32, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
(bn2): BatchNorm2d(64, 0.004% Params, 16.38 KMac, 0.018% MACs, 32, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
(conv3): Conv2d(4.22 k, 0.283% Params, 1.08 MMac, 1.169% MACs, 32, 128, kernel_size=(1, 1), stride=(1, 1))
(bn3): BatchNorm2d(256, 0.017% Params, 65.54 KMac, 0.071% MACs, 128, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
(relu): ReLU(0, 0.000% Params, 49.15 KMac, 0.053% MACs, )
)
)
)
(layer3): MakeLayer(
483.58 k, 32.437% Params, 32.72 MMac, 35.381% MACs,
(downsample): Sequential(
33.54 k, 2.249% Params, 2.15 MMac, 2.321% MACs,
(0): Conv2d(33.02 k, 2.215% Params, 2.11 MMac, 2.285% MACs, 128, 256, kernel_size=(1, 1), stride=(2, 2))
(1): BatchNorm2d(512, 0.034% Params, 32.77 KMac, 0.035% MACs, 256, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
)
(blockdict): ModuleDict(
450.05 k, 30.188% Params, 30.57 MMac, 33.060% MACs,
(block1): Bottleneck(
96.13 k, 6.448% Params, 7.8 MMac, 8.433% MACs,
(conv1): Conv2d(8.26 k, 0.554% Params, 2.11 MMac, 2.285% MACs, 128, 64, kernel_size=(1, 1), stride=(1, 1))
(bn1): BatchNorm2d(128, 0.009% Params, 32.77 KMac, 0.035% MACs, 64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
(conv2): Conv2d(36.93 k, 2.477% Params, 2.36 MMac, 2.556% MACs, 64, 64, kernel_size=(3, 3), stride=(2, 2), padding=(1, 1))
(bn2): BatchNorm2d(128, 0.009% Params, 8.19 KMac, 0.009% MACs, 64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
(conv3): Conv2d(16.64 k, 1.116% Params, 1.06 MMac, 1.152% MACs, 64, 256, kernel_size=(1, 1), stride=(1, 1))
(bn3): BatchNorm2d(512, 0.034% Params, 32.77 KMac, 0.035% MACs, 256, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
(relu): ReLU(0, 0.000% Params, 36.86 KMac, 0.040% MACs, )
)
(block2): Bottleneck(
70.78 k, 4.748% Params, 4.55 MMac, 4.925% MACs,
(conv1): Conv2d(16.45 k, 1.103% Params, 1.05 MMac, 1.138% MACs, 256, 64, kernel_size=(1, 1), stride=(1, 1))
(bn1): BatchNorm2d(128, 0.009% Params, 8.19 KMac, 0.009% MACs, 64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
(conv2): Conv2d(36.93 k, 2.477% Params, 2.36 MMac, 2.556% MACs, 64, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
(bn2): BatchNorm2d(128, 0.009% Params, 8.19 KMac, 0.009% MACs, 64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
(conv3): Conv2d(16.64 k, 1.116% Params, 1.06 MMac, 1.152% MACs, 64, 256, kernel_size=(1, 1), stride=(1, 1))
(bn3): BatchNorm2d(512, 0.034% Params, 32.77 KMac, 0.035% MACs, 256, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
(relu): ReLU(0, 0.000% Params, 24.58 KMac, 0.027% MACs, )
)
(block3): Bottleneck(
70.78 k, 4.748% Params, 4.55 MMac, 4.925% MACs,
(conv1): Conv2d(16.45 k, 1.103% Params, 1.05 MMac, 1.138% MACs, 256, 64, kernel_size=(1, 1), stride=(1, 1))
(bn1): BatchNorm2d(128, 0.009% Params, 8.19 KMac, 0.009% MACs, 64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
(conv2): Conv2d(36.93 k, 2.477% Params, 2.36 MMac, 2.556% MACs, 64, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
(bn2): BatchNorm2d(128, 0.009% Params, 8.19 KMac, 0.009% MACs, 64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
(conv3): Conv2d(16.64 k, 1.116% Params, 1.06 MMac, 1.152% MACs, 64, 256, kernel_size=(1, 1), stride=(1, 1))
(bn3): BatchNorm2d(512, 0.034% Params, 32.77 KMac, 0.035% MACs, 256, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
(relu): ReLU(0, 0.000% Params, 24.58 KMac, 0.027% MACs, )
)
(block4): Bottleneck(
70.78 k, 4.748% Params, 4.55 MMac, 4.925% MACs,
(conv1): Conv2d(16.45 k, 1.103% Params, 1.05 MMac, 1.138% MACs, 256, 64, kernel_size=(1, 1), stride=(1, 1))
(bn1): BatchNorm2d(128, 0.009% Params, 8.19 KMac, 0.009% MACs, 64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
(conv2): Conv2d(36.93 k, 2.477% Params, 2.36 MMac, 2.556% MACs, 64, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
(bn2): BatchNorm2d(128, 0.009% Params, 8.19 KMac, 0.009% MACs, 64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
(conv3): Conv2d(16.64 k, 1.116% Params, 1.06 MMac, 1.152% MACs, 64, 256, kernel_size=(1, 1), stride=(1, 1))
(bn3): BatchNorm2d(512, 0.034% Params, 32.77 KMac, 0.035% MACs, 256, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
(relu): ReLU(0, 0.000% Params, 24.58 KMac, 0.027% MACs, )
)
(block5): Bottleneck(
70.78 k, 4.748% Params, 4.55 MMac, 4.925% MACs,
(conv1): Conv2d(16.45 k, 1.103% Params, 1.05 MMac, 1.138% MACs, 256, 64, kernel_size=(1, 1), stride=(1, 1))
(bn1): BatchNorm2d(128, 0.009% Params, 8.19 KMac, 0.009% MACs, 64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
(conv2): Conv2d(36.93 k, 2.477% Params, 2.36 MMac, 2.556% MACs, 64, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
(bn2): BatchNorm2d(128, 0.009% Params, 8.19 KMac, 0.009% MACs, 64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
(conv3): Conv2d(16.64 k, 1.116% Params, 1.06 MMac, 1.152% MACs, 64, 256, kernel_size=(1, 1), stride=(1, 1))
(bn3): BatchNorm2d(512, 0.034% Params, 32.77 KMac, 0.035% MACs, 256, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
(relu): ReLU(0, 0.000% Params, 24.58 KMac, 0.027% MACs, )
)
(block6): Bottleneck(
70.78 k, 4.748% Params, 4.55 MMac, 4.925% MACs,
(conv1): Conv2d(16.45 k, 1.103% Params, 1.05 MMac, 1.138% MACs, 256, 64, kernel_size=(1, 1), stride=(1, 1))
(bn1): BatchNorm2d(128, 0.009% Params, 8.19 KMac, 0.009% MACs, 64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
(conv2): Conv2d(36.93 k, 2.477% Params, 2.36 MMac, 2.556% MACs, 64, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
(bn2): BatchNorm2d(128, 0.009% Params, 8.19 KMac, 0.009% MACs, 64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
(conv3): Conv2d(16.64 k, 1.116% Params, 1.06 MMac, 1.152% MACs, 64, 256, kernel_size=(1, 1), stride=(1, 1))
(bn3): BatchNorm2d(512, 0.034% Params, 32.77 KMac, 0.035% MACs, 256, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
(relu): ReLU(0, 0.000% Params, 24.58 KMac, 0.027% MACs, )
)
)
)
(layer4): MakeLayer(
1.07 M, 72.104% Params, 18.83 MMac, 20.366% MACs,
(downsample): Sequential(
132.61 k, 8.895% Params, 2.12 MMac, 2.294% MACs,
(0): Conv2d(131.58 k, 8.826% Params, 2.11 MMac, 2.277% MACs, 256, 512, kernel_size=(1, 1), stride=(2, 2))
(1): BatchNorm2d(1.02 k, 0.069% Params, 16.38 KMac, 0.018% MACs, 512, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
)
(blockdict): ModuleDict(
942.34 k, 63.209% Params, 16.71 MMac, 18.071% MACs,
(block1): Bottleneck(
380.67 k, 25.534% Params, 7.7 MMac, 8.327% MACs,
(conv1): Conv2d(32.9 k, 2.207% Params, 2.11 MMac, 2.277% MACs, 256, 128, kernel_size=(1, 1), stride=(1, 1))
(bn1): BatchNorm2d(256, 0.017% Params, 16.38 KMac, 0.018% MACs, 128, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
(conv2): Conv2d(147.58 k, 9.899% Params, 2.36 MMac, 2.553% MACs, 128, 128, kernel_size=(3, 3), stride=(2, 2), padding=(1, 1))
(bn2): BatchNorm2d(256, 0.017% Params, 4.1 KMac, 0.004% MACs, 128, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
(conv3): Conv2d(66.05 k, 4.430% Params, 1.06 MMac, 1.143% MACs, 128, 512, kernel_size=(1, 1), stride=(1, 1))
(bn3): BatchNorm2d(1.02 k, 0.069% Params, 16.38 KMac, 0.018% MACs, 512, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
(relu): ReLU(0, 0.000% Params, 18.43 KMac, 0.020% MACs, )
)
(block2): Bottleneck(
280.83 k, 18.837% Params, 4.51 MMac, 4.872% MACs,
(conv1): Conv2d(65.66 k, 4.405% Params, 1.05 MMac, 1.136% MACs, 512, 128, kernel_size=(1, 1), stride=(1, 1))
(bn1): BatchNorm2d(256, 0.017% Params, 4.1 KMac, 0.004% MACs, 128, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
(conv2): Conv2d(147.58 k, 9.899% Params, 2.36 MMac, 2.553% MACs, 128, 128, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
(bn2): BatchNorm2d(256, 0.017% Params, 4.1 KMac, 0.004% MACs, 128, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
(conv3): Conv2d(66.05 k, 4.430% Params, 1.06 MMac, 1.143% MACs, 128, 512, kernel_size=(1, 1), stride=(1, 1))
(bn3): BatchNorm2d(1.02 k, 0.069% Params, 16.38 KMac, 0.018% MACs, 512, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
(relu): ReLU(0, 0.000% Params, 12.29 KMac, 0.013% MACs, )
)
(block3): Bottleneck(
280.83 k, 18.837% Params, 4.51 MMac, 4.872% MACs,
(conv1): Conv2d(65.66 k, 4.405% Params, 1.05 MMac, 1.136% MACs, 512, 128, kernel_size=(1, 1), stride=(1, 1))
(bn1): BatchNorm2d(256, 0.017% Params, 4.1 KMac, 0.004% MACs, 128, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
(conv2): Conv2d(147.58 k, 9.899% Params, 2.36 MMac, 2.553% MACs, 128, 128, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
(bn2): BatchNorm2d(256, 0.017% Params, 4.1 KMac, 0.004% MACs, 128, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
(conv3): Conv2d(66.05 k, 4.430% Params, 1.06 MMac, 1.143% MACs, 128, 512, kernel_size=(1, 1), stride=(1, 1))
(bn3): BatchNorm2d(1.02 k, 0.069% Params, 16.38 KMac, 0.018% MACs, 512, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
(relu): ReLU(0, 0.000% Params, 12.29 KMac, 0.013% MACs, )
)
)
)
(avgpool): AdaptiveAvgPool2d(0, 0.000% Params, 8.19 KMac, 0.009% MACs, output_size=(1, 1))
(fc): Linear(5.13 k, 0.344% Params, 5.13 KMac, 0.006% MACs, in_features=512, out_features=10, bias=True)
)
\ No newline at end of file
...@@ -68,7 +68,7 @@ if __name__ == "__main__": ...@@ -68,7 +68,7 @@ if __name__ == "__main__":
parser = argparse.ArgumentParser(description='PyTorch FP32 Training') parser = argparse.ArgumentParser(description='PyTorch FP32 Training')
parser.add_argument('-m', '--model', metavar='MODEL ARCH', default='resnet18') parser.add_argument('-m', '--model', metavar='MODEL ARCH', default='ResNet18')
parser.add_argument('-b', '--batch_size', default=128, type=int, metavar='BATCH SIZE', help='mini-batch size (default: 128)') parser.add_argument('-b', '--batch_size', default=128, type=int, metavar='BATCH SIZE', help='mini-batch size (default: 128)')
parser.add_argument('-j','--workers', default=4, type=int, metavar='WORKERS',help='number of data loading workers (default: 4)') parser.add_argument('-j','--workers', default=4, type=int, metavar='WORKERS',help='number of data loading workers (default: 4)')
# parser.add_argument('-t', '--test', dest='test', action='store_true', help='test model on test set') # parser.add_argument('-t', '--test', dest='test', action='store_true', help='test model on test set')
...@@ -121,19 +121,28 @@ if __name__ == "__main__": ...@@ -121,19 +121,28 @@ if __name__ == "__main__":
model.eval() model.eval()
full_acc = full_inference(model, test_loader, device) full_acc = full_inference(model, test_loader, device)
model_fold = fold_model(model) model_fold = fold_model(model) #
full_params = [] full_params = []
layer, par_ratio, flop_ratio = extract_ratio() layer, par_ratio, flop_ratio = extract_ratio(args.model)
print(layer) # print(layer)
layer = []
for name, param in model.named_parameters():
if 'weight' in name:
n = name.split('.')
pre = '.'.join(n[:len(n)-1])
layer.append(pre)
# print(name)
print('===================')
# print(layer)
par_ratio, flop_ratio = fold_ratio(layer, par_ratio, flop_ratio) par_ratio, flop_ratio = fold_ratio(layer, par_ratio, flop_ratio)
# sys.exit()
for name, param in model_fold.named_parameters(): for name, param in model_fold.named_parameters():
if 'bn' in name: if 'bn' in name or 'sample.1' in name:
continue continue
param_norm = F.normalize(param.data.cpu(),p=2,dim=-1) param_norm = F.normalize(param.data.cpu(),p=2,dim=-1)
full_params.append(param_norm) full_params.append(param_norm) # bn stats are not included; only the conv weights are counted, and those are the folded ones
writer.add_histogram(tag='Full_' + name + '_data', values=param.data) writer.add_histogram(tag='Full_' + name + '_data', values=param.data)
...@@ -156,7 +165,14 @@ if __name__ == "__main__": ...@@ -156,7 +165,14 @@ if __name__ == "__main__":
for num_bits in num_bit_list: for num_bits in num_bit_list:
e_bit_list = ebit_list(quant_type,num_bits) e_bit_list = ebit_list(quant_type,num_bits)
for e_bits in e_bit_list: for e_bits in e_bit_list:
model_ptq = resnet18() # model_ptq = resnet18()
if args.model == 'ResNet18':
model_ptq = resnet18()
elif args.model == 'ResNet50':
model_ptq = resnet50()
elif args.model == 'ResNet152':
model_ptq = resnet152()
if quant_type == 'FLOAT': if quant_type == 'FLOAT':
title = '%s_%d_E%d' % (quant_type, num_bits, e_bits) title = '%s_%d_E%d' % (quant_type, num_bits, e_bits)
else: else:
...@@ -194,25 +210,30 @@ if __name__ == "__main__": ...@@ -194,25 +210,30 @@ if __name__ == "__main__":
js_flops = 0. js_flops = 0.
js_param = 0. js_param = 0.
for name, param in model_ptq.named_parameters(): for name, param in model_ptq.named_parameters():
if '.' not in name or 'bn' in name: # if '.' not in name or 'bn' in name:
if 'bn' in name or 'sample.1' in name:
continue continue
writer.add_histogram(tag=title +':'+ name + '_data', values=param.data) writer.add_histogram(tag=title +':'+ name + '_data', values=param.data)
# idx = idx + 1 idx = idx + 1
# prefix = name.split('.')[0] # ResNet names contain several '.', so the prefix has to be re-joined
# if prefix in layer: # prefix = name.split('.')[0]
# layer_idx = layer.index(prefix) n = name.split('.')
# ptq_param = param.data.cpu() prefix = '.'.join(n[:len(n) - 1])
# # take the L2 norm # weight vs. bias 1:1? The ratio is assigned per layer; separate weights for weight and bias (e.g. 8:2) could also be applied here
# ptq_norm = F.normalize(ptq_param,p=2,dim=-1) if prefix in layer:
# writer.add_histogram(tag=title +':'+ name + '_data', values=ptq_param) layer_idx = layer.index(prefix)
# js = js_div(ptq_norm,full_params[idx]) ptq_param = param.data.cpu()
# js = js.item() # take the L2 norm
# if js < 0.: ptq_norm = F.normalize(ptq_param,p=2,dim=-1)
# js = 0. writer.add_histogram(tag=title +':'+ name + '_data', values=ptq_param)
# js_flops = js_flops + js * flop_ratio[layer_idx] js = js_div(ptq_norm,full_params[idx]) # this computes the JS distance between the folded model before and after quantization
# js_param = js_param + js * flop_ratio[layer_idx] js = js.item()
# js_flops_list.append(js_flops) if js < 0.:
# js_param_list.append(js_param) js = 0.
js_flops = js_flops + js * flop_ratio[layer_idx]
js_param = js_param + js * par_ratio[layer_idx]
js_flops_list.append(js_flops)
js_param_list.append(js_param)
print(title + ': js_flops: %f js_param: %f acc_loss: %f' % (js_flops, js_param, acc_loss)) print(title + ': js_flops: %f js_param: %f acc_loss: %f' % (js_flops, js_param, acc_loss))
...@@ -233,10 +254,10 @@ if __name__ == "__main__": ...@@ -233,10 +254,10 @@ if __name__ == "__main__":
worksheet.cell(row=i+4, column=4, value=ptq_acc_list[i]) worksheet.cell(row=i+4, column=4, value=ptq_acc_list[i])
worksheet.cell(row=i+4, column=5, value=acc_loss_list[i]) worksheet.cell(row=i+4, column=5, value=acc_loss_list[i])
workbook.save('ptq_result.xlsx') workbook.save('ptq_result_' + args.model + '.xlsx')
writer.close() writer.close()
ft = open('ptq_result.txt','w') ft = open('ptq_result_' + args.model + '.txt','w')
print('title_list:',file=ft) print('title_list:',file=ft)
print(" ".join(title_list),file=ft) print(" ".join(title_list),file=ft)
......
title_list:
INT_2 INT_3 INT_4 INT_5 INT_6 INT_7 INT_8 INT_9 INT_10 INT_11 INT_12 INT_13 INT_14 INT_15 INT_16 POT_2 POT_3 POT_4 POT_5 POT_6 POT_7 POT_8 FLOAT_3_E1 FLOAT_4_E1 FLOAT_4_E2 FLOAT_5_E1 FLOAT_5_E2 FLOAT_5_E3 FLOAT_6_E1 FLOAT_6_E2 FLOAT_6_E3 FLOAT_6_E4 FLOAT_7_E1 FLOAT_7_E2 FLOAT_7_E3 FLOAT_7_E4 FLOAT_7_E5 FLOAT_8_E1 FLOAT_8_E2 FLOAT_8_E3 FLOAT_8_E4 FLOAT_8_E5 FLOAT_8_E6
js_flops_list:
1575.126077030527 980.8324825038856 447.4871705577316 203.8177281153719 94.1658153206219 44.73944284292641 21.730716696253086 10.687903335080755 5.2935009924434775 2.6865031426677675 1.345978185346981 0.6738058971124082 0.34590930672785625 0.16620132379306904 0.09185943251823848 1575.0264663456858 767.5068295225365 59.80415491853343 17.32189175246257 17.160386413787755 17.15972613238827 17.160655554562823 547.0296470821636 228.09197712606053 153.9307141697144 102.8744121697856 63.04910506966272 11.893784458090247 49.68929151890493 30.72369295281706 4.336553462330601 4.810517948583543 25.62475856077897 16.963161148931942 1.7730239215421446 1.2492962287085048 4.844787354857122 14.21240714817728 10.605240065475499 0.7963437572573967 0.32131797583853794 1.3061700599586734 4.844787523330232
js_param_list:
2231.9475377209037 1458.7430817370525 656.866021106162 290.661557510572 132.0211812900384 62.06574209045005 29.96287022906031 14.768791159744465 7.344364349715033 3.757019554618513 1.896182903527843 0.9241808205303167 0.45857306080932436 0.2269121111102425 0.12261352661167306 2231.901673608193 1143.359470049635 82.82637961696304 24.06635574752677 23.843136397545287 23.842358607630732 23.84306741528584 799.9775130544906 323.8336430582792 218.61973701520765 143.18120884416584 88.72081224892759 16.52024912262558 68.08470436272326 43.20128678260041 6.041579655336327 6.686327875421352 34.6238061335222 24.064747116161215 2.491426419987749 1.7403336017725606 6.690842031928857 18.94797143083834 15.257619881935225 1.0957373786589855 0.44768947355373956 1.7705741794826835 6.690842738428997
ptq_acc_list:
10.0 10.0 10.0 78.52 86.7 89.95 90.73 90.96 90.64 87.4 74.21 52.1 40.65 30.51 20.3 10.0 10.0 10.0 39.21 40.15 44.33 34.83 10.0 19.98 10.0 34.59 85.82 80.56 57.06 88.62 90.17 81.06 68.03 89.75 90.85 88.77 10.0 72.61 90.02 91.08 89.55 10.0 10.0
acc_loss_list:
0.8900978129464776 0.8900978129464776 0.8900978129464776 0.1370480272557424 0.04714803824596101 0.011429827453566238 0.0028574568633914815 0.0003297065611605796 0.0038465765468732203 0.03945488515221441 0.18441586987581055 0.4274096054511484 0.5532476096274316 0.6646884272997032 0.7768985602813496 0.8900978129464776 0.8900978129464776 0.8900978129464776 0.5690735245631388 0.5587427189801077 0.5128036047917354 0.6172106824925816 0.8900978129464776 0.7804154302670623 0.8900978129464776 0.6198483349818661 0.05681943070667109 0.11462798109682375 0.3728981206726013 0.026046818331684696 0.00901197933838876 0.10913287174414764 0.2523354214748873 0.013627871194636718 0.0015386306187493194 0.024398285525881955 0.8900978129464776 0.20200021980437408 0.010660512144191657 -0.0009891196834817388 0.015825914935707196 0.8900978129464776 0.8900978129464776
title_list:
INT_2 INT_3 INT_4 INT_5 INT_6 INT_7 INT_8 INT_9 INT_10 INT_11 INT_12 INT_13 INT_14 INT_15 INT_16 POT_2 POT_3 POT_4 POT_5 POT_6 POT_7 POT_8 FLOAT_3_E1 FLOAT_4_E1 FLOAT_4_E2 FLOAT_5_E1 FLOAT_5_E2 FLOAT_5_E3 FLOAT_6_E1 FLOAT_6_E2 FLOAT_6_E3 FLOAT_6_E4 FLOAT_7_E1 FLOAT_7_E2 FLOAT_7_E3 FLOAT_7_E4 FLOAT_7_E5 FLOAT_8_E1 FLOAT_8_E2 FLOAT_8_E3 FLOAT_8_E4 FLOAT_8_E5 FLOAT_8_E6
js_flops_list:
1833.4576454973073 814.7863891368864 217.7654229387627 54.07616924802023 13.731802945455469 3.5847427020530582 0.9118541432904458 0.2622900218848318 0.07627003915874074 0.027745791769400664 0.015915006254486226 0.012409352705166696 0.0077479353538904274 0.0062617873011873975 0.005917287498327866 1833.2003417254284 544.2136113656462 35.21026365121499 33.83804856891729 33.83703344984572 33.83750169488491 33.84147193704756 342.096219925368 82.6043808610444 75.92517125989443 27.82235802343243 26.574672151466128 9.6049355988981 14.044291246668882 14.55114135659603 2.4864347446515884 9.426150750133262 10.07193874315086 10.701781541507756 0.6597191298214471 2.4197650833272104 9.550487563237345 8.849135643873504 9.216705123201871 0.1929881628940372 0.6207588325434388 2.5428780026080493 9.550487563237345
js_param_list:
3613.037168160796 1825.7907466758202 512.0932785883192 129.26071365337654 33.314456921282606 8.673843570791789 2.1826018682118424 0.6138186833325912 0.1691841503982388 0.05180905191755439 0.02266508641878177 0.014530378356803484 0.00975786055068809 0.005063431812688739 0.00398069855228542 3612.992302272399 1246.9340617899438 71.14710558688047 67.61964317269017 67.6172664356203 67.61753548832318 67.6175100773394 755.379587970111 181.41267691267066 170.89087380459807 56.989989927129535 59.371069176236894 19.274735775346528 26.031672719261728 32.363778392002544 5.0043194398511135 18.814548222792805 17.309141148134536 23.84953967534161 1.332034978863292 4.83191046013193 18.864051408815957 14.787650268158211 20.519388091926267 0.3942680972083926 1.231435885110694 4.879394902995963 18.864051408815957
ptq_acc_list:
10.0 10.0 31.15 81.89 84.93 85.69 85.78 85.63 82.63 74.8 51.56 29.34 13.78 11.57 10.17 10.0 10.0 44.45 44.64 46.43 44.18 38.58 9.92 38.85 70.91 65.34 82.3 80.82 73.99 84.14 85.05 76.68 75.95 84.95 85.55 81.54 10.0 77.73 85.18 85.98 81.93 10.01 13.34
acc_loss_list:
0.8835991153532767 0.8835991153532767 0.6374112443254569 0.04679315562798273 0.011407286695378766 0.0025608194622279 0.0015132115004073503 0.003259224770108266 0.038179490164125265 0.1293213828425096 0.3998370387614945 0.6584798044465138 0.8395995809568153 0.8653241764637412 0.8816203003142824 0.8835991153532767 0.8835991153532767 0.48259806774531483 0.4803864509370271 0.45955069258526365 0.4857408916307764 0.5509253870329415 0.8845303224304505 0.5477825631474799 0.17460132697008499 0.2394366197183098 0.04202071935746711 0.05924805028518221 0.1387498544988942 0.02060295658246998 0.010010476079618198 0.10743801652892551 0.11593528110813635 0.011174484926085367 0.004190431847282033 0.05086718659061798 0.8835991153532767 0.09521592364101959 0.008497264579210684 -0.0008148061925271492 0.04632755208939576 0.8834827144686299 0.844721219881271
title_list:
INT_2 INT_3 INT_4 INT_5 INT_6 INT_7 INT_8 INT_9 INT_10 INT_11 INT_12 INT_13 INT_14 INT_15 INT_16 POT_2 POT_3 POT_4 POT_5 POT_6 POT_7 POT_8 FLOAT_3_E1 FLOAT_4_E1 FLOAT_4_E2 FLOAT_5_E1 FLOAT_5_E2 FLOAT_5_E3 FLOAT_6_E1 FLOAT_6_E2 FLOAT_6_E3 FLOAT_6_E4 FLOAT_7_E1 FLOAT_7_E2 FLOAT_7_E3 FLOAT_7_E4 FLOAT_7_E5 FLOAT_8_E1 FLOAT_8_E2 FLOAT_8_E3 FLOAT_8_E4 FLOAT_8_E5 FLOAT_8_E6
js_flops_list:
1489.6432790793892 858.47390911721 350.38842997977486 146.66108726257698 65.51871772345022 30.802447738403625 15.015633081763848 7.372939759214539 3.602748170145869 1.7596017349024324 0.9023980469489912 0.42604559053986407 0.2086617904343124 0.11696138076612213 0.06650877266410397 1489.4950585919782 648.6726890563766 44.0313761461945 14.813184296979202 14.70886411284525 14.708637223793453 14.708329981851291 442.110054514285 167.03961080744105 110.912352356486 73.14257321252117 44.75826464643717 8.918392355710786 35.41728607793805 22.00069249787684 3.0807357022322006 4.133106679411769 18.786975210869198 12.291142909976228 1.2420341267864268 1.0780820385160967 4.196963771246701 10.816659177228358 7.780715811154463 0.513961854917128 0.2801788104206083 1.150574461896198 4.196963771246701
js_param_list:
2988.8747567488617 1887.9793935628438 794.5371505720092 330.3960680775245 145.92231495119452 67.9559314292448 33.03981244952361 16.124047122726786 8.021401990398326 3.943098007875918 1.9811299823118427 0.9460539051395199 0.44709418282093033 0.22449034273754867 0.12425914862692854 2988.8363531204886 1451.7681143260804 94.67273844326954 30.460878266197444 30.244231409403923 30.2446749589304 30.244134610251493 984.9086948427197 371.60971497639866 248.5749360354289 159.90777702378026 99.54631101875773 19.048673214252524 75.87671359764475 48.95576239520067 6.683113070521427 8.485231215526596 39.31778320380456 27.44412247810391 2.6854627255413566 2.207580403630901 8.479439151405776 21.80574465505866 17.614834435129385 1.148945392883737 0.5553705895013917 2.2254689905601692 8.479439151405776
ptq_acc_list:
10.0 10.0 10.01 72.69 87.21 89.67 90.45 90.33 89.37 79.82 61.97 35.21 22.84 21.47 13.47 10.0 10.0 12.81 17.49 27.49 30.18 34.97 10.0 15.78 21.89 33.3 82.29 82.49 58.04 87.21 88.9 82.42 67.65 88.34 90.33 87.15 10.05 70.35 89.06 90.52 88.78 9.99 10.0
acc_loss_list:
0.8896369054188279 0.8896369054188279 0.8895265423242467 0.19777066548946035 0.037523452157598565 0.010374130890630148 0.0017658095132987153 0.00309016664827283 0.01368502372806528 0.11908177905308472 0.31607990288047677 0.6114115439796932 0.747930691976603 0.7630504359342236 0.8513409115991613 0.8896369054188279 0.8896369054188279 0.8586248758415186 0.8069749475775301 0.6966118529963581 0.6669241805540227 0.6140602582496413 0.8896369054188279 0.8258470367509105 0.7584151859618143 0.6324908950446971 0.09182209469153507 0.08961483279991177 0.3594525990508774 0.037523452157598565 0.018872089173380353 0.09038737446197989 0.253393665158371 0.025052422469926013 0.00309016664827283 0.03818563072508546 0.8890850899459222 0.22359562962145466 0.017106279660081637 0.0009932678512305862 0.020196446308354467 0.8897472685134091 0.8896369054188279
## update: <br>2023.4.12<br>
- Updated the parts of get_param_flops.py, extract_ratio.py and ptq.py that compute the similarity of the data distributions and the FLOPs / parameter-count weighting, so that they work for the ResNet family of networks.<br>
(Note: at the moment, after param_flops_ResNet18/50/152.txt is generated, the statistics of the downsample module, which show up one extra time inside each layer, have to be deleted by hand. ResNet18 only needs 3 deletions and ResNet50/152 need 4, so this was simply done manually; a hedged sanity-check sketch is given after the to-do list below.)
- Added the PTQ training/test results for ResNet18/50/152, stored in ptq_result_ResNet18/50/152.xlsx
- Fitted curves:
1. ResNet18 - weighted by parameter count
<img src = "fig/res18-1.png" class="h-90 auto">
ResNet18 - weighted by FLOPs
<img src = "fig/res18-2.png" class="h-90 auto">
2. ResNet50 - weighted by parameter count
<img src = "fig/res50-2.png" class="h-90 auto">
ResNet50 - weighted by FLOPs
<img src = "fig/res50.png" class="h-90 auto">
3. ResNet152 - weighted by parameter count
<img src = "fig/res152-1.png" class="h-90 auto">
ResNet152 - weighted by FLOPs
<img src = "fig/res152-2.png" class="h-90 auto">
- In all of the plots there is a cluster of outliers in the upper-left corner. It includes the points where accuracy collapses for INT quantization at large bit widths, as well as some other anomalies. The code and the data need further analysis and parameter tuning so that the points that are abnormal only because of program settings can be recovered.
- To do next:
1. Analyze why accuracy collapses when the INT quantization bit width is large, and fix it
2. Check the network quantization for mistakes in the details
3. Check that the weighting is applied correctly when computing the similarity of the data distributions
4. ResNet QAT experiments
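A possible way to double-check the manual deletion mentioned above (a hedged sketch, not part of the current code; it assumes model.py and extract_ratio.py are importable from the same directory): after removing the duplicated downsample statistics, the number of parsed entries should equal the number of Conv2d/BatchNorm2d/Linear modules in the corresponding model.

```python
# Hedged sanity check: compare the entries parsed from param_flops_<model>.txt
# with the conv/bn/linear modules of the corresponding model.
import torch.nn as nn
from model import resnet18, resnet50, resnet152   # assumed importable from this repo
from extract_ratio import extract_ratio           # assumed importable from this repo

def check_alignment(md='ResNet18'):
    layer, par_ratio, flop_ratio = extract_ratio(md)
    model = {'ResNet18': resnet18, 'ResNet50': resnet50, 'ResNet152': resnet152}[md]()
    n_modules = sum(1 for m in model.modules()
                    if isinstance(m, (nn.Conv2d, nn.BatchNorm2d, nn.Linear)))
    # the two counts should match once the duplicated downsample entries are gone
    print(md, '- parsed entries:', len(layer), ' conv/bn/linear modules:', n_modules)

if __name__ == '__main__':
    for md in ['ResNet18', 'ResNet50', 'ResNet152']:
        check_alignment(md)
```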
<br><br>
Divider
===============================================<br><br><br>
## update:<br> 2023.4.10 <br>
- Note: the code in new_mzh has switched to the metrics and quantization conventions agreed with 游昆霖, and has been rebuilt on top of his version of the code.<br>
Quite a few problems came up while quantizing the BN layers; thanks to 游昆霖 for the help :D
### Code changes:
To quantize ResNet18, the quantization layers newly added to module.py are QConvBNReLU, QConvBN, QElementwiseAdd and QAdaptiveAvgPool2d. The quantized ResNet18 architecture is built in model.py; class BasicBlock, class Bottleneck and class MakeLayer keep ResNet extensible, so the code can be extended to ResNet50 and ResNet152 fairly easily. A minimal sketch of the resulting PTQ flow follows.
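A minimal usage sketch, for illustration only: it assumes the CIFAR-10 checkpoint from new_train.py exists and uses a single un-normalized batch for calibration, whereas ptq.py calibrates on the full test set with its own preprocessing.

```python
import torch
import torchvision
import torchvision.transforms as transforms
from model import resnet18            # resnet50 / resnet152 work the same way

# one hypothetical calibration batch (ptq.py uses its own DataLoader and transforms)
calib_set = torchvision.datasets.CIFAR10('./data', train=False, download=True,
                                         transform=transforms.ToTensor())
calib_loader = torch.utils.data.DataLoader(calib_set, batch_size=128, shuffle=False)
x_calib, _ = next(iter(calib_loader))

model_ptq = resnet18()
model_ptq.load_state_dict(torch.load('ckpt/cifar10_ResNet18.pt'))
model_ptq.eval()

model_ptq.quantize('INT', num_bits=8, e_bits=3)   # builds QConvBNReLU / QConvBN / QElementwiseAdd / QAdaptiveAvgPool2d
with torch.no_grad():
    model_ptq.quantize_forward(x_calib)           # calibration pass: collect qi/qo statistics
model_ptq.freeze()                                # freeze the collected scales / zero-points
with torch.no_grad():
    out = model_ptq.quantize_inference(x_calib)   # inference with the frozen quantized model
```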
### To be improved:
- Compared with AlexNet or VGG, the ResNet architecture is far less flat: the MakeLayer and residual structures mean it is not a simple sequential network, so many of the earlier routines for computing similarity and the like cannot be applied to ResNet directly (when traversing the network parameters there are layer, sequential and block wrappers around the conv and bn layers). The analysis of parameter similarity and gradient similarity is left for follow-up work.
QAT support is also left for later.
- Before the correct QElementwiseAdd layer was added, PTQ accuracy never exceeded 15%, which shows how important this layer is. It handles the residual addition: since the outputs of the two branches lie in different quantization ranges, they cannot be added directly and have to be rescaled first (a sketch of the rescale idea follows below).
- Right now INT quantization accuracy first rises and then drops as the bit width grows. The quantized parameter distributions are quite close to those of the full-precision model, so the problem is not in the ordinary quantized layers such as Conv and BN. My guess is that at large bit widths an overflow occurs during the rescale inside QElementwiseAdd; this still needs to be confirmed.
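A rough sketch of that rescale step, written in floating point and for illustration only (the actual QElementwiseAdd in module.py works in the quantized integer domain and may differ in detail):

```python
import torch

def elementwise_add_rescale(q0, s0, z0, q1, s1, z1, s_out, z_out):
    """Requantize two tensors with different (scale, zero-point) pairs onto a
    common output range before adding them."""
    r0 = s0 * (q0 - z0)               # real value represented by q0
    r1 = s1 * (q1 - z1)               # real value represented by q1
    # in a fixed-point implementation this requantization is where an overflow
    # at large bit widths could occur
    return torch.round((r0 + r1) / s_out + z_out)
```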
\ No newline at end of file
...@@ -82,8 +82,8 @@ echo "Use GPU ${CUDA_VISIBLE_DEVICES}" # which gpus ...@@ -82,8 +82,8 @@ echo "Use GPU ${CUDA_VISIBLE_DEVICES}" # which gpus
sleep 2s sleep 2s
hostname hostname
echo "python ./new_train.py -m ResNet18 -e 60 -b 128 -j 4 -lr 0.001 -wd 0.0001" echo "python ./new_train.py -m ResNet152 -e 300 -b 128 -j 4 -lr 0.001 -wd 0.0001"
python ./new_train.py -m ResNet18 -e 60 -b 128 -j 4 -lr 0.001 -wd 0.0001 python ./new_train.py -m ResNet152 -e 300 -b 128 -j 4 -lr 0.001 -wd 0.0001
......
...@@ -5,7 +5,7 @@ ...@@ -5,7 +5,7 @@
# (TODO) # (TODO)
# Please modify job name # Please modify job name
#SBATCH -J ResNet18_trial # The job name #SBATCH -J ResNet50_trial # The job name
#SBATCH -o ./info/ret-%j.out # Write the standard output to file named 'ret-<job_number>.out' #SBATCH -o ./info/ret-%j.out # Write the standard output to file named 'ret-<job_number>.out'
#SBATCH -e ./info/ret-%j.err # Write the standard error to file named 'ret-<job_number>.err' #SBATCH -e ./info/ret-%j.err # Write the standard error to file named 'ret-<job_number>.err'
...@@ -83,7 +83,12 @@ sleep 2s ...@@ -83,7 +83,12 @@ sleep 2s
hostname hostname
echo "python ./ptq.py -m ResNet18 -b 128 -j 4" echo "python ./ptq.py -m ResNet18 -b 128 -j 4"
echo "python ./ptq.py -m ResNet50 -b 128 -j 4"
echo "python ./ptq.py -m ResNet152 -b 128 -j 4"
# python ./ptq.py -m ResNet18 -b 128 -j 4
python ./ptq.py -m ResNet18 -b 128 -j 4 python ./ptq.py -m ResNet18 -b 128 -j 4
python ./ptq.py -m ResNet50 -b 128 -j 4
python ./ptq.py -m ResNet152 -b 128 -j 4
......
from model import *
import torch
from ptflops import get_model_complexity_info
if __name__ == "__main__":
model = resnet18()
full_file = 'ckpt/cifar10_ResNet18.pt'
model.load_state_dict(torch.load(full_file))
flops, params = get_model_complexity_info(model, (3, 32, 32), as_strings=True, print_per_layer_stat=True)
import torch
import torch.nn as nn
import torch.nn.functional as F
from module import *
import module
from global_var import GlobalVariables
class AlexNet_BN(nn.Module):
def __init__(self, num_channels=3, num_classes=10):
super(AlexNet_BN, self).__init__()
# original size 32x32
self.conv1 = nn.Conv2d(num_channels, 32, kernel_size=3, padding=1, bias=True)
self.bn1 = nn.BatchNorm2d(32)
self.relu1 = nn.ReLU(inplace=True)
self.pool1 = nn.MaxPool2d(kernel_size=2, stride=2) # output[48, 27, 27]
self.conv2 = nn.Conv2d(32, 64, kernel_size=3, padding=1, bias=True) # output[128, 27, 27]
self.bn2 = nn.BatchNorm2d(64)
self.relu2 = nn.ReLU(inplace=True)
self.pool2 = nn.MaxPool2d(kernel_size=2, stride=2) # output[128, 13, 13]
self.conv3 = nn.Conv2d(64, 128, kernel_size=3, padding=1, bias=True) # output[192, 13, 13]
self.bn3 = nn.BatchNorm2d(128)
self.relu3 = nn.ReLU(inplace=True)
self.conv4 = nn.Conv2d(128, 256, kernel_size=3, padding=1, bias=True) # output[192, 13, 13]
self.bn4 = nn.BatchNorm2d(256)
self.relu4 = nn.ReLU(inplace=True)
self.conv5 = nn.Conv2d(256, 256, kernel_size=3, padding=1, bias=True) # output[128, 13, 13]
self.bn5 = nn.BatchNorm2d(256)
self.relu5 = nn.ReLU(inplace=True)
self.pool5 = nn.MaxPool2d(kernel_size=3, stride=2)
self.drop1 = nn.Dropout(p=0.5)
self.fc1 = nn.Linear(256 * 3 * 3, 1024, bias=True)
self.relu6 = nn.ReLU(inplace=True)
self.drop2 = nn.Dropout(p=0.5)
self.fc2 = nn.Linear(1024, 512, bias=True)
self.relu7 = nn.ReLU(inplace=True)
self.fc3 = nn.Linear(512, num_classes, bias=True)
def forward(self, x):
x = self.conv1(x)
x = self.bn1(x)
x = self.relu1(x)
x = self.pool1(x)
x = self.conv2(x)
x = self.bn2(x)
x = self.relu2(x)
x = self.pool2(x)
x = self.conv3(x)
x = self.bn3(x)
x = self.relu3(x)
x = self.conv4(x)
x = self.bn4(x)
x = self.relu4(x)
x = self.conv5(x)
x = self.bn5(x)
x = self.relu5(x)
x = self.pool5(x)
x = torch.flatten(x, start_dim=1)
x = self.drop1(x)
x = self.fc1(x)
x = self.relu6(x)
x = self.drop2(x)
x = self.fc2(x)
x = self.relu7(x)
x = self.fc3(x)
return x
def quantize(self, quant_type, num_bits=8, e_bits=3):
        # e_bits is only used with FLOAT quantization
self.qconv1 = QConvBNReLU(quant_type, self.conv1, self.bn1, qi=True, qo=True, num_bits=num_bits, e_bits=e_bits)
self.qpool1 = QMaxPooling2d(quant_type, kernel_size=2, stride=2, padding=0, num_bits=num_bits, e_bits=e_bits)
self.qconv2 = QConvBNReLU(quant_type, self.conv2, self.bn2, qi=False, qo=True, num_bits=num_bits, e_bits=e_bits)
self.qpool2 = QMaxPooling2d(quant_type, kernel_size=2, stride=2, padding=0, num_bits=num_bits, e_bits=e_bits)
self.qconv3 = QConvBNReLU(quant_type, self.conv3, self.bn3, qi=False, qo=True, num_bits=num_bits, e_bits=e_bits)
self.qconv4 = QConvBNReLU(quant_type, self.conv4, self.bn4, qi=False, qo=True, num_bits=num_bits, e_bits=e_bits)
self.qconv5 = QConvBNReLU(quant_type, self.conv5, self.bn5, qi=False, qo=True, num_bits=num_bits, e_bits=e_bits)
self.qpool5 = QMaxPooling2d(quant_type, kernel_size=3, stride=2, padding=0, num_bits=num_bits, e_bits=e_bits)
self.qfc1 = QLinear(quant_type, self.fc1, qi=False, qo=True, num_bits=num_bits, e_bits=e_bits)
self.qrelu6 = QReLU(quant_type, num_bits=num_bits, e_bits=e_bits)
self.qfc2 = QLinear(quant_type, self.fc2, qi=False, qo=True, num_bits=num_bits, e_bits=e_bits)
self.qrelu7 = QReLU(quant_type, num_bits=num_bits, e_bits=e_bits)
self.qfc3 = QLinear(quant_type, self.fc3, qi=False, qo=True, num_bits=num_bits, e_bits=e_bits)
def quantize_forward(self, x):
x = self.qconv1(x)
x = self.qpool1(x)
x = self.qconv2(x)
x = self.qpool2(x)
x = self.qconv3(x)
x = self.qconv4(x)
x = self.qconv5(x)
x = self.qpool5(x)
x = torch.flatten(x, start_dim=1)
x = self.drop1(x)
x = self.qfc1(x)
x = self.qrelu6(x)
x = self.drop2(x)
x = self.qfc2(x)
x = self.qrelu7(x)
x = self.qfc3(x)
return x
def freeze(self):
self.qconv1.freeze()
self.qpool1.freeze(self.qconv1.qo)
self.qconv2.freeze(self.qconv1.qo)
self.qpool2.freeze(self.qconv2.qo)
self.qconv3.freeze(self.qconv2.qo)
self.qconv4.freeze(self.qconv3.qo)
self.qconv5.freeze(self.qconv4.qo)
self.qpool5.freeze(self.qconv5.qo)
self.qfc1.freeze(self.qconv5.qo)
self.qrelu6.freeze(self.qfc1.qo)
self.qfc2.freeze(self.qfc1.qo)
self.qrelu7.freeze(self.qfc2.qo)
self.qfc3.freeze(self.qfc2.qo)
def quantize_inference(self, x):
x = self.qconv1.qi.quantize_tensor(x)
x = self.qconv1.quantize_inference(x)
x = self.qpool1.quantize_inference(x)
x = self.qconv2.quantize_inference(x)
x = self.qpool2.quantize_inference(x)
x = self.qconv3.quantize_inference(x)
x = self.qconv4.quantize_inference(x)
x = self.qconv5.quantize_inference(x)
x = self.qpool5.quantize_inference(x)
x = torch.flatten(x, start_dim=1)
x = self.qfc1.quantize_inference(x)
x = self.qrelu6.quantize_inference(x)
x = self.qfc2.quantize_inference(x)
x = self.qrelu7.quantize_inference(x)
x = self.qfc3.quantize_inference(x)
x = self.qfc3.qo.dequantize_tensor(x)
return x
# Define the ResNet model
# Intended for CIFAR-10
class ResNet(nn.Module):
    def __init__(self, block, layers, num_classes=10): # the number of classes is set to 10 here
super(ResNet, self).__init__()
        self.inplanes = 16 # CIFAR-10 images are small, so fewer channels are needed at the start
GlobalVariables.SELF_INPLANES = self.inplanes
# print('resnet init:'+ str(GlobalVariables.SELF_INPLANES))
        # input layer
self.conv1 = nn.Conv2d(3, 16, kernel_size=3, stride=1, padding=1,
bias=True)
self.bn1 = nn.BatchNorm2d(16)
self.relu = nn.ReLU()
        # residual layers (4 stages, each containing 6n+2 convolutional layers)
self.layer1 = MakeLayer(block, 16, layers[0])
self.layer2 = MakeLayer(block, 32, layers[1], stride=2)
self.layer3 = MakeLayer(block, 64, layers[2], stride=2)
self.layer4 = MakeLayer(block, 128, layers[3], stride=2)
        # classification layer
self.avgpool = nn.AdaptiveAvgPool2d((1, 1))
self.fc = nn.Linear(128 * block.expansion, num_classes)
        # parameter initialization
for m in self.modules():
if isinstance(m, nn.Conv2d):
nn.init.kaiming_normal_(m.weight, mode='fan_out', nonlinearity='relu')
elif isinstance(m, nn.BatchNorm2d):
nn.init.constant_(m.weight, 1)
nn.init.constant_(m.bias, 0)
def forward(self, x):
        # input layer
x = self.conv1(x)
x = self.bn1(x)
x = self.relu(x)
        # unlike the ImageNet version, there is no maxpool here: CIFAR-10 images are already small, and pooling again would make them too small
        # residual layers
x = self.layer1(x)
x = self.layer2(x)
x = self.layer3(x)
x = self.layer4(x)
        # classification layer
        x = self.avgpool(x) # output size is B, C, 1, 1
x = x.view(x.size(0), -1)
x = self.fc(x)
        out = F.softmax(x,dim = 1) # the softmax here is optional; it hardly changes the result
return out
def quantize(self, quant_type, num_bits=8, e_bits=3):
self.qconvbnrelu1 = QConvBNReLU(quant_type,self.conv1,self.bn1,qi=True,qo=True,num_bits=num_bits,e_bits=e_bits)
        # num_bits was not being passed in; needs revisiting
self.layer1.quantize(quant_type=quant_type,num_bits=num_bits, e_bits=e_bits)
self.layer2.quantize(quant_type=quant_type,num_bits=num_bits, e_bits=e_bits)
self.layer3.quantize(quant_type=quant_type,num_bits=num_bits, e_bits=e_bits)
self.layer4.quantize(quant_type=quant_type,num_bits=num_bits, e_bits=e_bits)
self.qavgpool1 = QAdaptiveAvgPool2d(quant_type,qi=False,qo=True,num_bits=num_bits,e_bits=e_bits)
self.qfc1 = QLinear(quant_type, self.fc,qi=False,qo=True,num_bits=num_bits,e_bits=e_bits)
def quantize_forward(self, x):
# for _, layer in self.quantize_layers.items():
# x = layer(x)
# out = F.softmax(x, dim=1)
# return out
x = self.qconvbnrelu1(x)
x = self.layer1.quantize_forward(x)
x = self.layer2.quantize_forward(x)
x = self.layer3.quantize_forward(x)
x = self.layer4.quantize_forward(x)
x = self.qavgpool1(x)
x = x.view(x.size(0), -1)
x = self.qfc1(x)
        out = F.softmax(x,dim = 1) # the softmax here is optional; it hardly changes the result
return out
def freeze(self):
        self.qconvbnrelu1.freeze() # as the first layer it has its own qi, so freeze() does not need to be given one
qo = self.layer1.freeze(qinput = self.qconvbnrelu1.qo)
qo = self.layer2.freeze(qinput = qo)
qo = self.layer3.freeze(qinput = qo)
qo = self.layer4.freeze(qinput = qo)
self.qavgpool1.freeze(qi=qo)
self.qfc1.freeze(qi=self.qavgpool1.qo)
def fakefreeze(self):
pass
def quantize_inference(self, x):
qx = self.qconvbnrelu1.qi.quantize_tensor(x)
qx = self.qconvbnrelu1.quantize_inference(qx)
qx = self.layer1.quantize_inference(qx)
qx = self.layer2.quantize_inference(qx)
qx = self.layer3.quantize_inference(qx)
qx = self.layer4.quantize_inference(qx)
qx = self.qavgpool1.quantize_inference(qx)
qx = qx.view(qx.size(0), -1)
qx = self.qfc1.quantize_inference(qx)
qx = self.qfc1.qo.dequantize_tensor(qx)
        out = F.softmax(qx,dim = 1) # the softmax here is optional; it hardly changes the result
return out
# BasicBlock class
class BasicBlock(nn.Module):
expansion = 1
def __init__(self, inplanes, planes, stride=1, downsample=None):
super(BasicBlock, self).__init__()
        # first conv layer
self.conv1 = nn.Conv2d(inplanes, planes, kernel_size=3, stride=stride,
padding=1, bias=True)
self.bn1 = nn.BatchNorm2d(planes)
        # second conv layer
self.conv2 = nn.Conv2d(planes, planes, kernel_size=3, stride=1,
padding=1, bias=True)
self.bn2 = nn.BatchNorm2d(planes)
# shortcut
self.relu = nn.ReLU()
self.downsample = downsample
self.stride = stride
def forward(self, x):
identity = x
out = self.conv1(x)
out = self.bn1(out)
out = self.relu(out)
out = self.conv2(out)
out = self.bn2(out)
if self.downsample is not None:
identity = self.downsample(identity)
out += identity
out = self.relu(out)
return out
def quantize(self, quant_type ,num_bits=8, e_bits=3):
self.qconvbnrelu1 = QConvBNReLU(quant_type,self.conv1,self.bn1,qi=False,qo=True,num_bits=num_bits,e_bits=e_bits)
self.qconvbn1 = QConvBN(quant_type,self.conv2,self.bn2,qi=False,qo=True,num_bits=num_bits,e_bits=e_bits)
if self.downsample is not None:
self.qconvbn2 = QConvBN(quant_type,self.downsample[0],self.downsample[1],qi=False,qo=True,num_bits=num_bits,e_bits=e_bits)
self.qelementadd = QElementwiseAdd(quant_type,qi0=False, qi1=False, qo=True,num_bits=num_bits,e_bits=e_bits)
        self.qrelu1 = QReLU(quant_type,qi= False,num_bits=num_bits,e_bits=e_bits) # needs a qi of its own
def quantize_forward(self, x):
identity = x
out = self.qconvbnrelu1(x)
out = self.qconvbn1(out)
if self.downsample is not None:
identity = self.qconvbn2(identity)
# residual add
# out = identity + out # a dedicated elementwise-add transform is needed here; handled by QElementwiseAdd below
out = self.qelementadd(out,identity)
out = self.qrelu1(out)
return out
def freeze(self, qinput):
# qconvbnrelu1 could reuse the previous layer's qo, but passing it around felt awkward, so it is not done here
# still needs a careful check
self.qconvbnrelu1.freeze(qi= qinput) # must be chained to the last qo of the previous module
self.qconvbn1.freeze(qi = self.qconvbnrelu1.qo)
if self.downsample is not None:
self.qconvbn2.freeze(qi = qinput) # the shortcut branch
self.qelementadd.freeze(qi0 = self.qconvbn1.qo, qi1 = self.qconvbn2.qo)
else:
self.qelementadd.freeze(qi0 = self.qconvbn1.qo, qi1 = qinput)
# a dedicated layer might be needed here to handle the elementwise add
self.qrelu1.freeze(qi = self.qelementadd.qo) # collects its own qi statistics
return self.qrelu1.qi # the qi gathered by the relu can serve as the qo after the relu
def quantize_inference(self, x):
# no initial quantize_tensor/dequantize_tensor should be needed here: this is not the first/last layer, and as long as every intermediate layer stays in the quantized domain no such handling is required
identity = x
out = self.qconvbnrelu1.quantize_inference(x)
out = self.qconvbn1.quantize_inference(out)
if self.downsample is not None:
identity = self.qconvbn2.quantize_inference(identity)
# out = identity + out # may need a dedicated elementwise-add transform, revised later
out = self.qelementadd.quantize_inference(out,identity)
out = self.qrelu1.quantize_inference(out)
return out
# Bottleneck class
class Bottleneck(nn.Module):
expansion = 4
def __init__(self, inplanes, planes, stride=1, downsample=None):
super(Bottleneck, self).__init__()
# 1x1 conv layer
self.conv1 = nn.Conv2d(inplanes, planes, kernel_size=1, bias=True)
self.bn1 = nn.BatchNorm2d(planes)
# 3x3 conv layer
self.conv2 = nn.Conv2d(planes, planes, kernel_size=3, stride=stride,
padding=1, bias=True)
self.bn2 = nn.BatchNorm2d(planes)
# 1x1 conv layer
self.conv3 = nn.Conv2d(planes, planes * self.expansion, kernel_size=1,
bias=True)
self.bn3 = nn.BatchNorm2d(planes * self.expansion)
# shortcut
self.relu = nn.ReLU()
self.downsample = downsample
self.stride = stride
def forward(self, x):
identity = x
out = self.conv1(x)
out = self.bn1(out)
out = self.relu(out)
out = self.conv2(out)
out = self.bn2(out)
out = self.relu(out)
out = self.conv3(out)
out = self.bn3(out)
if self.downsample is not None:
identity = self.downsample(x)
out += identity # the residual addition happens here
out = self.relu(out)
return out
class MakeLayer(nn.Module):
def __init__(self, block, planes, blocks, stride=1):
super(MakeLayer, self).__init__()
# print('makelayer init:'+ str(GlobalVariables.SELF_INPLANES))
self.downsample = None
if stride != 1 or GlobalVariables.SELF_INPLANES != planes * block.expansion:
self.downsample = nn.Sequential(
nn.Conv2d(GlobalVariables.SELF_INPLANES, planes * block.expansion,kernel_size=1, stride=stride, bias=True),
nn.BatchNorm2d(planes * block.expansion)
)
self.blockdict = nn.ModuleDict()
self.blockdict['block1'] = block(inplanes=GlobalVariables.SELF_INPLANES, planes=planes, stride=stride, downsample=self.downsample)
GlobalVariables.SELF_INPLANES = planes * block.expansion
for i in range(1, blocks): # number of blocks; a ModuleDict is used here
self.blockdict['block' + str(i+1)] = block(inplanes=GlobalVariables.SELF_INPLANES, planes=planes) # the block is instantiated here
# def _make_layer(self, block, planes, blocks, stride=1):
# downsample = None
# # stride 是卷积层的步幅,而 self.inplanes 表示当前残差块输入的通道数,
# # planes * block.expansion 则表示当前残差块输出的通道数。因此,当 stride 不等于 1 或者 self.inplanes 不等于 planes * block.expansion 时,就需要进行下采样操作
# #该层中除了第一个残差块之外,其他所有残差块的输入通道数和输出通道数都相等,并且具有相同的步幅(都为 1 或者 2)。这些卷积层的输入张量大小不变, 输出张量高宽尺寸会随着残差块的堆叠而逐渐降低
# if stride != 1 or SELF_INPLANES != planes * block.expansion:
# downsample = nn.Sequential(
# nn.Conv2d(SELF_INPLANES, planes * block.expansion,
# kernel_size=1, stride=stride, bias=False),
# nn.BatchNorm2d(planes * block.expansion),
# )
# layers = []
# layers.append(block(SELF_INPLANES, planes, stride, downsample))
# SELF_INPLANES = planes * block.expansion
# for _ in range(1, blocks): # block的个数
# layers.append(block(SELF_INPLANES, planes))
# return nn.Sequential(*layers)
def forward(self,x):
for _, layer in self.blockdict.items():
x = layer(x)
return x
def quantize(self, quant_type, num_bits=8, e_bits=3):
# needs checking
for _, layer in self.blockdict.items():
layer.quantize(quant_type=quant_type,num_bits=num_bits, e_bits=e_bits) # every entry is a block, and the block carries the concrete quantize strategy
def quantize_forward(self, x):
for _, layer in self.blockdict.items():
x = layer.quantize_forward(x) # each block implements its own quantize_forward
return x
def freeze(self, qinput): # qinput has to be passed down from the ResNet module's freeze
# the first block could reuse the previous layer's qo directly, but passing it around felt awkward, so it is not done here
# still needs a careful check
cnt = 0
for _, layer in self.blockdict.items():
if cnt == 0:
qo = layer.freeze(qinput = qinput)
cnt = 1
else:
qo = layer.freeze(qinput = qo) # each block implements its own freeze
return qo # for the layers that follow
def quantize_inference(self, x):
# no initial quantize_tensor/dequantize_tensor should be needed here: this is not the first/last layer, and as long as every intermediate layer stays in the quantized domain no such handling is required
for _, layer in self.blockdict.items():
x = layer.quantize_inference(x) # each block implements its own quantize_inference
return x
# ResNet-18 builder
def resnet18(**kwargs):
model = ResNet(BasicBlock, [2, 2, 2, 2], **kwargs)
return model
# ResNet-50 builder
def resnet50(**kwargs):
model = ResNet(Bottleneck, [3, 4, 6, 3], **kwargs)
return model
# ResNet-152 builder
def resnet152(**kwargs):
model = ResNet(Bottleneck, [3, 8, 36, 3], **kwargs)
return model
2023.4.10
Note: the code under new_mzh now follows the measurement conventions and quantization details agreed with 游昆霖, and has been rebuilt on top of 游昆霖's version of the program.
Quantizing the BN layers ran into quite a few problems; thanks to 游昆霖 for the help :D
Code changes:
To quantize ResNet18, the quantized layers newly added to module.py are QConvBNReLU, QConvBN, QElementwiseAdd and QAdaptiveAvgPool2d. model.py builds the quantized ResNet18 architecture; class BasicBlock, class Bottleneck and class MakeLayer keep the design extensible, so it extends fairly easily to ResNet50 and ResNet152. A minimal usage sketch of the resulting PTQ flow follows.
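For reference, the intended PTQ flow for these classes can be sketched as below. This is a minimal sketch only: the checkpoint path, the calib_loader/test_loader objects and the exact quant_type string ('INT' here) are assumptions for illustration, not copied from the training script.
```
model = resnet18()
model.load_state_dict(torch.load('ckpt/cifar10_ResNet18.pt'))   # assumed checkpoint path
model.eval()

model.quantize(quant_type='INT', num_bits=8, e_bits=3)          # attach the Q* wrapper layers
with torch.no_grad():
    for x, _ in calib_loader:                                   # calibration: update qi/qo/qw statistics
        model.quantize_forward(x)
model.freeze()                                                  # fix the quantization parameters and chain qi -> qo through the blocks

with torch.no_grad():
    for x, _ in test_loader:
        out = model.quantize_inference(x)                       # quantized-domain inference, dequantized at the end
```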
To do:
Compared with AlexNet or VGG, the ResNet architecture is far less flat: the MakeLayer and residual structures mean it is not a plain sequence of layers, so many of the earlier similarity metrics cannot be applied to ResNet directly (a naive walk over the parameters runs into the layer/sequential/block wrappers around the conv and bn layers; a small helper sketch follows below). The analysis of parameter similarity and gradient similarity is left for follow-up work.
QAT support is also left for follow-up work.
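One possible way around the wrapper problem, as a sketch: collect only the leaf Conv/BatchNorm/Linear modules so per-layer statistics can still be compared despite the nested Sequential/MakeLayer/Block wrappers. This is illustrative only; the metric code in this repo does not use such a helper yet, and the helper name is made up.
```
import torch.nn as nn

def leaf_quant_layers(model: nn.Module):
    # named_modules() walks the whole module tree, so the wrappers are skipped by the isinstance filter
    return [(name, m) for name, m in model.named_modules()
            if isinstance(m, (nn.Conv2d, nn.BatchNorm2d, nn.Linear))]
```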
Below are the PTQ results for ResNet18 (the js_flops and js_param metrics have not been adapted to the new measurement yet, so they are not computed and are reported as 0):
```
PTQ: INT_2
direct quantization finish
Test set: Quant Model Accuracy: 10.00%
INT_2: js_flops: 0.000000 js_param: 0.000000 acc_loss: 0.883599
PTQ: INT_3
direct quantization finish
Test set: Quant Model Accuracy: 10.00%
INT_3: js_flops: 0.000000 js_param: 0.000000 acc_loss: 0.883599
PTQ: INT_4
direct quantization finish
Test set: Quant Model Accuracy: 49.76%
INT_4: js_flops: 0.000000 js_param: 0.000000 acc_loss: 0.420789
PTQ: INT_5
direct quantization finish
Test set: Quant Model Accuracy: 80.86%
INT_5: js_flops: 0.000000 js_param: 0.000000 acc_loss: 0.058782
PTQ: INT_6
direct quantization finish
Test set: Quant Model Accuracy: 84.91%
INT_6: js_flops: 0.000000 js_param: 0.000000 acc_loss: 0.011640
PTQ: INT_7
direct quantization finish
Test set: Quant Model Accuracy: 85.60%
INT_7: js_flops: 0.000000 js_param: 0.000000 acc_loss: 0.003608
PTQ: INT_8
direct quantization finish
Test set: Quant Model Accuracy: 85.85%
INT_8: js_flops: 0.000000 js_param: 0.000000 acc_loss: 0.000698
PTQ: INT_9
direct quantization finish
Test set: Quant Model Accuracy: 85.64%
INT_9: js_flops: 0.000000 js_param: 0.000000 acc_loss: 0.003143
PTQ: INT_10
direct quantization finish
Test set: Quant Model Accuracy: 82.81%
INT_10: js_flops: 0.000000 js_param: 0.000000 acc_loss: 0.036084
PTQ: INT_11
direct quantization finish
Test set: Quant Model Accuracy: 74.91%
INT_11: js_flops: 0.000000 js_param: 0.000000 acc_loss: 0.128041
PTQ: INT_12
direct quantization finish
Test set: Quant Model Accuracy: 56.50%
INT_12: js_flops: 0.000000 js_param: 0.000000 acc_loss: 0.342335
PTQ: INT_13
direct quantization finish
Test set: Quant Model Accuracy: 26.25%
INT_13: js_flops: 0.000000 js_param: 0.000000 acc_loss: 0.694448
PTQ: INT_14
direct quantization finish
Test set: Quant Model Accuracy: 14.16%
INT_14: js_flops: 0.000000 js_param: 0.000000 acc_loss: 0.835176
PTQ: INT_15
direct quantization finish
Test set: Quant Model Accuracy: 11.29%
INT_15: js_flops: 0.000000 js_param: 0.000000 acc_loss: 0.868583
PTQ: INT_16
direct quantization finish
Test set: Quant Model Accuracy: 10.25%
INT_16: js_flops: 0.000000 js_param: 0.000000 acc_loss: 0.880689
PTQ: POT_2
direct quantization finish
Test set: Quant Model Accuracy: 10.00%
POT_2: js_flops: 0.000000 js_param: 0.000000 acc_loss: 0.883599
PTQ: POT_3
direct quantization finish
Test set: Quant Model Accuracy: 10.00%
POT_3: js_flops: 0.000000 js_param: 0.000000 acc_loss: 0.883599
PTQ: POT_4
direct quantization finish
Test set: Quant Model Accuracy: 44.75%
POT_4: js_flops: 0.000000 js_param: 0.000000 acc_loss: 0.479106
PTQ: POT_5
direct quantization finish
Test set: Quant Model Accuracy: 40.29%
POT_5: js_flops: 0.000000 js_param: 0.000000 acc_loss: 0.531021
PTQ: POT_6
direct quantization finish
Test set: Quant Model Accuracy: 50.13%
POT_6: js_flops: 0.000000 js_param: 0.000000 acc_loss: 0.416482
PTQ: POT_7
direct quantization finish
Test set: Quant Model Accuracy: 45.75%
POT_7: js_flops: 0.000000 js_param: 0.000000 acc_loss: 0.467466
PTQ: POT_8
direct quantization finish
Test set: Quant Model Accuracy: 39.79%
POT_8: js_flops: 0.000000 js_param: 0.000000 acc_loss: 0.536841
PTQ: FLOAT_3_E1
direct quantization finish
Test set: Quant Model Accuracy: 9.93%
FLOAT_3_E1: js_flops: 0.000000 js_param: 0.000000 acc_loss: 0.884414
PTQ: FLOAT_4_E1
direct quantization finish
Test set: Quant Model Accuracy: 39.63%
FLOAT_4_E1: js_flops: 0.000000 js_param: 0.000000 acc_loss: 0.538703
PTQ: FLOAT_4_E2
direct quantization finish
Test set: Quant Model Accuracy: 70.74%
FLOAT_4_E2: js_flops: 0.000000 js_param: 0.000000 acc_loss: 0.176580
PTQ: FLOAT_5_E1
direct quantization finish
Test set: Quant Model Accuracy: 65.04%
FLOAT_5_E1: js_flops: 0.000000 js_param: 0.000000 acc_loss: 0.242929
PTQ: FLOAT_5_E2
direct quantization finish
Test set: Quant Model Accuracy: 82.65%
FLOAT_5_E2: js_flops: 0.000000 js_param: 0.000000 acc_loss: 0.037947
PTQ: FLOAT_5_E3
direct quantization finish
Test set: Quant Model Accuracy: 80.86%
FLOAT_5_E3: js_flops: 0.000000 js_param: 0.000000 acc_loss: 0.058782
PTQ: FLOAT_6_E1
direct quantization finish
Test set: Quant Model Accuracy: 74.17%
FLOAT_6_E1: js_flops: 0.000000 js_param: 0.000000 acc_loss: 0.136655
PTQ: FLOAT_6_E2
direct quantization finish
Test set: Quant Model Accuracy: 84.28%
FLOAT_6_E2: js_flops: 0.000000 js_param: 0.000000 acc_loss: 0.018973
PTQ: FLOAT_6_E3
direct quantization finish
Test set: Quant Model Accuracy: 84.81%
FLOAT_6_E3: js_flops: 0.000000 js_param: 0.000000 acc_loss: 0.012804
PTQ: FLOAT_6_E4
direct quantization finish
Test set: Quant Model Accuracy: 78.06%
FLOAT_6_E4: js_flops: 0.000000 js_param: 0.000000 acc_loss: 0.091375
PTQ: FLOAT_7_E1
direct quantization finish
Test set: Quant Model Accuracy: 76.20%
FLOAT_7_E1: js_flops: 0.000000 js_param: 0.000000 acc_loss: 0.113025
PTQ: FLOAT_7_E2
direct quantization finish
Test set: Quant Model Accuracy: 84.83%
FLOAT_7_E2: js_flops: 0.000000 js_param: 0.000000 acc_loss: 0.012571
PTQ: FLOAT_7_E3
direct quantization finish
Test set: Quant Model Accuracy: 85.55%
FLOAT_7_E3: js_flops: 0.000000 js_param: 0.000000 acc_loss: 0.004190
PTQ: FLOAT_7_E4
direct quantization finish
Test set: Quant Model Accuracy: 82.00%
FLOAT_7_E4: js_flops: 0.000000 js_param: 0.000000 acc_loss: 0.045513
PTQ: FLOAT_7_E5
direct quantization finish
Test set: Quant Model Accuracy: 10.00%
FLOAT_7_E5: js_flops: 0.000000 js_param: 0.000000 acc_loss: 0.883599
PTQ: FLOAT_8_E1
direct quantization finish
Test set: Quant Model Accuracy: 77.39%
FLOAT_8_E1: js_flops: 0.000000 js_param: 0.000000 acc_loss: 0.099174
PTQ: FLOAT_8_E2
direct quantization finish
Test set: Quant Model Accuracy: 85.21%
FLOAT_8_E2: js_flops: 0.000000 js_param: 0.000000 acc_loss: 0.008148
PTQ: FLOAT_8_E3
direct quantization finish
Test set: Quant Model Accuracy: 86.00%
FLOAT_8_E3: js_flops: 0.000000 js_param: 0.000000 acc_loss: -0.001048
PTQ: FLOAT_8_E4
direct quantization finish
Test set: Quant Model Accuracy: 83.26%
FLOAT_8_E4: js_flops: 0.000000 js_param: 0.000000 acc_loss: 0.030846
PTQ: FLOAT_8_E5
direct quantization finish
Test set: Quant Model Accuracy: 10.02%
FLOAT_8_E5: js_flops: 0.000000 js_param: 0.000000 acc_loss: 0.883366
PTQ: FLOAT_8_E6
direct quantization finish
Test set: Quant Model Accuracy: 13.09%
FLOAT_8_E6: js_flops: 0.000000 js_param: 0.000000 acc_loss: 0.847631
```
Before the QElementwiseAdd layer was implemented correctly, PTQ accuracy never exceeded 15%, which shows how important it is: it handles the residual addition, and because the two branch outputs live in different quantization ranges they cannot be added directly; a rescale is required (see the sketch below).
The INT results first improve and then degrade as the bit width grows. I inspected the quantized parameter distributions and they track the full-precision model quite closely, so the problem is not in the ordinary quantized layers such as Conv and BN. My current guess is that at larger bit widths the rescale inside QElementwiseAdd overflows; this still needs to be confirmed.
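For completeness, the rescale can be written roughly as follows. This is a hedged sketch of one common formulation; the actual QElementwiseAdd in module.py may differ in rounding, clamping and in how the scale ratios are realized. Here qi0/qi1/qo stand for the QParam objects of the two inputs and the output.
```
def qadd_rescale(q1, q2, qi0, qi1, qo, num_bits=8):
    # r1 + r2 = S1*(q1 - Z1) + S2*(q2 - Z2); re-quantize the sum to the output's (So, Zo)
    out = (qi0.scale / qo.scale) * (q1 - qi0.zero_point) \
        + (qi1.scale / qo.scale) * (q2 - qi1.zero_point)
    out = out.round() + qo.zero_point
    return out.clamp(0., 2. ** num_bits - 1.)
```
If the two scale ratios are implemented as fixed-point multipliers, the intermediate products grow with the bit width, which could be consistent with the overflow suspected above.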
\ No newline at end of file
# -*- coding: utf-8 -*-
import numpy
import numpy as np
import torch
import sys
from mmd_loss import *
from collections import OrderedDict
d1 = sys.argv[1] # bit
d2 = sys.argv[2] # epoch
# d1=4
# d2=5
sum=0
flag=0
total_quan_list=list()
total_base_list=list()
# CNN FLOPs = Cout * Hout * Wout * (2 * Cin * K * K) when bias is counted; otherwise subtract 1 from the inner term
# FC FLOPs = Cout * Cin when bias is counted; otherwise subtract 1
# the associated relu and pool layers are counted in as well
# MAdd
# weight0 =np.array( [ 705600.0+4704.0+ 3528.0 , 480000.0+ 1600.0 + 1200.0 , 95880.0 + 120.0,
# 20076.0 + 84.0 , 1670.0 ])
# weight1=np.array([705,600.0 , 480,000.0,+ 95,880.0 ,
# 20,076.0 , 1,670.0 ])
# flops
weight_f0= np.array([357504+4704+4704, 241600+1600+1600,48000+120,10080+84,840])
weight_f1=np.array([357504, 241600,48000,10080,840])
summary_quan_dict=OrderedDict()
summary_base_dict=OrderedDict()
losses=[]
# 最外层:不同epoch的字典 内层:各个网络层的grads
for i in range(int(d2)):
total_quan_list.append(torch.load('./project/p/checkpoint/cifar-10_lenet_bn_quant/' + str(d1) + '/ckpt_cifar-10_lenet_bn_quant_'+str(i+1)+'.pth'))
#total_quan_list.append(torch.load('checkpoint/cifar-10_lenet_bn/full' + '/ckpt_cifar-10_lenet_bn_' + str(d2) + '.pth'))
total_base_list.append(torch.load('./project/p/checkpoint/cifar-10_lenet_bn/full' + '/ckpt_cifar-10_lenet_bn_' + str(i+1) + '.pth'))
for k, _ in total_base_list[i]['grads'].items():
if flag == 0:
summary_quan_dict[k] = total_quan_list[i]['grads'][k].reshape(1,-1)
summary_base_dict[k] = total_base_list[i]['grads'][k].reshape(1,-1)
else :
# 字典里的数据不能直接改,需要重新赋值
a=summary_quan_dict[k]
b=total_quan_list[i]['grads'][k].reshape(1,-1)
c=np.vstack((a,b))
summary_quan_dict[k] = c
a = summary_base_dict[k]
b = total_base_list[i]['grads'][k].reshape(1,-1)
c = np.vstack((a, b))
summary_base_dict[k] = c
flag = 1
cnt = 0
flag = 0
for k, _ in summary_quan_dict.items():
if flag == 0:
sum += 0.99*weight_f1[cnt] * MK_MMD(source=summary_base_dict[k], target=summary_quan_dict[k]) # weight
else:
sum += 0.01*weight_f1[cnt] * MK_MMD(source=summary_base_dict[k], target=summary_quan_dict[k]) #bias
if flag == 1:
cnt = cnt + 1
flag = 0
else:
flag=1
sum=sum/(weight_f0.sum()*2)
print(sum)
f = open('./project/p/lenet_ptq_similarity.txt','a')
f.write('bit:' + str(d1) + ' epoch_num:' + str(d2) +': '+str(sum)+'\n')
f.close()
# for k,v in summary_base_dict.items():
# if k== 'conv_layers.conv1.weight':
# print(v)
# print('===========')
# print(summary_quan_dict[k])
\ No newline at end of file
# -*- coding: utf-8 -*-
import numpy
import numpy as np
import torch
import sys
from collections import OrderedDict
import scipy.stats
import pandas as pd
import os
# Overall idea: for a given bit width, compute the gradient-distribution similarity at different epoch checkpoints (5, 10, ...); since these are checkpoint ranges, the similarity is averaged over the epochs inside each range.
# Caller: this script is invoked once per bit width.
# Each row of the csv records, for that bit width, the average weighted gradient-distribution similarity at the different epoch checkpoints.
#
d1 = sys.argv[1] # bit
d2 = sys.argv[2] # mode
d3 = sys.argv[3] # n_exp
# d2 = sys.argv[2] # epoch
# d1=4
# d2=5
tag = 0
dirpath = './project/p/qat_analysis_data/mode' + str(d2)
if not os.path.isdir(dirpath):
os.makedirs(dirpath, mode=0o777)
os.chmod(dirpath, mode=0o777)
# if int(d2) == 1:
# csvpath = './project/p/qat_analysis_data/wasserstein_distance.csv'
# else:
if int(d2) != 3:
csvpath = './project/p/qat_analysis_data/mode' + str(d2) + '/wasserstein_distance.csv'
else:
csvpath = './project/p/qat_analysis_data/mode' + str(d2) + '/wasserstein_distance_' + str(d3) + '.csv'
# if os.path.exists("./qat_analysis_data/wasserstein_distance.csv"):
if os.path.exists(csvpath):
tag = 1
if tag == 0: # 还没有csv
df = pd.DataFrame()
else: # 已有csv
# df = pd.read_csv("./qat_analysis_data/wasserstein_distance.csv", index_col=0)
df = pd.read_csv(csvpath, index_col=0)
df2 = pd.DataFrame()
# CNN FLOPs = Cout * Hout * Wout * (2 * Cin * K * K ) 是考虑bias 否则-1
# FCN FLOPs = Cout * Cin 是考虑bias 否则-1
# 把相关的relu,pool也考虑进去了
# MAdd
# weight0 =np.array( [ 705600.0+4704.0+ 3528.0 , 480000.0+ 1600.0 + 1200.0 , 95880.0 + 120.0,
# 20076.0 + 84.0 , 1670.0 ])
# weight1=np.array([705,600.0 , 480,000.0,+ 95,880.0 ,
# 20,076.0 , 1,670.0 ])
# flops
weight_f0= np.array([357504+4704+4704, 241600+1600+1600,48000+120,10080+84,840])
weight_f1=np.array([357504, 241600,48000,10080,840])
# 对不同的epoch节点
for epoch in [5, 10, 15, 20, 25, 30]:
total_quan_list = []
total_base_list = []
summary_quan_dict = OrderedDict()
summary_base_dict = OrderedDict()
flag = 0
result = 0
# 最外层:不同epoch的字典 内层:各个网络层的grads
# 遍历epoch节点内的epoch,收集梯度信息
for i in range(epoch):
if int(d2) == 1:
total_quan_list.append(torch.load(
'./project/p/checkpoint/cifar-10_lenet_bn_quant/scratch/' + str(d1) + '/ckpt_cifar-10_lenet_bn_quant_' + str(
i + 1) + '.pth'))
elif int(d2) == 2:
total_quan_list.append(torch.load(
'./project/p/checkpoint/cifar-10_lenet_bn_quant/scratch/mode' + str(d2) + '/' + str(d1)+ '/ckpt_cifar-10_lenet_bn_quant_' + str(
epoch) + '.pth'))
else:
total_quan_list.append(torch.load(
'./project/p/checkpoint/cifar-10_lenet_bn_quant/scratch/mode' + str(d2) + '_' + str(d3) + '/' + str(d1)+ '/ckpt_cifar-10_lenet_bn_quant_' + str(
epoch) + '.pth'))
# total_quan_list.append(torch.load('checkpoint/cifar-10_lenet_bn/full' + '/ckpt_cifar-10_lenet_bn_' + str(d2) + '.pth'))
# full的数据数不够
total_base_list.append(
torch.load('./project/p/checkpoint/cifar-10_lenet_bn/full' + '/ckpt_cifar-10_lenet_bn_' + str(i + 1) + '.pth'))
for k, _ in total_base_list[i]['grads'].items(): # 得到每个epoch i 的各个层的梯度
if flag == 0: # 读的第一个epoch i 要新建立个数据矩阵的第一行,后续的epoch i都是在这行的基础上向下拓展
summary_quan_dict[k] = total_quan_list[i]['grads'][k].reshape(1, -1)
summary_base_dict[k] = total_base_list[i]['grads'][k].reshape(1, -1)
else:
# 字典里的数据不能直接改,需要重新赋值
a = summary_quan_dict[k]
b = total_quan_list[i]['grads'][k].reshape(1, -1)
c = np.vstack((a, b))
summary_quan_dict[k] = c
a = summary_base_dict[k]
b = total_base_list[i]['grads'][k].reshape(1, -1)
c = np.vstack((a, b))
summary_base_dict[k] = c
flag = 1
# loss = total_quan_list[i]['losses']
# print(loss)
# df = pd.read_csv('./data_analysis_folder/data.csv', index_col=0)
# # df = pd.DataFrame()
# df2 = pd.DataFrame()
# 上面是在收集数据,下面才是求和
for j in range(epoch):
flag0 = 0 # 各个layer的weight和bias
cnt = 0 # 依次遍历各个layer
sum = 0 # sum只是对一个epoch j 的加权梯度分布相似度记录
for k, _ in summary_quan_dict.items():
w = summary_base_dict[k][j, :] # 这里不合适 要改造
v = summary_quan_dict[k][j, :]
if flag0 == 0:
cur_weight = weight_f1[cnt] * scipy.stats.wasserstein_distance(w, v) # weight
# 不是很方便存 需要三维了(sheet)
# if tag == 1:
# df2[k] = [cur_weight]
# else:
# df[k] = [cur_weight]
sum += 0.99 * cur_weight
else:
cur_bias = weight_f1[cnt] * scipy.stats.wasserstein_distance(w, v) # bias
# if tag == 1:
# df2[k] = [cur_bias]
# else:
# df[k] = [cur_bias]
sum += 0.01 * cur_bias
if flag0 == 1:
cnt = cnt + 1
flag0 = 0
else:
flag0 = 1
sum = sum / (weight_f1.sum() * 2)
result += sum # 对各个epoch i的加权梯度相似度求和
print(sum)
result /= epoch # 对epoch节点阶段内的梯度相似度求平均
if tag == 1:
df2[str(epoch)] = [result]
else :
df[str(epoch)] = [result]
result = 0
if tag == 1 :
df = df.append(df2)
# df.to_csv('./qat_analysis_data/wasserstein_distance.csv')
df.to_csv(csvpath)
else :
# df.to_csv('./qat_analysis_data/wasserstein_distance.csv')
df.to_csv(csvpath)
# f = open('lenet_ptq_wasserstein_similarity.txt','a')
# f.write('bit:' + str(d1) + ' epoch_num:' + str(d2) +': '+str(sum)+'\n')
# f.close()
# -*- coding: utf-8 -*-
import numpy
import numpy as np
import torch
import sys
from collections import OrderedDict
import scipy.stats
import pandas as pd
from model import *
# from audtorch.metrics.functional import pearsonr
import math
# this script reads out the weight and bias values of the full-precision and quantized models for inspection
if __name__ == "__main__":
d1 = sys.argv[1]
# d2 = sys.argv[2]
# d1=8
# df = pd.read_csv('./ptq_analysis_data/seperate_data.csv', index_col=0)
df = pd.DataFrame()
# df2 = pd.DataFrame()
base_data = torch.load('./project/p/ckpt/trail/model_trail.pt')
# checkpoint_data = torch.load('./project/p/ckpt/trail/model_trail.pt')
print('full_precision weight/bias loaded!')
checkpoint_dir = './project/p/checkpoint/cifar-10_trail_model'
# quan_data = torch.load('ckpt/cifar-10_lenet_bn_ptq_' + str(d1) + '_.pt')
# print('quantization bit ' + str(d1) + ' weight/bias loaded!')
sum=0
if int(d1) == 1:
print(base_data)
# for k, _ in base_data.items():
# base_data[k] = base_data[k].reshape(1, -1)
# # quan_data[k] = quan_data[k].reshape(1, -1)
# print(base_data[k])
else:
for i in [4,9,14,19]:
check_data = torch.load(checkpoint_dir + '/ckpt_cifar-10_trail_model%s.pt' % (str(i)))
print(check_data)
# if int(d2) == 1:
# print(base_data[k])
# else:
# print(quan_data[k])
# -*- coding: utf-8 -*-
from torch.autograd import Function
class FakeQuantize(Function):
@staticmethod
def forward(ctx, x, qparam): # qparam (i.e. the QParam instance) already records mode, scale, zero_point, n_exp, etc., so no extra arguments are needed
x = qparam.quantize_tensor(x, qparam.mode) # INT
x = qparam.dequantize_tensor(x, qparam.mode) # FP(int)
return x
@staticmethod
def backward(ctx, grad_output): # straight-through estimator (STE): roughly approximate the gradient with the identity
return grad_output, None
\ No newline at end of file
import os
import torch
import torch.nn as nn
import torch.nn.functional as F
import numpy as np
from collections import OrderedDict
def get_model_histogram(model):
"""
Description:
- get norm gradients from model, and store in a OrderDict
Args:
- model: (torch.nn.Module), torch model
Returns:
- grads in OrderDict
"""
gradshisto = OrderedDict()
grads = OrderedDict()
for name, params in model.named_parameters():
grad = params.grad
if grad is not None:
tmp = {}
params_np = grad.cpu().numpy()
histogram, bins = np.histogram(params_np.flatten(),bins=20)
tmp['histogram'] = list(histogram)
tmp['bins'] = list(bins)
gradshisto[name] = tmp
grads[name] = params_np
return gradshisto,grads
def get_model_norm_gradient(model):
"""
Description:
- get norm gradients from model, and store in a OrderDict
Args:
- model: (torch.nn.Module), torch model
Returns:
- grads in OrderDict
"""
grads = OrderedDict()
for name, params in model.named_parameters():
grad = params.grad
if grad is not None:
grads[name] = grad.norm().item()
return grads
def get_grad_histogram(grads_sum):
gradshisto = OrderedDict()
# grads = OrderedDict()
for name, params in grads_sum.items():
grad = params
if grad is not None:
tmp = {}
#params_np = grad.cpu().numpy()
params_np = grad
histogram, bins = np.histogram(params_np.flatten(),bins=20)
tmp['histogram'] = list(histogram)
tmp['bins'] = list(bins)
gradshisto[name] = tmp #每层一个histogram (tmp中的是描述直方图的信息)
# grads[name] = params_np
return gradshisto
\ No newline at end of file
# -*- coding: utf-8 -*-
import numpy
import numpy as np
import torch
import sys
from collections import OrderedDict
import scipy.stats
import pandas as pd
import os
import os.path
#
d1 = sys.argv[1] # bit
d2 = sys.argv[2] # mode
d3 = sys.argv[3] # n_exp
# d1=2
# d2 = sys.argv[2] # epoch
# d1=2
# d2=3
sum=0
flag=0
# CNN FLOPs = Cout * Hout * Wout * (2 * Cin * K * K ) 是考虑bias 否则-1
# FCN FLOPs = Cout * Cin 是考虑bias 否则-1
# 把相关的relu,pool也考虑进去了
# MAdd
# weight0 =np.array( [ 705600.0+4704.0+ 3528.0 , 480000.0+ 1600.0 + 1200.0 , 95880.0 + 120.0,
# 20076.0 + 84.0 , 1670.0 ])
# weight1=np.array([705,600.0 , 480,000.0,+ 95,880.0 ,
# 20,076.0 , 1,670.0 ])
# flops
weight_f0= np.array([357504+4704+4704, 241600+1600+1600,48000+120,10080+84,840])
weight_f1=np.array([357504, 241600,48000,10080,840])
summary_quan_dict=OrderedDict()
summary_base_dict=OrderedDict()
# 最外层:不同epoch的字典 内层:各个网络层的grads
flag = 0
dirpath = './project/p/qat_analysis_data/mode' + str(d2)
if not os.path.isdir(dirpath):
os.makedirs(dirpath, mode=0o777)
os.chmod(dirpath, mode=0o777)
if int(d2) == 1 or int(d2) == 2:
csvpath = dirpath + '/scratch_loss.csv'
else:
csvpath = dirpath + '/scratch_loss_' + str(d3) + '.csv'
if os.path.exists(csvpath):
flag = 1
if flag == 0: # 还没有csv
df = pd.DataFrame()
else: # 已有csv
df = pd.read_csv(csvpath, index_col=0)
df2 = pd.DataFrame()
for epoch in ([5, 10, 15, 20, 25, 30]):
sums = []
total_quan_list = []
total_base_list = []
for i in range(int(epoch)):
if int(d2) == 1:
total_quan_list.append(torch.load(
'./project/p/checkpoint/cifar-10_lenet_bn_quant/scratch/' + str(d1) + '/ckpt_cifar-10_lenet_bn_quant_' + str(
i + 1) + '.pth'))
elif int(d2) == 2:
total_quan_list.append(torch.load(
'./project/p/checkpoint/cifar-10_lenet_bn_quant/scratch/mode' + str(d2) + '/' + str(
d1) + '/ckpt_cifar-10_lenet_bn_quant_' + str(
i + 1) + '.pth'))
else:
total_quan_list.append(torch.load(
'./project/p/checkpoint/cifar-10_lenet_bn_quant/scratch/mode' + str(d2) + '_' + str(d3) + '/' + str(
d1) + '/ckpt_cifar-10_lenet_bn_quant_' + str(
i + 1) + '.pth'))
sum_loss = 0
loss = total_quan_list[i]['losses']
# print(len(loss))
# 每个epoch的不同batch的
for j in range(len(loss)):
sum_loss += loss[j].cpu()
# print(sum_loss)
sum_loss /= len(loss) # average over the batches (dividing by j was off by one)
sums.append(sum_loss)
# print(sums)
#print(sums[0] - sums[int(d1) - 1])
if flag == 0:
df[str(epoch)] = [(sums[0] - sums[int(epoch) - 1]).detach().numpy()]
else:
df2[str(epoch)] = [(sums[0] - sums[int(epoch) - 1]).detach().numpy()]
if flag == 0:
# df.to_csv('./qat_analysis_data/scratch_loss.csv')
df.to_csv(csvpath)
else:
df = df.append(df2)
# df.to_csv('./qat_analysis_data/scratch_loss.csv')
df.to_csv(csvpath)
\ No newline at end of file
# -*- coding: utf-8 -*-
import torch
import torch.nn as nn
import torch.nn.functional as F
from module import *
# class VGG_19(nn.Module):
# def __init__(self, img_size=32, input_channel=3, num_class=10):
# super().__init__()
# self.conv_param_layer_name = (
# 'conv1_1', 'relu1_1', 'conv1_2', 'bn1_1', 'relu1_2', 'pool1',
# 'conv2_1', 'bn2_1', 'relu2_1', 'conv2_2', 'bn2_2', 'relu2_2', 'pool2',
# 'conv3_1', 'bn3_1', 'relu3_1', 'conv3_2', 'bn3_2', 'relu3_2', 'conv3_3', 'bn3_3', 'relu3_3', 'conv3_4',
# 'bn3_4', 'relu3_4', 'pool3',
# 'conv4_1', 'bn4_1', 'relu4_1', 'conv4_2', 'bn4_2', 'relu4_2', 'conv4_3', 'bn4_3', 'relu4_3', 'conv4_4',
# 'bn4_4', 'relu4_4', 'pool4',
# 'conv5_1', 'bn5_1', 'relu5_1', 'conv5_2', 'bn5_2', 'relu5_2', 'conv5_3', 'bn5_3', 'relu5_3', 'conv5_4',
# 'bn5_4', 'relu5_4', 'pool5'
# )
# self.fc_param_layer_name = (
# 'fc1','relu1','drop1','fc2','relu2','drop2','fc3'
# )
# self.conv_layers = nn.ModuleDict({
# # block1
# 'conv1_1': nn.Conv2d(in_channels=3, out_channels=64, kernel_size=(3, 3), stride=(1, 1), padding=1),
# 'relu1_1': nn.ReLU(),
# 'conv1_2': nn.Conv2d(in_channels=64, out_channels=64, kernel_size=(3, 3), stride=(1, 1), padding=1),
# 'bn1_1': nn.BatchNorm2d(num_features=64),
# 'relu1_2': nn.ReLU(),
# 'pool1': nn.MaxPool2d(kernel_size=(2, 2), stride=(2, 2)),
# # block2
# 'conv2_1': nn.Conv2d(in_channels=64, out_channels=128, kernel_size=(3, 3), stride=(1, 1), padding=1),
# 'bn2_1': nn.BatchNorm2d(num_features=128),
# 'relu2_1': nn.ReLU(),
# 'conv2_2': nn.Conv2d(in_channels=128, out_channels=128, kernel_size=(3, 3), stride=(1, 1), padding=1),
# 'bn2_2': nn.BatchNorm2d(num_features=128),
# 'relu2_2': nn.ReLU(),
# 'pool2': nn.MaxPool2d(kernel_size=(2, 2), stride=(2, 2)),
# # block3
# 'conv3_1': nn.Conv2d(in_channels=128, out_channels=256, kernel_size=(3, 3), stride=(1, 1), padding=1),
# 'bn3_1': nn.BatchNorm2d(num_features=256),
# 'relu3_1': nn.ReLU(),
# 'conv3_2': nn.Conv2d(in_channels=256, out_channels=256, kernel_size=(3, 3), stride=(1, 1), padding=1),
# 'bn3_2':nn.BatchNorm2d(num_features=256),
# 'relu3_2': nn.ReLU(),
# 'conv3_3': nn.Conv2d(in_channels=256, out_channels=256, kernel_size=(3, 3), stride=(1, 1), padding=1),
# 'bn3_3': nn.BatchNorm2d(num_features=256),
# 'relu3_3': nn.ReLU(),
# 'conv3_4': nn.Conv2d(in_channels=256, out_channels=256, kernel_size=(3, 3), stride=(1, 1), padding=1),
# 'bn3_4': nn.BatchNorm2d(num_features=256),
# 'relu3_4': nn.ReLU(),
# 'pool3': nn.MaxPool2d(kernel_size=(2, 2), stride=(2, 2)),
# # block4
# 'conv4_1': nn.Conv2d(in_channels=256, out_channels=512, kernel_size=(3, 3), stride=(1, 1), padding=1),
# 'bn4_1': nn.BatchNorm2d(num_features=512),
# 'relu4_1': nn.ReLU(),
# 'conv4_2': nn.Conv2d(in_channels=512, out_channels=512, kernel_size=(3, 3), stride=(1, 1), padding=1),
# 'bn4_2': nn.BatchNorm2d(num_features=512),
# 'relu4_2': nn.ReLU(),
# 'conv4_3': nn.Conv2d(in_channels=512, out_channels=512, kernel_size=(3, 3), stride=(1, 1), padding=1),
# 'bn4_3': nn.BatchNorm2d(num_features=512),
# 'relu4_3': nn.ReLU(),
# 'conv4_4': nn.Conv2d(in_channels=512, out_channels=512, kernel_size=(3, 3), stride=(1, 1), padding=1),
# 'bn4_4': nn.BatchNorm2d(num_features=512),
# 'relu4_4': nn.ReLU(),
# 'pool4': nn.MaxPool2d(kernel_size=(2, 2), stride=(2, 2)),
# # block5
# 'conv5_1': nn.Conv2d(in_channels=512, out_channels=512, kernel_size=(3, 3), stride=(1, 1), padding=1),
# 'bn5_1': nn.BatchNorm2d(num_features=512),
# 'relu5_1': nn.ReLU(),
# 'conv5_2': nn.Conv2d(in_channels=512, out_channels=512, kernel_size=(3, 3), stride=(1, 1), padding=1),
# 'bn5_2': nn.BatchNorm2d(num_features=512),
# 'relu5_2': nn.ReLU(),
# 'conv5_3': nn.Conv2d(in_channels=512, out_channels=512, kernel_size=(3, 3), stride=(1, 1), padding=1),
# 'bn5_3': nn.BatchNorm2d(num_features=512),
# 'relu5_3': nn.ReLU(),
# 'conv5_4': nn.Conv2d(in_channels=512, out_channels=512, kernel_size=(3, 3), stride=(1, 1), padding=1),
# 'bn5_4': nn.BatchNorm2d(num_features=512),
# 'relu5_4': nn.ReLU(),
# 'pool5': nn.MaxPool2d(kernel_size=(2, 2), stride=(2, 2))
# })
# self.fc_layers = nn.ModuleDict({
# # classifier
# 'fc1': nn.Linear(512 * (int)(img_size * img_size / 32 / 32), 4096),
# 'relu1': nn.ReLU(),
# 'drop1': nn.Dropout(0.5),
# 'fc2': nn.Linear(4096, 4096),
# 'relu2': nn.ReLU(),
# 'drop2': nn.Dropout(0.5),
# 'fc3': nn.Linear(4096, num_class)
# })
# def forward(self,x):
# for _,layer in self.conv_layers.items():
# x = layer(x)
# output = x.view(x.size()[0], -1)
# for _,layer in self.fc_layers.items():
# output = layer(output)
# out = F.softmax(output,dim = 1) # 这里不softmax也行 影响不大
# return out
# def quantize(self, num_bits=8):
# self.quantize_conv_layers=nn.ModuleDict({
# # qi=true: 前一层输出的结果是没有量化过的,需要量化。 maxpool和relu都不会影响INT和minmax,所以在这俩之后的层的pi是false
# #若前一层是conv,数据minmax被改变,则需要qi=true来量化
# 'qconv1_1': QConv2d(self.conv_layers['conv1_1'], qi=True, qo=True, num_bits=num_bits),
# 'qrelu1_1': QReLU(),
# 'qconvbnrelu1_1': QConvBNReLU(self.conv_layers['conv1_2'],self.conv_layers['bn1_1'],qi=False,qo=True,num_bits=num_bits),
# 'qpool1': QMaxPooling2d(kernel_size=2,stride=2,padding=0),
# # block2
# 'qconvbnrelu2_1': QConvBNReLU(self.conv_layers['conv2_1'], self.conv_layers['bn2_1'], qi=False, qo=True, num_bits=num_bits),
# 'qconvbnrelu2_2': QConvBNReLU(self.conv_layers['conv2_2'], self.conv_layers['bn2_2'], qi=False, qo=True, num_bits=num_bits),
# 'qpool2': QMaxPooling2d(kernel_size=2,stride=2,padding=0),
# # block3
# 'qconvbnrelu3_1': QConvBNReLU(self.conv_layers['conv3_1'], self.conv_layers['bn3_1'], qi=False, qo=True,
# num_bits=num_bits),
# 'qconvbnrelu3_2': QConvBNReLU(self.conv_layers['conv3_2'], self.conv_layers['bn3_2'], qi=False, qo=True,
# num_bits=num_bits),
# 'qconvbnrelu3_3': QConvBNReLU(self.conv_layers['conv3_3'], self.conv_layers['bn3_3'], qi=False, qo=True,
# num_bits=num_bits),
# 'qconvbnrelu3_4': QConvBNReLU(self.conv_layers['conv3_4'], self.conv_layers['bn3_4'], qi=False, qo=True,
# num_bits=num_bits),
# 'qpool3': QMaxPooling2d(kernel_size=2,stride=2,padding=0),
# # block4
# 'qconvbnrelu4_1': QConvBNReLU(self.conv_layers['conv4_1'], self.conv_layers['bn4_1'], qi=False, qo=True,
# num_bits=num_bits),
# 'qconvbnrelu4_2': QConvBNReLU(self.conv_layers['conv4_2'], self.conv_layers['bn4_2'], qi=False, qo=True,
# num_bits=num_bits),
# 'qconvbnrelu4_3': QConvBNReLU(self.conv_layers['conv4_3'], self.conv_layers['bn4_3'], qi=False, qo=True,
# num_bits=num_bits),
# 'qconvbnrelu4_4': QConvBNReLU(self.conv_layers['conv4_4'], self.conv_layers['bn4_4'], qi=False, qo=True,
# num_bits=num_bits),
# 'qpool4': QMaxPooling2d(kernel_size=2,stride=2,padding=0),
# # block5
# 'qconvbnrelu5_1': QConvBNReLU(self.conv_layers['conv5_1'], self.conv_layers['bn5_1'], qi=False, qo=True,
# num_bits=num_bits),
# 'qconvbnrelu5_2': QConvBNReLU(self.conv_layers['conv5_2'], self.conv_layers['bn5_2'], qi=False, qo=True,
# num_bits=num_bits),
# 'qconvbnrelu5_3': QConvBNReLU(self.conv_layers['conv5_3'], self.conv_layers['bn5_3'], qi=False, qo=True,
# num_bits=num_bits),
# 'qconvbnrelu5_4': QConvBNReLU(self.conv_layers['conv5_4'], self.conv_layers['bn5_4'], qi=False, qo=True,
# num_bits=num_bits),
# 'qpool5': QMaxPooling2d(kernel_size=2,stride=2,padding=0)
# })
# self.quantize_fc_layers = nn.ModuleDict({
# 'qfc1': QLinear(self.fc_layers['fc1'],qi=False,qo=True,num_bits=num_bits),
# 'qrelu1': QReLU(),
# 'qdrop1': nn.Dropout(0.5),
# 'qfc2': QLinear(self.fc_layers['fc2'],qi=False,qo=True,num_bits=num_bits),
# 'qrelu2': QReLU(),
# 'qdrop2': nn.Dropout(0.5),
# 'qfc3': QLinear(self.fc_layers['fc3'],qi=False,qo=True,num_bits=num_bits)
# })
# def quantize_forward(self, x):
# for _, layer in self.quantize_conv_layers.items():
# x = layer(x)
# output = x.view(x.size()[0],-1)
# for s, layer in self.quantize_fc_layers.items():
# # if (s=='qrelu1') == True or (s=='qrelu2')==True:
# # output = nn.Dropout(0.5)
# # else:
# output = layer(output)
# out = F.softmax(output, dim=1) # 这里不softmax也行 影响不大 算loss用
# return out
# def freeze(self):
# self.quantize_conv_layers['qconv1_1'].freeze()
# self.quantize_conv_layers['qrelu1_1'].freeze(self.quantize_conv_layers['qconv1_1'].qo)
# self.quantize_conv_layers['qconvbnrelu1_1'].freeze(qi=self.quantize_conv_layers['qconv1_1'].qo)
# #self.quantize_conv_layers['qconvbnrelu1_1'].freeze(qi=self.quantize_conv_layers['qrelu1_1'].qo)
# self.quantize_conv_layers['qpool1'].freeze(self.quantize_conv_layers['qconvbnrelu1_1'].qo)
# self.quantize_conv_layers['qconvbnrelu2_1'].freeze(qi=self.quantize_conv_layers['qconvbnrelu1_1'].qo)
# self.quantize_conv_layers['qconvbnrelu2_2'].freeze(qi=self.quantize_conv_layers['qconvbnrelu2_1'].qo)
# self.quantize_conv_layers['qpool2'].freeze(self.quantize_conv_layers['qconvbnrelu2_2'].qo)
# self.quantize_conv_layers['qconvbnrelu3_1'].freeze(qi=self.quantize_conv_layers['qconvbnrelu2_2'].qo)
# #self.quantize_conv_layers['qconvbnrelu3_1'].freeze(qi=self.quantize_conv_layers['qpool2'].qo)
# self.quantize_conv_layers['qconvbnrelu3_2'].freeze(qi=self.quantize_conv_layers['qconvbnrelu3_1'].qo)
# self.quantize_conv_layers['qconvbnrelu3_3'].freeze(qi=self.quantize_conv_layers['qconvbnrelu3_2'].qo)
# self.quantize_conv_layers['qconvbnrelu3_4'].freeze(qi=self.quantize_conv_layers['qconvbnrelu3_3'].qo)
# self.quantize_conv_layers['qpool3'].freeze(self.quantize_conv_layers['qconvbnrelu3_4'].qo)
# self.quantize_conv_layers['qconvbnrelu4_1'].freeze(qi=self.quantize_conv_layers['qconvbnrelu3_4'].qo)
# #self.quantize_conv_layers['qconvbnrelu4_1'].freeze(qi=self.quantize_conv_layers['qpool3'].qo)
# self.quantize_conv_layers['qconvbnrelu4_2'].freeze(qi=self.quantize_conv_layers['qconvbnrelu4_1'].qo)
# self.quantize_conv_layers['qconvbnrelu4_3'].freeze(qi=self.quantize_conv_layers['qconvbnrelu4_2'].qo)
# self.quantize_conv_layers['qconvbnrelu4_4'].freeze(qi=self.quantize_conv_layers['qconvbnrelu4_3'].qo)
# self.quantize_conv_layers['qpool4'].freeze(self.quantize_conv_layers['qconvbnrelu4_4'].qo)
# self.quantize_conv_layers['qconvbnrelu5_1'].freeze(qi=self.quantize_conv_layers['qconvbnrelu4_4'].qo)
# #self.quantize_conv_layers['qconvbnrelu5_1'].freeze(qi=self.quantize_conv_layers['qpool4'].qo)
# self.quantize_conv_layers['qconvbnrelu5_2'].freeze(qi=self.quantize_conv_layers['qconvbnrelu5_1'].qo)
# self.quantize_conv_layers['qconvbnrelu5_3'].freeze(qi=self.quantize_conv_layers['qconvbnrelu5_2'].qo)
# self.quantize_conv_layers['qconvbnrelu5_4'].freeze(qi=self.quantize_conv_layers['qconvbnrelu5_3'].qo)
# self.quantize_conv_layers['qpool5'].freeze(self.quantize_conv_layers['qconvbnrelu5_4'].qo)
# self.quantize_fc_layers['qfc1'].freeze(qi=self.quantize_conv_layers['qconvbnrelu5_4'].qo)
# #self.quantize_fc_layers['qfc1'].freeze(qi=self.quantize_conv_layers['qpool5'].qo)
# self.quantize_fc_layers['qrelu1'].freeze(self.quantize_fc_layers['qfc1'].qo)
# self.quantize_fc_layers['qfc2'].freeze(qi=self.quantize_fc_layers['qfc1'].qo)
# #self.quantize_fc_layers['qfc2'].freeze(qi=self.quantize_fc_layers['qrelu1'].qo)
# self.quantize_fc_layers['qrelu2'].freeze(self.quantize_fc_layers['qfc2'].qo)
# self.quantize_fc_layers['qfc3'].freeze(qi=self.quantize_fc_layers['qfc2'].qo)
# #self.quantize_fc_layers['qfc3'].freeze(qi=self.quantize_fc_layers['qrelu2'].qo)
# def quantize_inference(self, x):
# x = self.quantize_conv_layers['qconv1_1'].qi.quantize_tensor(x)
# for s, layer in self.quantize_conv_layers.items():
# x=layer.quantize_inference(x)
# output = x.view(x.size()[0], -1)
# for s, layer in self.quantize_fc_layers.items():
# # elif (s == 'qrelu1') == True or (s == 'qrelu2') == True:
# # output = nn.Dropout(0.5)
# # if (s == 'qdrop1')==True or (s=='qdrop2')==True:
# # output = F.dropout(output,0.45)
# # else:
# if ((s == 'qdrop1') == False ) and ((s == 'qdrop2') == False):
# output = layer.quantize_inference(output)
# else:
# output = output
# output = self.quantize_fc_layers['qfc3'].qo.dequantize_tensor(output)
# out = F.softmax(output, dim=1) # 这里应该用 Qsoftmax可能好些 之后改改
# return out
class LeNet(nn.Module):
# CONV FLOPs: 考虑bias:(2 * C_in * K_h * K_w )* H_out * W_out * C_out
# 不考虑bias: (2 * C_in * K_h * K_w -1)* H_out * W_out * C_out
# FCN FLOPs: 考虑bias: (2 * I )* O
# 不考虑bias: (2 * I - 1) * O
def __init__(self, img_size=32, input_channel=3, num_class=10, n_exp=4, mode=1):
super().__init__()
self.conv_layers = nn.ModuleDict({
# block1
'conv1': nn.Conv2d(3,6,5), # (2*3*5*5) * 32*32*6 (bias占其中的32*32*6) 6144/921600
'bn1': nn.BatchNorm2d(6),
'reluc1': nn.ReLU(),
'pool1': nn.MaxPool2d(2,2),
# block2
'conv2': nn.Conv2d(6,16,5), # (2*6*5*5) * 16*16*16 (bias占其中的16*16*6) 1536/1228800
'bn2': nn.BatchNorm2d(16),
'reluc2': nn.ReLU(),
'pool2': nn.MaxPool2d(2,2),
})
self.fc_layers = nn.ModuleDict({
# classifier
'fc1': nn.Linear(16*5*5,120), # (2*16*5*5)*120 (bias占其中的120) 120/96000
'reluf1': nn.ReLU(),
'fc2': nn.Linear(120,84), # (2*120)*84 (bias占其中的84) 84/2016
'reluf2': nn.ReLU(),
'fc3': nn.Linear(84, num_class)
})
self.mode = mode
self.n_exp = n_exp
def forward(self,x):
for _,layer in self.conv_layers.items():
x = layer(x)
output = x.view(-1,16*5*5)
for _,layer in self.fc_layers.items():
output = layer(output)
out = F.softmax(output,dim = 1) # the softmax here is optional; it barely changes the result
return out
def quantize(self, num_bits=8):
self.quantize_conv_layers=nn.ModuleDict({
# qi=True: the previous layer's output has not been quantized yet and must be quantized here. maxpool and relu do not affect the INT values or the min/max, so layers right after them use qi=False
# if the previous layer is a conv, the data min/max changes, so qi=True is needed to quantize
# 'qconv1': QConv2d(self.conv_layers['conv1'], qi=True, qo=True, num_bits=num_bits, n_exp=self.n_exp, mode=self.mode),
# 'qreluc1': QReLU(n_exp=self.n_exp, mode=self.mode),
'qconvbnrelu1': QConvBNReLU(self.conv_layers['conv1'],self.conv_layers['bn1'],qi=True,qo=True,num_bits=num_bits,n_exp=self.n_exp,mode=self.mode),
'qpool1': QMaxPooling2d(kernel_size=2,stride=2,padding=0, n_exp=self.n_exp, mode=self.mode),
# 'qconv2': QConv2d(self.conv_layers['conv2'], qi=False, qo=True, num_bits=num_bits, n_exp=self.n_exp, mode=self.mode),
# 'qreluc2': QReLU(n_exp=self.n_exp, mode=self.mode),
'qconvbnrelu2': QConvBNReLU(self.conv_layers['conv2'],self.conv_layers['bn2'],qi=True,qo=True,num_bits=num_bits,n_exp=self.n_exp,mode=self.mode), # key fixed: it duplicated 'qconvbnrelu1', while freeze()/fakefreeze() look up 'qconvbnrelu2'
'qpool2': QMaxPooling2d(kernel_size=2, stride=2, padding=0, n_exp=self.n_exp, mode=self.mode)
})
self.quantize_fc_layers = nn.ModuleDict({
'qfc1': QLinear(self.fc_layers['fc1'],qi=False,qo=True,num_bits=num_bits, n_exp=self.n_exp, mode=self.mode),
'qreluf1': QReLU(n_exp=self.n_exp, mode=self.mode),
'qfc2': QLinear(self.fc_layers['fc2'],qi=False,qo=True,num_bits=num_bits, n_exp=self.n_exp, mode=self.mode),
'qreluf2': QReLU(n_exp=self.n_exp, mode=self.mode),
'qfc3': QLinear(self.fc_layers['fc3'],qi=False,qo=True,num_bits=num_bits, n_exp=self.n_exp, mode=self.mode)
})
def quantize_forward(self, x):
for _, layer in self.quantize_conv_layers.items():
x = layer(x)
output = x.view(-1,16*5*5)
for s, layer in self.quantize_fc_layers.items():
output = layer(output)
out = F.softmax(output, dim=1) # the softmax here is optional and barely changes the result; used for the loss
return out
def freeze(self):
# self.quantize_conv_layers['qconv1'].freeze()
# self.quantize_conv_layers['qreluc1'].freeze(self.quantize_conv_layers['qconv1'].qo)
self.quantize_conv_layers['qconvbnrelu1'].freeze()
#self.quantize_conv_layers['qpool1'].freeze(self.quantize_conv_layers['qconv1'].qo)
self.quantize_conv_layers['qpool1'].freeze(self.quantize_conv_layers['qconvbnrelu1'].qo)
# self.quantize_conv_layers['qconv2'].freeze(self.quantize_conv_layers['qconv1'].qo)
# self.quantize_conv_layers['qreluc2'].freeze(self.quantize_conv_layers['qconv2'].qo)
self.quantize_conv_layers['qconvbnrelu2'].freeze()
# self.quantize_conv_layers['qpool2'].freeze(self.quantize_conv_layers['qconv2'].qo)
self.quantize_conv_layers['qpool2'].freeze(self.quantize_conv_layers['qconvbnrelu2'].qo)
# self.quantize_fc_layers['qfc1'].freeze(qi=self.quantize_conv_layers['qconv2'].qo)
self.quantize_fc_layers['qfc1'].freeze(qi=self.quantize_conv_layers['qconvbnrelu2'].qo)
self.quantize_fc_layers['qreluf1'].freeze(self.quantize_fc_layers['qfc1'].qo)
self.quantize_fc_layers['qfc2'].freeze(qi=self.quantize_fc_layers['qfc1'].qo)
self.quantize_fc_layers['qreluf2'].freeze(self.quantize_fc_layers['qfc2'].qo)
self.quantize_fc_layers['qfc3'].freeze(qi=self.quantize_fc_layers['qfc2'].qo)
def fakefreeze(self):
# self.quantize_conv_layers['qconv1'].fakefreeze()
# self.quantize_conv_layers['qreluc1'].fakefreeze(self.quantize_conv_layers['qconv1'].qo)
# self.quantize_conv_layers['qpool1'].fakefreeze(self.quantize_conv_layers['qconv1'].qo)
self.quantize_conv_layers['qconvbnrelu1'].fakefreeze()
self.quantize_conv_layers['qpool1'].fakefreeze(self.quantize_conv_layers['qconvbnrelu1'].qo)
# self.quantize_conv_layers['qconv2'].fakefreeze(self.quantize_conv_layers['qconv1'].qo)
# self.quantize_conv_layers['qreluc2'].fakefreeze(self.quantize_conv_layers['qconv2'].qo)
# self.quantize_conv_layers['qpool2'].fakefreeze(self.quantize_conv_layers['qconv2'].qo)
self.quantize_conv_layers['qconvbnrelu2'].fakefreeze()
self.quantize_conv_layers['qpool2'].fakefreeze(self.quantize_conv_layers['qconvbnrelu2'].qo)
# self.quantize_fc_layers['qfc1'].fakefreeze(qi=self.quantize_conv_layers['qconv2'].qo)
self.quantize_fc_layers['qfc1'].fakefreeze(qi=self.quantize_conv_layers['qconvbnrelu2'].qo)
self.quantize_fc_layers['qreluf1'].fakefreeze(self.quantize_fc_layers['qfc1'].qo)
self.quantize_fc_layers['qfc2'].fakefreeze(qi=self.quantize_fc_layers['qfc1'].qo)
self.quantize_fc_layers['qreluf2'].fakefreeze(self.quantize_fc_layers['qfc2'].qo)
self.quantize_fc_layers['qfc3'].fakefreeze(qi=self.quantize_fc_layers['qfc2'].qo)
def quantize_inference(self, x):
# x = self.quantize_conv_layers['qconv1'].qi.quantize_tensor(x, self.mode)
x = self.quantize_conv_layers['qconvbnrelu1'].qi.quantize_tensor(x, self.mode)
for s, layer in self.quantize_conv_layers.items():
x = layer.quantize_inference(x)
output = x.view( -1,16*5*5)
for s, layer in self.quantize_fc_layers.items():
output = layer.quantize_inference(output)
# only mode 1 needs the range mapping back, restoring the quantized data to a range similar to the original; PoT does not, since it recovers the range by itself
if self.mode == 1:
output = self.quantize_fc_layers['qfc3'].qo.dequantize_tensor(output, self.mode)
out = F.softmax(output, dim=1) # a QSoftmax would probably be better here; to be revised
return out
class Net(nn.Module):
def __init__(self, num_channels=1):
super(Net, self).__init__()
self.conv1 = nn.Conv2d(num_channels, 40, 3, 1)
self.conv2 = nn.Conv2d(40, 40, 3, 1, groups=20)
self.fc = nn.Linear(5*5*40, 10) # these three layers were commented out but are used by forward()/quantize(), so they are restored
def forward(self, x):
x = F.relu(self.conv1(x))
x = F.max_pool2d(x, 2, 2)
x = F.relu(self.conv2(x))
x = F.max_pool2d(x, 2, 2)
x = x.view(-1, 5*5*40) # 重新定义矩阵的形状
x = self.fc(x)
return x
# set up the initial quantization of the model (no quantized training has happened yet); determining the quantization parameters is a preliminary step, and training the quantized model afterwards is the fine-tuning
def quantize(self, num_bits=8):
# only the first layer uses qi=True: during forward, every layer except pool and relu already adjusts x according to its qo (quantize-then-dequantize), so x always carries the latest fake quantization, and only the weights need to be re-fake-quantized inside each layer
self.qconv1 = QConv2d(self.conv1, qi=True, qo=True, num_bits=num_bits)
self.qrelu1 = QReLU()
self.qmaxpool2d_1 = QMaxPooling2d(kernel_size=2, stride=2, padding=0)
self.qconv2 = QConv2d(self.conv2, qi=False, qo=True, num_bits=num_bits) # qi=False means this layer does not need its own input scale/zero_point
self.qrelu2 = QReLU()
self.qmaxpool2d_2 = QMaxPooling2d(kernel_size=2, stride=2, padding=0)
self.qfc = QLinear(self.fc, qi=False, qo=True, num_bits=num_bits)
# forward used while training the quantized model
def quantize_forward(self, x):
x = self.qconv1(x)
x = self.qrelu1(x)
x = self.qmaxpool2d_1(x)
x = self.qconv2(x)
x = self.qrelu2(x)
x = self.qmaxpool2d_2(x)
x = x.view(-1, 5*5*40)
x = self.qfc(x)
return x
# after the quantized model has been trained, fix the parameters; this pins down which quantization parameters each layer uses at inference time, and those parameters determine the mapping and the dequantized result
# regarding qo updates: maxpool, relu and dropout do not update qo, but conv does, so QParam only needs to change after a conv; the other layers simply inherit it (the x passed from output to input carries this inheritance, while QParam is updated only when x could exceed the recorded min/max range)
def freeze(self):
self.qconv1.freeze()
self.qrelu1.freeze(self.qconv1.qo) # used as this layer's qi; layers after a conv need the new qo (qo keeps being updated during training because x and the model parameters change; it is refreshed in the Q* layers' forward, and min/max is a global running statistic, so since this is fine-tuning the initial min/max is not too far off)
self.qmaxpool2d_1.freeze(self.qconv1.qo)
self.qconv2.freeze(qi=self.qconv1.qo)
self.qrelu2.freeze(self.qconv2.qo) # relu and maxpool cannot change QParam (the min/max statistics are global, and neither relu nor maxpool affects min or max)
self.qmaxpool2d_2.freeze(self.qconv2.qo)
self.qfc.freeze(qi=self.qconv2.qo)
# inference after the quantized parameters are frozen: FP32 in, quantized in the middle, FP32 out
def quantize_inference(self, x):
qx = self.qconv1.qi.quantize_tensor(x)
qx = self.qconv1.quantize_inference(qx)
qx = self.qrelu1.quantize_inference(qx)
qx = self.qmaxpool2d_1.quantize_inference(qx)
qx = self.qconv2.quantize_inference(qx)
qx = self.qrelu2.quantize_inference(qx)
qx = self.qmaxpool2d_2.quantize_inference(qx)
qx = qx.view(-1, 5*5*40)
qx = self.qfc.quantize_inference(qx)
out = self.qfc.qo.dequantize_tensor(qx)
return out
class NetBN(nn.Module):
def __init__(self, num_channels=1):
super(NetBN, self).__init__()
self.conv1 = nn.Conv2d(num_channels, 40, 3, 1)
self.bn1 = nn.BatchNorm2d(40)
self.conv2 = nn.Conv2d(40, 40, 3, 1)
self.bn2 = nn.BatchNorm2d(40)
self.fc = nn.Linear(5 * 5 * 40, 10)
def forward(self, x):
x = self.conv1(x)
x = self.bn1(x)
x = F.relu(x)
x = F.max_pool2d(x, 2, 2)
x = self.conv2(x)
x = self.bn2(x)
x = F.relu(x)
x = F.max_pool2d(x, 2, 2)
x = x.view(-1, 5 * 5 * 40)
x = self.fc(x)
return x
def quantize(self, num_bits=8):
self.qconv1 = QConvBNReLU(self.conv1, self.bn1, qi=True, qo=True, num_bits=num_bits)
self.qmaxpool2d_1 = QMaxPooling2d(kernel_size=2, stride=2, padding=0)
self.qconv2 = QConvBNReLU(self.conv2, self.bn2, qi=False, qo=True, num_bits=num_bits)
self.qmaxpool2d_2 = QMaxPooling2d(kernel_size=2, stride=2, padding=0)
self.qfc = QLinear(self.fc, qi=False, qo=True, num_bits=num_bits)
def quantize_forward(self, x):
x = self.qconv1(x)
x = self.qmaxpool2d_1(x)
x = self.qconv2(x)
x = self.qmaxpool2d_2(x)
x = x.view(-1, 5*5*40)
x = self.qfc(x)
return x
def freeze(self):
self.qconv1.freeze()
self.qmaxpool2d_1.freeze(self.qconv1.qo)
self.qconv2.freeze(qi=self.qconv1.qo) # maxpool does not change min/max
self.qmaxpool2d_2.freeze(self.qconv2.qo)
self.qfc.freeze(qi=self.qconv2.qo) # maxpool does not change min/max
def quantize_inference(self, x):
qx = self.qconv1.qi.quantize_tensor(x)
qx = self.qconv1.quantize_inference(qx)
qx = self.qmaxpool2d_1.quantize_inference(qx)
qx = self.qconv2.quantize_inference(qx)
qx = self.qmaxpool2d_2.quantize_inference(qx)
qx = qx.view(-1, 5*5*40)
qx = self.qfc.quantize_inference(qx)
out = self.qfc.qo.dequantize_tensor(qx) # INT -> FP
return out
# -*- coding: utf-8 -*-
import math
import numpy as np
import torch
import torch.nn as nn
import torch.nn.functional as F
from torch.autograd import Variable
import math
from function import FakeQuantize
def quantize_adaptivfloat(float_arr, n_bits=8, n_exp=4):
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
n_mant = n_bits - 1 - n_exp
# 1. store sign value and do the following part as unsigned value
sign = torch.sign(float_arr).cpu()
float_arr = torch.abs(float_arr)
# float_arr = float_arr.cpu().numpy()
float_arr = float_arr.detach().cpu().numpy()
Emin = -2.**(n_exp-1)+1
Emax = 2.**(n_exp-1)
min_e = 2.**(Emin)
max_e = 2.**(Emax)
min_value = min_e
max_value = max_e * (2-2.**(-n_mant))
# Non denormal part
float_arr[float_arr < min_value] = 0
## 2.2. reduce too large values to max value of output format
float_arr[float_arr > max_value] = max_value
# 3. get mant, exp (the format is different from IEEE float)
# mant, exp = torch.frexp(float_arr)
mant, exp = np.frexp(float_arr) # 若是0,则mant是0,后续的float_out也就是0了
mant = torch.tensor(mant)
exp = torch.tensor(exp)
# 3.1 change mant, and exp format to IEEE float format
# no effect for exponent of 0 outputs
mant = 2 * mant
exp = exp - 1
exp = exp.clamp(Emin, Emax) # 防止上下溢出
power_exp = torch.exp2(exp)
## 4. quantize mantissa
scale = 2 ** (-n_mant) ## e.g. 2 bit, scale = 0.25
mant = ((mant / scale).floor()) * scale # 舍掉了无法达到的精度的尾数
float_out = sign * power_exp * mant
float_out = float_out.to(device)
return float_out
def build_power_value(num_bits=8):
base_a = [0.]
for i in range(2 ** num_bits - 1): # 从+ -(1) 到 + -(2 ** B - 1)
base_a.append(2 ** (-i - 1))
values = []
for a in base_a:
values.append(a)
values = torch.Tensor(list(set(values)))
values = values.mul(1.0 / torch.max(values)) # max是1吧,相当于没除
return values
def apot_quantization(tensor, alpha, proj_set): # alpha 可以是 scale
def power_quant(x, value_s):
shape = x.shape
xhard = x.view(-1) # 展平
sign = x.sign() # 应该是一个向量吧
value_s = value_s.type_as(x) # value_s 就是 proj_set
xhard = xhard.abs()
idxs = (xhard.unsqueeze(0) - value_s.unsqueeze(1)).abs().min(dim=0)[1]
xhard = value_s[idxs].view(shape).mul(sign) # 还原形状和符号
xhard = xhard
# xout equals xhard numerically; the detach trick keeps that value while letting the gradient flow through x (straight-through)
# in short, the code above snaps each value of x to its nearest quantization point
xout = (xhard - x).detach() + x
return xout
data = tensor / alpha # 相当于归一一下,α是系数 (可以不是min,max,由scale去定呗)
data = data.clamp(-1, 1) # 先clip
data_q = power_quant(data, proj_set) # 再映射
data_q = data_q * alpha # 再乘系数
return data_q
def calcScaleZeroPoint(min_val, max_val, num_bits=8, mode=1):
# 这里是0~127 uint
scale = torch.tensor(0)
zero_point = torch.tensor(0)
if mode == 1 :
qmin = 0.
qmax = 2. ** num_bits - 1.
scale = (max_val - min_val) / (qmax - qmin)
zero_point = qmax - max_val / scale
if zero_point < qmin:
zero_point = torch.tensor([qmin], dtype=torch.float32).to(min_val.device)
elif zero_point > qmax:
zero_point = torch.tensor([qmax], dtype=torch.float32).to(max_val.device)
zero_point.round_() # round to an integer
# mainly used by mode=2; mode=3 does not actually need it
elif mode == 2 or mode == 3:
# print('BEFORE')
scale = max_val.abs() if max_val.abs()>min_val.abs() else min_val.abs() # 直接找了个最大值
# print(scale)
# print(zero_point)
# print('AFTER')
return scale, zero_point
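# Illustrative worked example (added comment, not original code): with mode=1, num_bits=8,
# min_val=-1.0, max_val=1.0 the affine parameters come out as
#   scale      = (1.0 - (-1.0)) / 255 ≈ 0.00784
#   zero_point = 255 - 1.0 / scale = 127.5 -> rounded to 128
# so quantize_tensor below maps a real value r to q = round(r / scale + zero_point), clamped to [0, 255].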
def quantize_tensor(x, scale, zero_point, num_bits=8, signed=False, n_exp=4 , mode=1):
if mode == 1:
if signed:
qmin = - 2. ** (num_bits - 1)
qmax = 2. ** (num_bits - 1) - 1
else:
qmin = 0.
qmax = 2. ** num_bits - 1.
q_x = zero_point + x / scale
q_x.clamp_(qmin, qmax).round_() # 将q_x限制在[qmin,qimax],并rounding
elif mode == 2:
# 待补充
proj_set = build_power_value(num_bits)
q_x = apot_quantization(x, scale, proj_set)
elif mode == 3:
q_x = quantize_adaptivfloat(float_arr=x, n_bits=num_bits, n_exp=n_exp) # E=4 或 5
return q_x
def dequantize_tensor(q_x, scale, zero_point, mode):
if mode == 1:
return scale * (q_x - zero_point)
elif mode == 2 or mode == 3:
# 待补充
return q_x # 对于mode2,3 quantize的时候实际上就dequantize过了,实际范围是没有大变化的
# def search(M):
# P = 7000
# n = 1
# while True:
# Mo = int(round(2 ** n * M))
# # Mo
# approx_result = Mo * P >> n
# result = int(round(M * P))
# error = approx_result - result
#
# print("n=%d, Mo=%f, approx=%d, result=%d, error=%f" % \
# (n, Mo, approx_result, result, error))
#
# if math.fabs(error) < 1e-9 or n >= 22:
# return Mo, n
# n += 1
# quantization parameters (fake-quantization helper)
class QParam(nn.Module):
# mode = 1: INT, mode = 2 : PoT, mode = 3: FP
def __init__(self, num_bits=8 ,n_exp=4, mode=1):
super(QParam, self).__init__()
self.num_bits = num_bits
self.mode = mode
self.n_exp = n_exp
# 在训练时不更新梯度 并且通过register_buffer保留下来
scale = torch.tensor([], requires_grad=False)
zero_point = torch.tensor([], requires_grad=False)
min = torch.tensor([], requires_grad=False)
max = torch.tensor([], requires_grad=False)
# 张量会保存在model.state_dict()中,也就可以随着模型一起通过.cuda()复制到gpu上
self.register_buffer('scale', scale)
self.register_buffer('zero_point', zero_point)
self.register_buffer('min', min)
self.register_buffer('max', max)
# track the running rmax/rmin
def update(self, tensor):
if self.max.nelement() == 0 or self.max.data < tensor.max().data:
self.max.data = tensor.max().data
self.max.clamp_(min=0) # force max >= 0 (this constraint effectively limits the quantized range)
if self.min.nelement() == 0 or self.min.data > tensor.min().data:
self.min.data = tensor.min().data
self.min.clamp_(max=0) # force min <= 0 (same range-limiting effect)
# refresh the quantization parameters
self.scale, self.zero_point = calcScaleZeroPoint(self.min, self.max, self.num_bits, self.mode)
# tensor量化
def quantize_tensor(self, tensor, mode):
return quantize_tensor(x=tensor, scale=self.scale, zero_point=self.zero_point, num_bits=self.num_bits, n_exp=self.n_exp, mode=self.mode)
# tensor还原
def dequantize_tensor(self, q_x, mode):
return dequantize_tensor(q_x=q_x, scale=self.scale, zero_point=self.zero_point, mode=self.mode)
# 从state_dict中恢复模型参数 这里应该是重构了
def _load_from_state_dict(self, state_dict, prefix, local_metadata, strict, missing_keys, unexpected_keys,
error_msgs):
key_names = ['scale', 'zero_point', 'min', 'max']
for key in key_names:
value = getattr(self, key)
value.data = state_dict[prefix + key].data
state_dict.pop(prefix + key)
# 当使用print输出对象的时候,只要自己定义了__str__(self)方法,那么就会打印从在这个方法中return的数据
def __str__(self):
info = 'scale: %.10f ' % self.scale
info += 'zp: %d ' % self.zero_point
info += 'min: %.6f ' % self.min
info += 'max: %.6f' % self.max
return info
# 伪量化层
class QModule(nn.Module):
def __init__(self, qi=True, qo=True, num_bits=8, n_exp=4, mode=1):
super(QModule, self).__init__()
if qi:
self.qi = QParam(num_bits=num_bits, n_exp=n_exp, mode = mode) # qi在此处就已经被num_bits和mode赋值了
if qo:
self.qo = QParam(num_bits=num_bits, n_exp=n_exp, mode = mode) # qo在此处就已经被num_bits和mode赋值了
def freeze(self):
pass
def fakefreeze(self):
pass
def quantize_inference(self, x):
raise NotImplementedError('quantize_inference should be implemented.')
class QConv2d(QModule):
def __init__(self, conv_module, qi=True, qo=True, num_bits=8, n_exp=4, mode=1): # 此处是为了给内蕴的QModule(i.e. qi,qo)赋值mode
super(QConv2d, self).__init__(qi=qi, qo=qo, num_bits=num_bits, n_exp=n_exp,mode=mode)
self.num_bits = num_bits
self.conv_module = conv_module
self.qw = QParam(num_bits=num_bits, n_exp=n_exp, mode=mode) # 这里是引入一个伪量化层
self.qb = QParam(num_bits=num_bits, n_exp=n_exp, mode=mode)
self.mode = mode #方便层内使用
self.n_exp = n_exp
# 新建qb
self.register_buffer('M', torch.tensor([], requires_grad=False)) # 将M注册为buffer
# freeze is called right before saving/inference: all quantization parameters and quantized tensors get their final update here
def freeze(self, qi=None, qo=None):
if hasattr(self, 'qi') and qi is not None:
raise ValueError('qi has been provided in init function.')
if not hasattr(self, 'qi') and qi is None:
raise ValueError('qi is not existed, should be provided.')
if hasattr(self, 'qo') and qo is not None:
raise ValueError('qo has been provided in init function.')
if not hasattr(self, 'qo') and qo is None:
raise ValueError('qo is not existed, should be provided.')
if qi is not None: # 有输入qi,可以给self.qi赋值
self.qi = qi
if qo is not None: # 有输入qo,可以给self.qo赋值
self.qo = qo
if self.mode == 1:
self.M.data = (self.qw.scale * self.qi.scale / self.qo.scale).data
# quantize the weights; after subtracting the zero_point they can be used directly in the multiplication, ready for quantized inference after fine-tuning
self.conv_module.weight.data = self.qw.quantize_tensor(self.conv_module.weight.data, self.mode)
self.conv_module.weight.data = self.conv_module.weight.data - self.qw.zero_point
# quantize the bias
# open question: should the bias num_bits also be limited by the device's quantization bit width?
self.conv_module.bias.data = quantize_tensor(self.conv_module.bias.data,
scale=self.qi.scale * self.qw.scale,
zero_point=0, num_bits=self.num_bits, signed=True, n_exp=self.n_exp, mode=self.mode)
elif self.mode == 2 or self.mode == 3:
# 量化 weight 且weight实际上是可以直接用于相乘的 (已 -zeropoint)用于finetune后准备量化推理了
self.conv_module.weight.data = self.qw.quantize_tensor(self.conv_module.weight.data, self.mode)
# 量化 bias
# bias的num_bits是否也应该受设备量化位宽限制
self.conv_module.bias.data = quantize_tensor(self.conv_module.bias.data,
scale=self.qb.scale,
zero_point=0, num_bits=self.num_bits, signed=True, n_exp=self.n_exp, mode=self.mode)
def fakefreeze(self, qi=None, qo=None):
if hasattr(self, 'qi') and qi is not None:
raise ValueError('qi has been provided in init function.')
if not hasattr(self, 'qi') and qi is None:
raise ValueError('qi is not existed, should be provided.')
if hasattr(self, 'qo') and qo is not None:
raise ValueError('qo has been provided in init function.')
if not hasattr(self, 'qo') and qo is None:
raise ValueError('qo is not existed, should be provided.')
        if qi is not None:  # an external qi was supplied, use it for self.qi
            self.qi = qi
        if qo is not None:  # an external qo was supplied, use it for self.qo
            self.qo = qo
if self.mode == 1:
self.M.data = (self.qw.scale * self.qi.scale / self.qo.scale).data
# fake quantization weight
self.conv_module.weight.data = self.qw.quantize_tensor(self.conv_module.weight.data, self.mode)
self.conv_module.weight.data = self.qw.dequantize_tensor(self.conv_module.weight.data, self.mode)
# fake quantization bias
self.conv_module.bias.data = quantize_tensor(self.conv_module.bias.data,
scale=self.qi.scale * self.qw.scale,
zero_point=0, num_bits=self.num_bits, signed=True, n_exp=self.n_exp, mode=self.mode)
self.conv_module.bias.data = dequantize_tensor(self.conv_module.bias.data,
scale=self.qi.scale * self.qw.scale,
zero_point=0, mode=self.mode)
elif self.mode == 2 or self.mode == 3:
# fake quantization weight
self.conv_module.weight.data = self.qw.quantize_tensor(self.conv_module.weight.data, self.mode)
self.conv_module.weight.data = self.qw.dequantize_tensor(self.conv_module.weight.data, self.mode)
# fake quantization bias
self.conv_module.bias.data = quantize_tensor(self.conv_module.bias.data,
scale=self.qb.scale,
zero_point=0, num_bits=self.num_bits, signed=True, n_exp=self.n_exp, mode=self.mode)
self.conv_module.bias.data = dequantize_tensor(self.conv_module.bias.data,
scale=self.qb.scale,
zero_point=0, mode=self.mode)
    # FakeQuantize.apply quantizes and immediately dequantizes, so every tensor used in forward() stays roughly in its original fp range.
    def forward(self, x):
        # forward() updates qi from the *incoming* x (before this layer's computation), which matches qi's meaning as the input statistics.
        # If qi=False was passed at construction (training-time forward), the hasattr check below decides whether this layer needs its own qi;
        # pool and relu layers do not, because they can reuse the qo of the preceding conv, whose output x was already re-quantized (or left as-is) at the end of that conv.
        if hasattr(self, 'qi'):
            # qi's mode was fixed at init time
            self.qi.update(x)  # qi carries the fake-quantization parameters and methods
            x = FakeQuantize.apply(x, self.qi)  # forward: FP -> INT -> FP (qi: input quantization), quantize then dequantize
        # Update qw (and qb) before every forward so the weights are quantized with up-to-date scale etc. in the computation below.
self.qw.update(self.conv_module.weight.data)
self.qb.update(self.conv_module.bias.data)
# conv2d(input: Tensor, weight: Tensor, bias: Optional[Tensor]=None, stride: Union[_int, _size]=1, padding: Union[_int, _size]=0, dilation: Union[_int, _size]=1, groups: _int=1) -> Tensor: ...
# x = F.conv2d(x, FakeQuantize.apply(self.conv_module.weight, self.qw), self.conv_module.bias,
# stride=self.conv_module.stride,
# padding=self.conv_module.padding, dilation=self.conv_module.dilation,
# groups=self.conv_module.groups)
x = F.conv2d(x, FakeQuantize.apply(self.conv_module.weight, self.qw),
FakeQuantize.apply(self.conv_module.bias, self.qb),
stride=self.conv_module.stride,
padding=self.conv_module.padding, dilation=self.conv_module.dilation,
groups=self.conv_module.groups)
if hasattr(self, 'qo'):
self.qo.update(x)
            x = FakeQuantize.apply(x, self.qo)  # quantize-then-dequantize the output
return x
    def quantize_inference(self, x):  # x is the already-quantized input
        if self.mode == 1:
            x = x - self.qi.zero_point
            x = self.conv_module(x)  # the conv_module weights were quantized in freeze() above
            x = self.M * x  # integer-accumulator rescaling step
            # post-process the fp32 values the layer's forward just produced
            x.round_()
            x = x + self.qo.zero_point
            x.clamp_(0., 2. ** self.num_bits - 1.).round_()  # clamp to the valid quantized range
return x
        elif self.mode == 2 or self.mode == 3:
            x = self.conv_module(x)  # the conv_module weights were quantized in freeze() above
            # re-express the result in the PoT representation
            x = FakeQuantize.apply(x, self.qo)  # qo was updated from the post-forward x, so quantize+dequantize with self.qo yields the PoT-quantized result
            return x
            # x.round_()
            # x.clamp_(0., 2. ** self.num_bits - 1.).round_()  # clamp to the valid quantized range
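    # Mode-1 integer inference in quantize_inference() above follows the standard affine-quantization
    # identity (a reference sketch with hypothetical numbers, assuming real = scale * (q - zero_point)):
    #   conv of dequantized tensors  ->  s_i * s_w * conv(q_x - z_i, q_w - z_w) + bias
    #   dividing by the output scale ->  q_o = round(M * acc) + z_o,  with M = s_i * s_w / s_o
    # e.g. s_i=0.02, s_w=0.01, s_o=0.05 -> M = 0.004; an integer accumulator acc=500 corresponds to a
    # real value of 0.02*0.01*500 = 0.1, and round(M*acc) = 2 = round(0.1 / 0.05), as expected.
    # The bias is quantized with scale s_i*s_w and zero_point 0 in freeze(), so it can be added to acc directly.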
class QBN(QModule):
    def __init__(self, bn_module, qi=True, qo=True, num_bits=8, n_exp=4, mode=1):  # mode is passed through to the embedded QModule (i.e. qi, qo)
super(QBN, self).__init__(qi=qi, qo=qo, num_bits=num_bits, n_exp=n_exp,mode=mode)
self.num_bits = num_bits
self.bn_module = bn_module
        self.qw = QParam(num_bits=num_bits, n_exp=n_exp, mode=mode)  # fake-quantization parameters for the BN weight (gamma)
        self.qb = QParam(num_bits=num_bits, n_exp=n_exp, mode=mode)  # fake-quantization parameters for the BN bias (beta)
        self.mode = mode  # kept for convenience inside the layer
        self.n_exp = n_exp
        self.register_buffer('M', torch.tensor([], requires_grad=False))  # register the rescaling factor M as a buffer

    # freeze() is called right before saving/inference: all quantization parameters and quantized tensors get their final update here.
def freeze(self, qi=None, qo=None):
if hasattr(self, 'qi') and qi is not None:
raise ValueError('qi has been provided in init function.')
if not hasattr(self, 'qi') and qi is None:
raise ValueError('qi is not existed, should be provided.')
if hasattr(self, 'qo') and qo is not None:
raise ValueError('qo has been provided in init function.')
if not hasattr(self, 'qo') and qo is None:
raise ValueError('qo is not existed, should be provided.')
        if qi is not None:  # an external qi was supplied, use it for self.qi
            self.qi = qi
        if qo is not None:  # an external qo was supplied, use it for self.qo
            self.qo = qo
        if self.mode == 1:
            # self.M.data = (self.qw.scale * self.qi.scale / self.qo.scale).data
            # For BN in mode 1 the weight (gamma) is only fake-quantized here, not turned into integers.
            # self.bn_module.weight.data = self.qw.quantize_tensor(self.bn_module.weight.data, self.mode)
            # self.bn_module.weight.data = self.bn_module.weight.data - self.qw.zero_point
            self.bn_module.weight.data = FakeQuantize.apply(self.bn_module.weight, self.qw)
            # Quantize the bias
            # (open question: should the bias bit-width also be limited by the device's quantization width?)
# self.bn_module.bias.data = quantize_tensor(self.bn_module.bias.data,
# scale=self.qi.scale * self.qw.scale,
# zero_point=0, num_bits=self.num_bits, signed=True, n_exp=self.n_exp, mode=self.mode)
            self.bn_module.bias.data = FakeQuantize.apply(self.bn_module.bias, self.qb)
        elif self.mode == 2 or self.mode == 3:
            # Quantize the weights so they can be used directly in the multiplication (the PoT/FP modes need no zero-point subtraction).
            self.bn_module.weight.data = self.qw.quantize_tensor(self.bn_module.weight.data, self.mode)
            # Quantize the bias
            # (open question: should the bias bit-width also be limited by the device's quantization width?)
self.bn_module.bias.data = quantize_tensor(self.bn_module.bias.data,
scale=self.qb.scale,
zero_point=0, num_bits=self.num_bits, signed=True, n_exp=self.n_exp, mode=self.mode)
def fakefreeze(self, qi=None, qo=None):
if hasattr(self, 'qi') and qi is not None:
raise ValueError('qi has been provided in init function.')
if not hasattr(self, 'qi') and qi is None:
raise ValueError('qi is not existed, should be provided.')
if hasattr(self, 'qo') and qo is not None:
raise ValueError('qo has been provided in init function.')
if not hasattr(self, 'qo') and qo is None:
raise ValueError('qo is not existed, should be provided.')
        if qi is not None:  # an external qi was supplied, use it for self.qi
            self.qi = qi
        if qo is not None:  # an external qo was supplied, use it for self.qo
            self.qo = qo
if self.mode == 1:
# fake quantization weight
self.bn_module.weight.data = self.qw.quantize_tensor(self.bn_module.weight.data, self.mode)
self.bn_module.weight.data = self.qw.dequantize_tensor(self.bn_module.weight.data, self.mode)
# fake quantization bias
self.bn_module.bias.data = quantize_tensor(self.bn_module.bias.data,
scale=self.qi.scale * self.qw.scale,
zero_point=0, num_bits=self.num_bits, signed=True, n_exp=self.n_exp, mode=self.mode)
self.bn_module.bias.data = dequantize_tensor(self.bn_module.bias.data,
scale=self.qi.scale * self.qw.scale,
zero_point=0, mode=self.mode)
elif self.mode == 2 or self.mode == 3:
# fake quantization weight
self.bn_module.weight.data = self.qw.quantize_tensor(self.bn_module.weight.data, self.mode)
self.bn_module.weight.data = self.qw.dequantize_tensor(self.bn_module.weight.data, self.mode)
# fake quantization bias
self.bn_module.bias.data = quantize_tensor(self.bn_module.bias.data,
scale=self.qb.scale,
zero_point=0, num_bits=self.num_bits, signed=True, n_exp=self.n_exp, mode=self.mode)
self.bn_module.bias.data = dequantize_tensor(self.bn_module.bias.data,
scale=self.qb.scale,
zero_point=0, mode=self.mode)
    # FakeQuantize.apply quantizes and immediately dequantizes, so every tensor used in forward() stays roughly in its original fp range.
    def forward(self, x):
        if hasattr(self, 'qi'):
            # qi's mode was fixed at init time
            self.qi.update(x)  # qi carries the fake-quantization parameters and methods
            x = FakeQuantize.apply(x, self.qi)  # forward: FP -> INT -> FP (qi: input quantization), quantize then dequantize
        self.qw.update(self.bn_module.weight.data)
        self.qb.update(self.bn_module.bias.data)
        # Run BN through a temporary copy whose affine parameters are fake-quantized; keep it on x's device
        # and in the same train/eval state as the wrapped module so the running statistics are used (and not overwritten) consistently.
        bn_q = torch.nn.BatchNorm2d(num_features=self.bn_module.num_features, affine=self.bn_module.affine, eps=self.bn_module.eps,
                                    momentum=self.bn_module.momentum, track_running_stats=self.bn_module.track_running_stats).to(x.device)
        bn_q.train(self.bn_module.training)
        bn_q.weight.data = FakeQuantize.apply(self.bn_module.weight, self.qw)
        bn_q.bias.data = FakeQuantize.apply(self.bn_module.bias, self.qb)
        bn_q.running_mean.data = self.bn_module.running_mean
        bn_q.running_var.data = self.bn_module.running_var
        x = bn_q(x)
        # x = self.bn_module(x)
if hasattr(self, 'qo'):
self.qo.update(x)
            x = FakeQuantize.apply(x, self.qo)  # quantize-then-dequantize the output
return x
    def quantize_inference(self, x):  # x is the already-quantized input
        if self.mode == 1:
            x = dequantize_tensor(x, scale=self.qi.scale, zero_point=self.qi.zero_point, mode=self.mode)
            x = self.bn_module(x)  # the bn_module parameters were fake-quantized in freeze() above
            x = quantize_tensor(x, scale=self.qo.scale, zero_point=self.qo.zero_point, mode=self.mode)
            # x.round_()
            # x = x + self.qo.zero_point
            # x.clamp_(0., 2. ** self.num_bits - 1.).round_()  # clamp to the valid quantized range
            return x
        elif self.mode == 2 or self.mode == 3:
            x = self.bn_module(x)  # the bn_module parameters were quantized in freeze() above
            # re-express the result in the PoT representation
            x = FakeQuantize.apply(x, self.qo)  # qo was updated from the post-forward x, so quantize+dequantize with self.qo yields the PoT-quantized result
            return x
            # x.round_()
            # x.clamp_(0., 2. ** self.num_bits - 1.).round_()  # clamp to the valid quantized range
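    # Mode-1 note (a brief sketch, not extra functionality): unlike the folded wrappers below, QBN is not
    # merged into a preceding conv. Assuming the usual affine mapping real = scale * (q - zero_point),
    # quantize_inference simply dequantizes the integer input with qi, runs the float BN with the
    # fake-quantized gamma/beta, and re-quantizes the result with qo. A fully integer path therefore
    # exists only for QConvBN / QConvBNReLU further below.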
class QLinear(QModule):
def __init__(self, fc_module, qi=True, qo=True, num_bits=8, n_exp=4, mode=1):
super(QLinear, self).__init__(qi=qi, qo=qo, num_bits=num_bits, n_exp=n_exp, mode=mode)
self.num_bits = num_bits
self.fc_module = fc_module
self.qw = QParam(num_bits=num_bits, n_exp=n_exp, mode=mode)
self.qb = QParam(num_bits=num_bits, n_exp=n_exp, mode=mode)
        self.register_buffer('M', torch.tensor([], requires_grad=False))  # register the rescaling factor M as a buffer
self.mode = mode
self.n_exp = n_exp
def freeze(self, qi=None, qo=None):
if hasattr(self, 'qi') and qi is not None:
raise ValueError('qi has been provided in init function.')
if not hasattr(self, 'qi') and qi is None:
raise ValueError('qi is not existed, should be provided.')
if hasattr(self, 'qo') and qo is not None:
raise ValueError('qo has been provided in init function.')
if not hasattr(self, 'qo') and qo is None:
raise ValueError('qo is not existed, should be provided.')
if qi is not None:
self.qi = qi
if qo is not None:
self.qo = qo
if self.mode == 1:
self.M.data = (self.qw.scale * self.qi.scale / self.qo.scale).data
            # What needs to be stored are the quantized weights, self.qw.quantize_tensor(self.fc_module.weight.data), and the quantized bias,
            # quantize_tensor(self.fc_module.bias.data, scale=self.qi.scale * self.qw.scale, zero_point=0, num_bits=32, signed=True).
self.fc_module.weight.data = self.qw.quantize_tensor(self.fc_module.weight.data, self.mode)
self.fc_module.weight.data = self.fc_module.weight.data - self.qw.zero_point
self.fc_module.bias.data = quantize_tensor(self.fc_module.bias.data, scale=self.qi.scale * self.qw.scale,
zero_point=0, num_bits=self.num_bits, signed=True, n_exp=self.n_exp, mode=self.mode)
            # Should num_bits here follow the requested bit-width? qi.scale * qw.scale is used in place of qb.scale; this is an approximation, but the error is negligible.
elif self.mode == 2 or self.mode==3:
self.fc_module.weight.data = self.qw.quantize_tensor(self.fc_module.weight.data, self.mode)
self.fc_module.bias.data = quantize_tensor(self.fc_module.bias.data, scale=self.qb.scale,
zero_point=0, num_bits=self.num_bits, signed=True, n_exp=self.n_exp, mode=self.mode)
def fakefreeze(self, qi=None, qo=None):
if hasattr(self, 'qi') and qi is not None:
raise ValueError('qi has been provided in init function.')
if not hasattr(self, 'qi') and qi is None:
raise ValueError('qi is not existed, should be provided.')
if hasattr(self, 'qo') and qo is not None:
raise ValueError('qo has been provided in init function.')
if not hasattr(self, 'qo') and qo is None:
raise ValueError('qo is not existed, should be provided.')
if qi is not None:
self.qi = qi
if qo is not None:
self.qo = qo
if self.mode == 1:
self.M.data = (self.qw.scale * self.qi.scale / self.qo.scale).data
            # What needs to be stored are the quantized weights, self.qw.quantize_tensor(self.fc_module.weight.data), and the quantized bias,
            # quantize_tensor(self.fc_module.bias.data, scale=self.qi.scale * self.qw.scale, zero_point=0, num_bits=32, signed=True).
self.fc_module.weight.data = self.qw.quantize_tensor(self.fc_module.weight.data, self.mode)
self.fc_module.weight.data = self.qw.dequantize_tensor(self.fc_module.weight.data, self.mode)
self.fc_module.bias.data = quantize_tensor(self.fc_module.bias.data, scale=self.qi.scale * self.qw.scale,
zero_point=0, num_bits=self.num_bits, signed=True, n_exp=self.n_exp, mode=self.mode)
self.fc_module.bias.data = dequantize_tensor(self.fc_module.bias.data, scale=self.qi.scale * self.qw.scale,
zero_point=0, mode=self.mode)
elif self.mode == 2 or self.mode==3:
self.fc_module.weight.data = self.qw.quantize_tensor(self.fc_module.weight.data, self.mode)
self.fc_module.weight.data = self.qw.dequantize_tensor(self.fc_module.weight.data, self.mode)
self.fc_module.bias.data = quantize_tensor(self.fc_module.bias.data, scale=self.qb.scale,
zero_point=0, num_bits=self.num_bits, signed=True, n_exp=self.n_exp, mode=self.mode)
self.fc_module.bias.data = dequantize_tensor(self.fc_module.bias.data, scale=self.qb.scale,
zero_point=0, mode=self.mode)
def forward(self, x):
if hasattr(self, 'qi'):
self.qi.update(x)
x = FakeQuantize.apply(x, self.qi)
self.qw.update(self.fc_module.weight.data)
self.qb.update(self.fc_module.bias.data)
        # The weights go through the fake-quantization layer; the commented-out variant below left the bias unquantized (used both during training and inference).
        # x = F.linear(x, FakeQuantize.apply(self.fc_module.weight, self.qw), self.fc_module.bias)
x = F.linear(x, FakeQuantize.apply(self.fc_module.weight, self.qw),
FakeQuantize.apply(self.fc_module.bias, self.qb))
if hasattr(self, 'qo'):
self.qo.update(x)
x = FakeQuantize.apply(x, self.qo)
return x
    # quantize_inference works entirely on quantized data and parameters; the network's first layer does FP32 -> INT and the last layer does INT -> FP32.
def quantize_inference(self, x):
if self.mode == 1:
x = x - self.qi.zero_point
x = self.fc_module(x)
x = self.M * x
x.round_()
x = x + self.qo.zero_point
x.clamp_(0., 2. ** self.num_bits - 1.).round_()
return x
elif self.mode == 2 or self.mode == 3:
x = self.fc_module(x)
x = FakeQuantize.apply(x, self.qo) # 将计算结果再用PoT重新表示
return x
# qi may need to be tracked here: in ResNet not every QReLU can skip its own qi statistics (after an elementwise residual add, the qo of a single preceding layer cannot simply serve as this layer's qi). qo is optional, since it would differ little from qi.
class QReLU(QModule):
def __init__(self, qi=False, num_bits=None, n_exp=4, mode=1):
super(QReLU, self).__init__(qi=qi, num_bits=num_bits, n_exp=n_exp, mode=mode)
self.mode = mode
self.n_exp = n_exp
    # the final quantization parameters are about to be saved
def freeze(self, qi=None):
if hasattr(self, 'qi') and qi is not None:
raise ValueError('qi has been provided in init function.')
if not hasattr(self, 'qi') and qi is None:
raise ValueError('qi is not existed, should be provided.')
        # if qi is not None, adopt the externally supplied value; otherwise keep the self-collected qi
if qi is not None:
self.qi = qi
def fakefreeze(self, qi=None):
if hasattr(self, 'qi') and qi is not None:
raise ValueError('qi has been provided in init function.')
if not hasattr(self, 'qi') and qi is None:
raise ValueError('qi is not existed, should be provided.')
if qi is not None:
self.qi = qi
def forward(self, x):
if hasattr(self, 'qi'):
            self.qi.update(x)  # updates scale etc. here
            x = FakeQuantize.apply(x, self.qi)  # re-express x with qi's scale (PoT representation in PoT mode)
x = F.relu(x)
return x
    def quantize_inference(self, x):
        x = x.clone()
        # In the quantized domain ReLU becomes a clamp at the input zero-point.
        a = self.qi.zero_point.float().to(x.device)
        x[x < a] = a
        return x
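    # Why clamping at the zero-point implements ReLU (a minimal sketch with hypothetical numbers,
    # assuming the usual affine mapping real = scale * (q - zero_point)):
    #   scale = 0.1, zero_point = 128
    #   q = 120  ->  real = 0.1 * (120 - 128) = -0.8  -> ReLU gives 0   -> quantizes back to q = 128
    #   q = 140  ->  real = 0.1 * (140 - 128) = +1.2  -> ReLU keeps it  -> q stays 140
    # So max(q, zero_point) in the integer domain equals quantize(relu(dequantize(q))).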
class QMaxPooling2d(QModule):
def __init__(self, kernel_size=3, stride=1, padding=0, qi=False, num_bits=None, n_exp=4, mode=1):
super(QMaxPooling2d, self).__init__(qi=qi, num_bits=num_bits, n_exp=n_exp, mode=mode)
self.kernel_size = kernel_size
self.stride = stride
self.padding = padding
self.mode = mode
self.n_exp = n_exp
def freeze(self, qi=None):
if hasattr(self, 'qi') and qi is not None:
raise ValueError('qi has been provided in init function.')
if not hasattr(self, 'qi') and qi is None:
raise ValueError('qi is not existed, should be provided.')
if qi is not None:
self.qi = qi
def fakefreeze(self, qi=None):
if hasattr(self, 'qi') and qi is not None:
raise ValueError('qi has been provided in init function.')
if not hasattr(self, 'qi') and qi is None:
raise ValueError('qi is not existed, should be provided.')
if qi is not None:
self.qi = qi
def forward(self, x):
if hasattr(self, 'qi'):
self.qi.update(x)
            x = FakeQuantize.apply(x, self.qi)  # as with QReLU: update qi's scale first, then re-express x with it (usually the previous layer has qo=True, so x is already quantized this way)
x = F.max_pool2d(x, self.kernel_size, self.stride, self.padding)
return x
def quantize_inference(self, x):
return F.max_pool2d(x, self.kernel_size, self.stride, self.padding)
class QConvBNReLU(QModule):
def __init__(self, conv_module, bn_module, qi=True, qo=True, num_bits=8, n_exp=4, mode=1):
super(QConvBNReLU, self).__init__(qi=qi, qo=qo, num_bits=num_bits, n_exp=n_exp, mode=mode)
self.num_bits = num_bits
self.conv_module = conv_module
self.bn_module = bn_module
self.qw = QParam(num_bits=num_bits, n_exp=n_exp, mode=mode)
self.qb = QParam(num_bits=num_bits, n_exp=n_exp, mode=mode)
        self.register_buffer('M', torch.tensor([], requires_grad=False))  # register the rescaling factor M as a buffer
self.mode = mode
self.n_exp = n_exp
def fold_bn(self, mean, std):
if self.bn_module.affine:
gamma_ = self.bn_module.weight / std
weight = self.conv_module.weight * gamma_.view(self.conv_module.out_channels, 1, 1, 1)
if self.conv_module.bias is not None:
bias = gamma_ * self.conv_module.bias - gamma_ * mean + self.bn_module.bias
            else:  # conv_module.bias is None
bias = self.bn_module.bias - gamma_ * mean
else:
gamma_ = 1 / std
weight = self.conv_module.weight * gamma_
if self.conv_module.bias is not None:
bias = gamma_ * self.conv_module.bias - gamma_ * mean
else:
bias = -gamma_ * mean
return weight, bias
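    # BN folding used above (the standard identity, restated here for reference):
    #   y = gamma * (conv(x, W) + b - mu) / sigma + beta,  with sigma = sqrt(var + eps)
    #   => W_fold = W * (gamma / sigma),   b_fold = (b - mu) * (gamma / sigma) + beta
    # Quick numeric check with hypothetical scalars: W=2, b=1, mu=0.5, var=0.2475 (sigma=0.5 with eps=0.0025), gamma=3, beta=0.1
    #   original: 3 * (2x + 1 - 0.5) / 0.5 + 0.1 = 12x + 3.1
    #   folded:   W_fold = 2 * 6 = 12,  b_fold = (1 - 0.5) * 6 + 0.1 = 3.1  ->  12x + 3.1  (matches)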
def forward(self, x):
if hasattr(self, 'qi'):
self.qi.update(x)
x = FakeQuantize.apply(x, self.qi)
if self.training:
y = F.conv2d(x, self.conv_module.weight, self.conv_module.bias,
stride=self.conv_module.stride,
padding=self.conv_module.padding,
dilation=self.conv_module.dilation,
groups=self.conv_module.groups)
y = y.permute(1, 0, 2, 3) # NCHW -> CNHW
y = y.contiguous().view(self.conv_module.out_channels, -1) # CNHW -> C,NHW
# mean = y.mean(1)
# var = y.var(1)
mean = y.mean(1).detach()
var = y.var(1).detach()
self.bn_module.running_mean = \
(1 - self.bn_module.momentum) * self.bn_module.running_mean + \
self.bn_module.momentum * mean
self.bn_module.running_var = \
(1 - self.bn_module.momentum) * self.bn_module.running_var + \
self.bn_module.momentum * var
else:
mean = Variable(self.bn_module.running_mean)
var = Variable(self.bn_module.running_var)
std = torch.sqrt(var + self.bn_module.eps)
weight, bias = self.fold_bn(mean, std)
self.qw.update(weight.data)
if self.conv_module.bias is not None:
self.qb.update(bias.data)
# x = F.conv2d(x, FakeQuantize.apply(weight, self.qw), bias,
# stride=self.conv_module.stride,
# padding=self.conv_module.padding, dilation=self.conv_module.dilation,
# groups=self.conv_module.groups)
if self.conv_module.bias is not None:
x = F.conv2d(x, FakeQuantize.apply(weight, self.qw), FakeQuantize.apply(bias, self.qb),
stride=self.conv_module.stride,
padding=self.conv_module.padding, dilation=self.conv_module.dilation,
groups=self.conv_module.groups)
else:
x = F.conv2d(x, FakeQuantize.apply(weight, self.qw),
stride=self.conv_module.stride,
padding=self.conv_module.padding, dilation=self.conv_module.dilation,
groups=self.conv_module.groups)
x = F.relu(x)
if hasattr(self, 'qo'):
self.qo.update(x)
x = FakeQuantize.apply(x, self.qo)
return x
def freeze(self, qi=None, qo=None):
if hasattr(self, 'qi') and qi is not None:
raise ValueError('qi has been provided in init function.')
if not hasattr(self, 'qi') and qi is None:
raise ValueError('qi is not existed, should be provided.')
if hasattr(self, 'qo') and qo is not None:
raise ValueError('qo has been provided in init function.')
if not hasattr(self, 'qo') and qo is None:
raise ValueError('qo is not existed, should be provided.')
if qi is not None:
self.qi = qi
if qo is not None:
self.qo = qo
if self.mode == 1:
self.M.data = (self.qw.scale * self.qi.scale / self.qo.scale).data
std = torch.sqrt(self.bn_module.running_var + self.bn_module.eps)
weight, bias = self.fold_bn(self.bn_module.running_mean, std)
self.conv_module.weight.data = self.qw.quantize_tensor(weight.data, self.mode)
self.conv_module.weight.data = self.conv_module.weight.data - self.qw.zero_point
if self.conv_module.bias is not None:
self.conv_module.bias.data = quantize_tensor(bias, scale=self.qi.scale * self.qw.scale,
zero_point=0, num_bits=self.num_bits, signed=True, n_exp=self.n_exp, mode=self.mode)
elif self.mode == 2 or self.mode == 3:
std = torch.sqrt(self.bn_module.running_var + self.bn_module.eps)
weight, bias = self.fold_bn(self.bn_module.running_mean, std)
self.conv_module.weight.data = self.qw.quantize_tensor(weight.data, self.mode)
if self.conv_module.bias is not None:
self.conv_module.bias.data = quantize_tensor(bias, scale=self.qb.scale,
zero_point=0, num_bits=self.num_bits, signed=True, n_exp=self.n_exp, mode=self.mode)
def fakefreeze(self, qi=None, qo=None):
if hasattr(self, 'qi') and qi is not None:
raise ValueError('qi has been provided in init function.')
if not hasattr(self, 'qi') and qi is None:
raise ValueError('qi is not existed, should be provided.')
if hasattr(self, 'qo') and qo is not None:
raise ValueError('qo has been provided in init function.')
if not hasattr(self, 'qo') and qo is None:
raise ValueError('qo is not existed, should be provided.')
if qi is not None:
self.qi = qi
if qo is not None:
self.qo = qo
if self.mode == 1:
self.M.data = (self.qw.scale * self.qi.scale / self.qo.scale).data
std = torch.sqrt(self.bn_module.running_var + self.bn_module.eps)
weight, bias = self.fold_bn(self.bn_module.running_mean, std)
self.conv_module.weight.data = self.qw.quantize_tensor(weight.data, self.mode)
self.conv_module.weight.data = self.qw.dequantize_tensor(self.conv_module.weight.data, self.mode)
if self.conv_module.bias is not None:
self.conv_module.bias.data = quantize_tensor(bias, scale=self.qi.scale * self.qw.scale,
zero_point=0, num_bits=self.num_bits, signed=True, n_exp=self.n_exp, mode=self.mode)
self.conv_module.bias.data = dequantize_tensor(self.conv_module.bias.data,
scale=self.qi.scale * self.qw.scale,
zero_point=0, mode=self.mode)
elif self.mode == 2 or self.mode == 3:
self.M.data = (self.qw.scale * self.qi.scale / self.qo.scale).data
std = torch.sqrt(self.bn_module.running_var + self.bn_module.eps)
weight, bias = self.fold_bn(self.bn_module.running_mean, std)
self.conv_module.weight.data = self.qw.quantize_tensor(weight.data,self.mode)
self.conv_module.weight.data = self.qw.dequantize_tensor(self.conv_module.weight.data, self.mode)
if self.conv_module.bias is not None:
self.conv_module.bias.data = quantize_tensor(bias, scale=self.qb.scale,
zero_point=0, num_bits=self.num_bits, signed=True, n_exp=self.n_exp,
mode=self.mode)
self.conv_module.bias.data = dequantize_tensor(self.conv_module.bias.data,
scale=self.qb.scale,
zero_point=0, mode=self.mode)
def quantize_inference(self, x):
if self.mode == 1:
x = x - self.qi.zero_point
x = self.conv_module(x)
x = self.M * x
x.round_()
            # x = F.relu(x)  # an explicit ReLU was once added here
x = x + self.qo.zero_point
x.clamp_(0., 2. ** self.num_bits - 1.).round_()
return x
elif self.mode == 2 or self.mode == 3:
x = self.conv_module(x)
# x = F.relu(x)
x = FakeQuantize.apply(x, self.qo)
return x
class QConvBN(QModule):
def __init__(self, conv_module, bn_module, qi=True, qo=True, num_bits=8, n_exp=4, mode=1):
super(QConvBN, self).__init__(qi=qi, qo=qo, num_bits=num_bits, n_exp=n_exp, mode=mode)
self.num_bits = num_bits
self.conv_module = conv_module
self.bn_module = bn_module
self.qw = QParam(num_bits=num_bits, n_exp=n_exp, mode=mode)
self.qb = QParam(num_bits=num_bits, n_exp=n_exp, mode=mode)
        self.register_buffer('M', torch.tensor([], requires_grad=False))  # register the rescaling factor M as a buffer
self.mode = mode
self.n_exp = n_exp
def fold_bn(self, mean, std):
if self.bn_module.affine:
gamma_ = self.bn_module.weight / std
weight = self.conv_module.weight * gamma_.view(self.conv_module.out_channels, 1, 1, 1)
if self.conv_module.bias is not None:
bias = gamma_ * self.conv_module.bias - gamma_ * mean + self.bn_module.bias
else:
bias = self.bn_module.bias - gamma_ * mean
else:
gamma_ = 1 / std
weight = self.conv_module.weight * gamma_
if self.conv_module.bias is not None:
bias = gamma_ * self.conv_module.bias - gamma_ * mean
else:
bias = -gamma_ * mean
return weight, bias
def forward(self, x):
if hasattr(self, 'qi'):
self.qi.update(x)
x = FakeQuantize.apply(x, self.qi)
if self.training:
y = F.conv2d(x, self.conv_module.weight, self.conv_module.bias,
stride=self.conv_module.stride,
padding=self.conv_module.padding,
dilation=self.conv_module.dilation,
groups=self.conv_module.groups)
y = y.permute(1, 0, 2, 3) # NCHW -> CNHW
y = y.contiguous().view(self.conv_module.out_channels, -1) # CNHW -> C,NHW
# mean = y.mean(1)
# var = y.var(1)
mean = y.mean(1).detach()
var = y.var(1).detach()
self.bn_module.running_mean = \
(1 - self.bn_module.momentum) * self.bn_module.running_mean + \
self.bn_module.momentum * mean
self.bn_module.running_var = \
(1 - self.bn_module.momentum) * self.bn_module.running_var + \
self.bn_module.momentum * var
else:
mean = Variable(self.bn_module.running_mean)
var = Variable(self.bn_module.running_var)
std = torch.sqrt(var + self.bn_module.eps)
weight, bias = self.fold_bn(mean, std)
self.qw.update(weight.data)
if self.conv_module.bias is not None:
self.qb.update(bias.data)
# x = F.conv2d(x, FakeQuantize.apply(weight, self.qw), bias,
# stride=self.conv_module.stride,
# padding=self.conv_module.padding, dilation=self.conv_module.dilation,
# groups=self.conv_module.groups)
if self.conv_module.bias is not None:
x = F.conv2d(x, FakeQuantize.apply(weight, self.qw), FakeQuantize.apply(bias, self.qb),
stride=self.conv_module.stride,
padding=self.conv_module.padding, dilation=self.conv_module.dilation,
groups=self.conv_module.groups)
else:
x = F.conv2d(x, FakeQuantize.apply(weight, self.qw),
stride=self.conv_module.stride,
padding=self.conv_module.padding, dilation=self.conv_module.dilation,
groups=self.conv_module.groups)
# x = F.relu(x)
if hasattr(self, 'qo'):
self.qo.update(x)
x = FakeQuantize.apply(x, self.qo)
return x
def freeze(self, qi=None, qo=None):
if hasattr(self, 'qi') and qi is not None:
raise ValueError('qi has been provided in init function.')
if not hasattr(self, 'qi') and qi is None:
raise ValueError('qi is not existed, should be provided.')
if hasattr(self, 'qo') and qo is not None:
raise ValueError('qo has been provided in init function.')
if not hasattr(self, 'qo') and qo is None:
raise ValueError('qo is not existed, should be provided.')
if qi is not None:
self.qi = qi
if qo is not None:
self.qo = qo
if self.mode == 1:
self.M.data = (self.qw.scale * self.qi.scale / self.qo.scale).data
std = torch.sqrt(self.bn_module.running_var + self.bn_module.eps)
weight, bias = self.fold_bn(self.bn_module.running_mean, std)
self.conv_module.weight.data = self.qw.quantize_tensor(weight.data, self.mode)
self.conv_module.weight.data = self.conv_module.weight.data - self.qw.zero_point
if self.conv_module.bias is not None:
self.conv_module.bias.data = quantize_tensor(bias, scale=self.qi.scale * self.qw.scale,
zero_point=0, num_bits=self.num_bits, signed=True, n_exp=self.n_exp, mode=self.mode)
elif self.mode == 2 or self.mode == 3:
std = torch.sqrt(self.bn_module.running_var + self.bn_module.eps)
weight, bias = self.fold_bn(self.bn_module.running_mean, std)
self.conv_module.weight.data = self.qw.quantize_tensor(weight.data, self.mode)
if self.conv_module.bias is not None:
self.conv_module.bias.data = quantize_tensor(bias, scale=self.qb.scale,
zero_point=0, num_bits=self.num_bits, signed=True, n_exp=self.n_exp, mode=self.mode)
def fakefreeze(self, qi=None, qo=None):
if hasattr(self, 'qi') and qi is not None:
raise ValueError('qi has been provided in init function.')
if not hasattr(self, 'qi') and qi is None:
raise ValueError('qi is not existed, should be provided.')
if hasattr(self, 'qo') and qo is not None:
raise ValueError('qo has been provided in init function.')
if not hasattr(self, 'qo') and qo is None:
raise ValueError('qo is not existed, should be provided.')
if qi is not None:
self.qi = qi
if qo is not None:
self.qo = qo
if self.mode == 1:
self.M.data = (self.qw.scale * self.qi.scale / self.qo.scale).data
std = torch.sqrt(self.bn_module.running_var + self.bn_module.eps)
weight, bias = self.fold_bn(self.bn_module.running_mean, std)
self.conv_module.weight.data = self.qw.quantize_tensor(weight.data, self.mode)
self.conv_module.weight.data = self.qw.dequantize_tensor(self.conv_module.weight.data, self.mode)
if self.conv_module.bias is not None:
self.conv_module.bias.data = quantize_tensor(bias, scale=self.qi.scale * self.qw.scale,
zero_point=0, num_bits=self.num_bits, signed=True, n_exp=self.n_exp, mode=self.mode)
self.conv_module.bias.data = dequantize_tensor(self.conv_module.bias.data,
scale=self.qi.scale * self.qw.scale,
zero_point=0, mode=self.mode)
elif self.mode == 2 or self.mode == 3:
self.M.data = (self.qw.scale * self.qi.scale / self.qo.scale).data
std = torch.sqrt(self.bn_module.running_var + self.bn_module.eps)
weight, bias = self.fold_bn(self.bn_module.running_mean, std)
self.conv_module.weight.data = self.qw.quantize_tensor(weight.data,self.mode)
# self.conv_module.weight.data = self.qw.dequantize_tensor(self.conv_module.weight.data, self.mode)
if self.conv_module.bias is not None:
self.conv_module.bias.data = quantize_tensor(bias, scale=self.qb.scale,
zero_point=0, num_bits=self.num_bits, signed=True, n_exp=self.n_exp,
mode=self.mode)
# self.conv_module.bias.data = dequantize_tensor(self.conv_module.bias.data,scale=self.qb.scale,zero_point=0, mode=self.mode)
def quantize_inference(self, x):
if self.mode == 1:
x = x - self.qi.zero_point
x = self.conv_module(x)
x = self.M * x
x.round_()
x = x + self.qo.zero_point
x.clamp_(0., 2. ** self.num_bits - 1.).round_()
return x
elif self.mode == 2 or self.mode == 3:
x = self.conv_module(x)
x = FakeQuantize.apply(x, self.qo)
return x
# TODO: freeze()/fakefreeze() probably need to handle qo as well
class QAdaptiveAvgPool2d(QModule):
def __init__(self, qi=False ,qo=True, num_bits=None, n_exp=4, mode=1):
super(QAdaptiveAvgPool2d, self).__init__(qi=qi, qo=qo, num_bits=num_bits, n_exp=n_exp, mode=mode)
self.num_bits = num_bits
self.mode = mode
self.n_exp = n_exp
def freeze(self, qi=None):
if hasattr(self, 'qi') and qi is not None:
raise ValueError('qi has been provided in init function.')
if not hasattr(self, 'qi') and qi is None:
raise ValueError('qi is not existed, should be provided.')
if qi is not None:
self.qi = qi
def fakefreeze(self, qi=None):
if hasattr(self, 'qi') and qi is not None:
raise ValueError('qi has been provided in init function.')
if not hasattr(self, 'qi') and qi is None:
raise ValueError('qi is not existed, should be provided.')
if qi is not None:
self.qi = qi
def forward(self, x):
if hasattr(self, 'qi'):
self.qi.update(x)
            x = FakeQuantize.apply(x, self.qi)  # as with QReLU: update qi's scale first, then re-express x with it (usually the previous layer has qo=True, so x is already quantized this way)
        x = F.adaptive_avg_pool2d(x, (1, 1))  # quantizing both the input and the output is what makes the pooling count as quantized
if hasattr(self, 'qo'):
self.qo.update(x)
x = FakeQuantize.apply(x, self.qo)
return x
def quantize_inference(self, x):
x = F.adaptive_avg_pool2d(x,(1,1))
x = FakeQuantize.apply(x, self.qo)
return x
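# A minimal end-to-end sketch of how these wrappers are meant to be used (calibrate -> freeze ->
# integer inference). It assumes mode=1 (uniform INT quantization) and is only a sanity check,
# not part of the original training/PTQ scripts; it only runs when this file is executed directly.
if __name__ == "__main__":
    torch.manual_seed(0)
    conv = nn.Conv2d(3, 8, kernel_size=3, padding=1)
    qconv = QConv2d(conv, qi=True, qo=True, num_bits=8, n_exp=4, mode=1)
    x = torch.randn(4, 3, 32, 32)
    _ = qconv(x)                            # forward doubles as calibration: updates qi/qw/qb/qo statistics
    qconv.freeze()                          # bake quantized weights/bias and the rescaling factor M
    xq = qconv.qi.quantize_tensor(x, 1)     # FP32 input -> quantized input
    yq = qconv.quantize_inference(xq)       # integer-domain convolution
    y = qconv.qo.dequantize_tensor(yq, 1)   # back to FP32 for inspection
    print('quantized output range:', yq.min().item(), yq.max().item())
    print('dequantized output mean:', y.mean().item())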
import torch
import torch.nn as nn
import torch.optim as optim
from torch.utils.data import DataLoader
from get_weight import *
from torch.utils.tensorboard import SummaryWriter
from torchvision import datasets, transforms
from torchvision.datasets import CIFAR10
from torch.optim.lr_scheduler import CosineAnnealingLR
from resnet import *
from torchvision.transforms import transforms
# import models
import time
import os
import argparse
# Define the model
class Net(nn.Module):
def __init__(self):
super(Net, self).__init__()
self.conv1 = nn.Conv2d(3, 32, kernel_size=3)
self.pool = nn.MaxPool2d(kernel_size=2, stride=2)
self.conv2 = nn.Conv2d(32, 64, kernel_size=3)
self.fc1 = nn.Linear(64 * 6 * 6, 512)
self.fc2 = nn.Linear(512, 10)
self.relu = nn.ReLU()
def forward(self, x):
x = self.relu(self.conv1(x))
x = self.pool(x)
x = self.relu(self.conv2(x))
x = self.pool(x)
x = torch.flatten(x, start_dim=1)
x = self.relu(self.fc1(x))
x = self.fc2(x)
return x
def train(model, optimizer, criterion, train_loader, device):
model.train()
running_loss = 0.0
flag = 0
cnt = 0
for i, data in enumerate(train_loader):
inputs, labels = data
inputs, labels = inputs.to(device), labels.to(device)
optimizer.zero_grad()
outputs = model(inputs)
loss = criterion(outputs, labels)
loss.backward()
histo, grads = (get_model_histogram(model))
if flag == 0:
flag = 1
grads_sum = grads
else:
for k,v in grads_sum.items():
grads_sum[k] += grads[k]
optimizer.step()
running_loss += loss.item()
train_loss = running_loss / len(train_loader)
for k, v in grads_sum.items():
grads_sum[k] = v / len(train_loader)
return train_loss,grads_sum
def evaluate(model, criterion, test_loader, device):
model.eval()
correct, total = 0, 0
with torch.no_grad():
for data in test_loader:
images, labels = data
images, labels = images.to(device), labels.to(device)
outputs = model(images)
_, predicted = torch.max(outputs.data, 1)
total += labels.size(0)
correct += (predicted == labels).sum().item()
accuracy = 100 * correct / total
return accuracy
def get_children(model: torch.nn.Module):
    # get children from model!
    # Use nn.ModuleList as the container so the parameters can still be updated later
    children = nn.ModuleList(model.children())
    # print(children)
    # makes it easy to keep updating the contained modules afterwards
    flatt_children = nn.ModuleList()
# children = list(model.children())
# flatt_children = nn.ModuleList()
# flatt_children = []
if len(children) == 0:
# if model has no children; model is last child! :O
return model
else:
# look for children from children... to the last child!
for child in children:
try:
flatt_children.extend(get_children(child))
except TypeError:
flatt_children.append(get_children(child))
# print(flatt_children)
return flatt_children
if __name__ == "__main__":
# torch.cuda.empty_cache()
parser = argparse.ArgumentParser(description='PyTorch FP32 Training')
parser.add_argument('-m', '--model', metavar='MODEL ARCH', default='resnet18')
parser.add_argument('-e','--epochs', default=100, type=int, metavar='EPOCHS', help='number of total epochs to run')
parser.add_argument('-b', '--batch_size', default=128, type=int, metavar='BATCH SIZE', help='mini-batch size (default: 128)')
parser.add_argument('-j','--workers', default=4, type=int, metavar='WORKERS',help='number of data loading workers (default: 4)')
parser.add_argument('-lr', '--learning-rate', default=0.001, type=float, metavar='LR', help='initial learning rate', dest='lr')
parser.add_argument('-wd','--weight_decay',default=0.0001,type=float,metavar='WD',help='lr schduler weight decay',dest='wd')
parser.add_argument('-t', '--test', dest='test', action='store_true', help='test model on test set')
# models = ['resnet18', 'resnet50', 'resnet152','resnet18']
    # training hyper-parameters
args = parser.parse_args()
num_epochs = args.epochs
print(num_epochs)
batch_size = args.batch_size
print(batch_size)
num_workers = args.workers
lr = args.lr
weight_decay = args.wd
best_acc = float("-inf")
start_time = time.time()
    # model, loss function and optimizer
    device = torch.device("cuda" if torch.cuda.is_available() else "cpu")  # device selection
print(device)
    # model = Net().to(device)  # move the model to the device
# model = resnet18().to(device)
# model = models.__dict__[args.model]().to(device)
# t = torch.cuda.get_device_properties(0).total_memory
# r = torch.cuda.memory_reserved(0)
# a = torch.cuda.memory_allocated(0)
# f = r-a # free memory
# print(f"Total memory: {t}")
# print(f"Reserved memory: {r}")
# print(f"Allocated memory: {a}")
# print(f"Free memory: {f}")
if args.model == 'resnet18' :
model = resnet18().to(device)
elif args.model == 'resnet50' :
model = resnet50().to(device)
elif args.model == 'resnet152' :
model = resnet152().to(device)
elif args.model == 'LeNet' :
model = LeNet().to(device)
elif args.model == 'NetBN' :
model = NetBN().to(device)
criterion = nn.CrossEntropyLoss()
optimizer = optim.Adam(model.parameters(), lr=lr)
# optimizer = optim.AdaBound(model.parameters(), lr=lr,
# weight_decay=weight_decay, final_lr=0.001*lr)
print("ok!")
    # data parallelism
if torch.cuda.device_count() > 1:
print(f"Using {torch.cuda.device_count()} GPUs")
model = nn.DataParallel(model)
    # load the data
train_loader = torch.utils.data.DataLoader(
datasets.CIFAR10('./project/p/data', train=True, download=False,
transform=transforms.Compose([
transforms.RandomCrop(32, padding=2),
transforms.RandomHorizontalFlip(),
transforms.ToTensor(),
transforms.Normalize(
(0.4914, 0.4822, 0.4465), (0.2023, 0.1994, 0.2010))
])),
batch_size=batch_size, shuffle=True, num_workers=num_workers, pin_memory=True
)
test_loader = torch.utils.data.DataLoader(
datasets.CIFAR10('./project/p/data', train=False, download=False, transform=transforms.Compose([
transforms.ToTensor(),
transforms.Normalize((0.4914, 0.4822, 0.4465),
(0.2023, 0.1994, 0.2010))
])),
batch_size=batch_size, shuffle=False, num_workers=num_workers, pin_memory=True
)
# train_loader = DataLoader(train_dataset, batch_size=batch_size, shuffle=True, num_workers=num_workers)
# test_loader = DataLoader(test_dataset, batch_size=batch_size, shuffle=False, num_workers=num_workers)
    # learning-rate scheduler
# lr_scheduler = optim.lr_scheduler.StepLR(optimizer, step_size=5, gamma=0.1)
lr_scheduler = CosineAnnealingLR(optimizer, T_max=num_epochs)
# TensorBoard
# WARN
# writer = SummaryWriter(log_dir='./project/p/models_log/trail/full_log')
writer = SummaryWriter(log_dir='./project/p/models_log/' + args.model + '/full_log')
    # early-stopping parameters
patience = 30
count = 0
# WARN
# save_dir = './project/p/ckpt/trail'
save_dir = './project/p/ckpt/' + args.model
if not os.path.isdir(save_dir):
os.makedirs(save_dir, mode=0o777)
os.chmod(save_dir, mode=0o777)
# checkpoint_dir = './project/p/checkpoint/cifar-10_trail_model'
checkpoint_dir = './project/p/checkpoint/cifar-10_' + args.model
if not os.path.isdir(checkpoint_dir):
os.makedirs(checkpoint_dir, mode=0o777)
os.chmod(checkpoint_dir, mode=0o777)
    # training loop
if args.test == True:
model.load_state_dict(torch.load(save_dir+'/' + args.model + '.pt'))
acc = evaluate(model, criterion, test_loader, device=device)
print(f"test accuracy: {acc:.2f}%")
for name, module in model.named_modules():
print(f"{name}: {module}\n")
print('========================================================')
print('========================================================')
model.quantize()
        for name, layer in model.quantize_layers.items():
            print(f"Layer {name}: {layer} ")  # this traversal is sufficient
else:
for epoch in range(num_epochs):
            # train the model and log the loss
train_loss,grads_sum = train(model, optimizer, criterion,
train_loader, device=device)
writer.add_scalar("Training Loss", train_loss, epoch + 1)
            # evaluate the model and log the accuracy
if (epoch + 1) % 5 == 0:
acc = evaluate(model, criterion, test_loader, device=device)
writer.add_scalar("Validation Accuracy", acc, epoch + 1)
checkpoint = {
'model': model.state_dict(),
'optimizer': optimizer.state_dict(),
'epoch': epoch,
'grads': grads_sum,
'accuracy':acc
}
# for name, param in model.named_parameters():
# writer.add_histogram(tag=name + '_grad', values=param.grad, global_step=epoch)
# writer.add_histogram(tag=name + '_data', values=param.data, global_step=epoch)
for name, param in grads_sum.items():
                    # grads_sum holds per-epoch gradients averaged over the number of batches (see train() above)
writer.add_histogram(tag=name + '_grad', values=param, global_step=epoch)
                # log the weights as they are after the last batch of this epoch
for name, param in model.named_parameters():
writer.add_histogram(tag=name + '_data', values=param.data, global_step=epoch)
# WARN
# torch.save(checkpoint, checkpoint_dir + '/ckpt_cifar-10_trail_model%s.pt' % (str(epoch+1)))
torch.save(checkpoint, checkpoint_dir + '/ckpt_cifar-10_' + args.model + '_%s.pt' % (str(epoch+1)))
                # save the best model so far
if acc > best_acc:
best_acc = acc
count = 0
# WARN
# torch.save(model.state_dict(), save_dir+'/model_trail.pt')
torch.save(model.state_dict(), save_dir+'/' + args.model + '.pt')
else:
count += 1
print(
f"Epoch {epoch + 1}/{num_epochs}, Train Loss: {train_loss:.5f}, Val Acc: {acc:.2f}%")
                # check whether early stopping should trigger
if count == patience:
print(f"No improvement after {patience} epochs. Early stop!")
break
            # update the learning rate
lr_scheduler.step()
        # training time and best validation accuracy
print(f"Training took {(time.time() - start_time) / 60:.2f} minutes")
print(f"Best validation accuracy: {best_acc:.2f}%")
        # load and test the best model
# model.load_state_dict(torch.load("best_model.pth"))
# model.to(device)
# test_acc = evaluate(model, criterion, test_loader, device="cuda")
# print(f"Test Accuracy: {test_acc:.2f}%")
        # close the TensorBoard writer
writer.close()
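# Typical invocations of this training script (saved as new_train.py, which the QAT code below
# imports via `from new_train import get_children`; all flags are defined by the argparse setup above):
#   python new_train.py -m resnet18 -e 100 -b 128 -j 4 -lr 0.001
#   python new_train.py -m resnet50 --test          # evaluate a saved checkpoint on the test set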
# -*- coding: utf-8 -*-
from torch.serialization import load
from model import *
import argparse
import torch
import sys
import torch.nn as nn
import torch.optim as optim
from torchvision import datasets, transforms
import os
import os.path as osp
from torch.utils.tensorboard import SummaryWriter
# Produces the fake-quantized version of the PTQ weights (quantize then dequantize); the result stays close to the full-precision weight distribution, which makes Wasserstein-distance similarity comparisons convenient.
def direct_quantize(model, test_loader, device):
for i, (data, target) in enumerate(test_loader, 1):
data, target = data.to(device), target.to(device)
        output = model.quantize_forward(data)  # calls each quantized layer's forward in turn, which updates qw (and the other QParams)
        if i % 5000 == 0:  # calibrate on at most 5000 batches
            break
print('direct quantization finish')
def full_inference(model, test_loader, device):
correct = 0
for i, (data, target) in enumerate(test_loader, 1):
data, target = data.to(device), target.to(device)
output = model(data)
pred = output.argmax(dim=1, keepdim=True)
correct += pred.eq(target.view_as(pred)).sum().item()
print('\nTest set: Full Model Accuracy: {:.4f}%\n'.format(100. * correct / len(test_loader.dataset)))
def quantize_inference(model, test_loader, device):
correct = 0
for i, (data, target) in enumerate(test_loader, 1):
data, target = data.to(device), target.to(device)
output = model.quantize_inference(data)
pred = output.argmax(dim=1, keepdim=True)
correct += pred.eq(target.view_as(pred)).sum().item()
acc = 100. * correct / len(test_loader.dataset)
print('\nTest set: Quant Model Accuracy: {:.4f}%\n'.format(acc))
return acc
if __name__ == "__main__":
d1 = sys.argv[1]
batch_size = 32
using_bn = True
load_quant_model_file = None
# load_model_file = None
    net = 'LeNet'
acc = 0
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
print(device)
train_loader = torch.utils.data.DataLoader(
datasets.CIFAR10('data', train=True, download=True,
transform=transforms.Compose([
transforms.ToTensor(),
transforms.Normalize((0.4914, 0.4822, 0.4465), (0.2023, 0.1994, 0.2010))
])),
batch_size=batch_size, shuffle=True, num_workers=1, pin_memory=False
)
test_loader = torch.utils.data.DataLoader(
datasets.CIFAR10('data', train=False, transform=transforms.Compose([
transforms.ToTensor(),
transforms.Normalize((0.4914, 0.4822, 0.4465), (0.2023, 0.1994, 0.2010))
])),
batch_size=batch_size, shuffle=True, num_workers=1, pin_memory=False
)
if using_bn:
model = LeNet().to(device)
        # when generating the gradient-distribution plots, training started from scratch
model.load_state_dict(torch.load('ckpt/cifar-10_lenet_bn.pt', map_location='cpu'))
# else:
# model = Net()
# model.load_state_dict(torch.load('ckpt/mnist_cnn.pt', map_location='cpu'))
# save_file = "ckpt/mnist_cnn_ptq.pt"
# model.to(device)
model.eval()
full_inference(model, test_loader, device)
num_bits = int(d1)
model.quantize(num_bits=num_bits)
model.eval()
print('Quantization bit: %d' % num_bits)
dir_name = './ptq_fake_log/' + 'quant_bit_' + str(d1) + '_log'
if not os.path.isdir(dir_name):
os.makedirs(dir_name, mode=0o777)
os.chmod(dir_name, mode=0o777)
qwriter = SummaryWriter(log_dir=dir_name)
# for name, param in model.named_parameters():
# qwriter.add_histogram(tag=name + '_data', values=param.data)
if load_quant_model_file is not None:
model.load_state_dict(torch.load(load_quant_model_file))
print("Successfully load quantized model %s" % load_quant_model_file)
direct_quantize(model, train_loader, device)
    model.fakefreeze()  # fake-quantize the weights
for name, param in model.named_parameters():
qwriter.add_histogram(tag=name + '_data', values=param.data)
dir_name ='ckpt/ptq_fakefreeze'
if not os.path.isdir(dir_name):
os.makedirs(dir_name, mode=0o777)
os.chmod(dir_name, mode=0o777)
save_file = 'ckpt/ptq_fakefreeze/cifar-10_lenet_bn_ptq_' + str(d1) + '_.pt'
torch.save(model.state_dict(), save_file)
# -*- coding: utf-8 -*-
from torch.serialization import load
# from model import *
import argparse
import torch
import sys
import torch.nn as nn
import torch.optim as optim
from torchvision import datasets, transforms
import os
import os.path as osp
from torch.utils.tensorboard import SummaryWriter
from resnet import *
def direct_quantize(model, test_loader, device):
for i, (data, target) in enumerate(test_loader, 1):
data, target = data.to(device), target.to(device)
        output = model.quantize_forward(data)  # calls each quantized layer's forward in turn, which updates qw (and the other QParams)
        if i % 5000 == 0:  # calibrate on at most 5000 batches
            break
print('direct quantization finish')
def full_inference(model, test_loader, device):
correct = 0
for i, (data, target) in enumerate(test_loader, 1):
data, target = data.to(device), target.to(device)
output = model(data)
pred = output.argmax(dim=1, keepdim=True)
correct += pred.eq(target.view_as(pred)).sum().item()
print('\nTest set: Full Model Accuracy: {:.4f}%\n'.format(100. * correct / len(test_loader.dataset)))
def quantize_inference(model, test_loader, device):
correct = 0
for i, (data, target) in enumerate(test_loader, 1):
data, target = data.to(device), target.to(device)
output = model.quantize_inference(data)
pred = output.argmax(dim=1, keepdim=True)
correct += pred.eq(target.view_as(pred)).sum().item()
acc = 100. * correct / len(test_loader.dataset)
print('\nTest set: Quant Model Accuracy: {:.4f}%\n'.format(acc))
return acc
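# The main block below follows the usual PTQ recipe implemented by the wrappers in module.py
# (a summary for orientation, not extra functionality):
#   1. load the FP32 checkpoint and measure full-precision accuracy (full_inference)
#   2. model.quantize(num_bits)        -> attach the QConv2d/QBN/QLinear/... wrappers
#   3. direct_quantize(model, ...)     -> run calibration batches so every QParam collects min/max
#   4. model.freeze()                  -> bake quantized weights, biases and rescaling factors
#   5. quantize_inference(model, ...)  -> evaluate the accuracy of the quantized model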
if __name__ == "__main__":
parser = argparse.ArgumentParser(description='PTQ Training')
parser.add_argument('-m', '--model', metavar='MODEL ARCH', default='resnet18')
parser.add_argument('-n','--num_bits', default=8, type=int, metavar='BITS', help='number of bits')
parser.add_argument('-t','--mode', default=1, type=int, metavar='MODES', help='PTQ mode(1:INT 2:PoT 3:FP)')
parser.add_argument('-e','--n_exp', default=4, type=int, metavar='N_EXP', help='number of exp')
# d1 = sys.argv[1] # num_bits
# d2 = sys.argv[2] # mode
# d3 = sys.argv[3] # n_exp
# d1 = 8
# d2 = 3
# d3 = 4
args = parser.parse_args()
d1 = args.num_bits
d2 = args.mode
d3 = args.n_exp
batch_size = 128
using_bn = True
load_quant_model_file = None
# load_model_file = None
acc = 0
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
print(device)
train_loader = torch.utils.data.DataLoader(
datasets.CIFAR10('./project/p/data', train=True, download=False,
transform=transforms.Compose([
transforms.ToTensor(),
transforms.Normalize((0.4914, 0.4822, 0.4465), (0.2023, 0.1994, 0.2010))
])),
batch_size=batch_size, shuffle=True, num_workers=1, pin_memory=False
)
test_loader = torch.utils.data.DataLoader(
datasets.CIFAR10('./project/p/data', train=False, download=False ,transform=transforms.Compose([
transforms.ToTensor(),
transforms.Normalize((0.4914, 0.4822, 0.4465), (0.2023, 0.1994, 0.2010))
])),
batch_size=batch_size, shuffle=True, num_workers=1, pin_memory=False
)
if using_bn:
# model = LeNet(n_exp=int(d3), mode=int(d2)).to(device)
if args.model == 'resnet18' :
model = resnet18(n_exp=int(d3), mode=int(d2)).to(device)
elif args.model == 'resnet50' :
model = resnet50(n_exp=int(d3), mode=int(d2)).to(device)
elif args.model == 'resnet152' :
model = resnet152(n_exp=int(d3), mode=int(d2)).to(device)
elif args.model == 'LeNet' :
model = LeNet(n_exp=int(d3), mode=int(d2)).to(device)
elif args.model == 'NetBN' :
model = NetBN().to(device)
# model = resnet18(n_exp=int(d3), mode=int(d2)).to(device)
        # when generating the gradient-distribution plots, training started from scratch
# model.load_state_dict(torch.load('./project/p/ckpt/cifar-10_lenet_bn.pt', map_location='cpu'))
model.load_state_dict(torch.load('./project/p/ckpt/' + args.model + '/' + args.model + '.pt', map_location='cpu'))
# else:
# model = Net()
# model.load_state_dict(torch.load('ckpt/mnist_cnn.pt', map_location='cpu'))
# save_file = "ckpt/mnist_cnn_ptq.pt"
# model.to(device)
model.eval()
full_inference(model, test_loader, device)
full_writer = SummaryWriter(log_dir='./project/p/' + args.model +'/ptqlog_mode' + str(d2) + '/' + str(d3) + '/' + 'full_log')
for name, param in model.named_parameters():
full_writer.add_histogram(tag=name + '_data', values=param.data)
num_bits = int(d1)
model.quantize(num_bits=num_bits)
model.eval()
print('Quantization bit: %d' % num_bits)
writer = SummaryWriter(log_dir='./project/p/'+ args.model + '/ptqlog_mode' + str(d2) + '/' + str(d3) + '/' + 'quant_bit_' + str(d1) + '_log')
if load_quant_model_file is not None:
model.load_state_dict(torch.load(load_quant_model_file))
print("Successfully load quantized model %s" % load_quant_model_file)
direct_quantize(model, train_loader, device)
    model.freeze()  # quantize the weights
for name, param in model.named_parameters():
writer.add_histogram(tag=name + '_data', values=param.data)
    # original PTQ path when mode=1:
# save_file = 'ckpt/cifar-10_lenet_bn_ptq_' + str(d1) + '_.pt'
dir_name ='./project/p/ckpt/' + args.model + '/mode'+ str(d2) + '_' + str(d3) + '/ptq'
if not os.path.isdir(dir_name):
os.makedirs(dir_name, mode=0o777)
os.chmod(dir_name, mode=0o777)
save_file = './project/p/ckpt/' + args.model + '/mode'+ str(d2) + '_' + str(d3) + '/ptq' + '/cifar-10_' + args.model + '_ptq_' + str(d1) + '_.pt'
torch.save(model.state_dict(), save_file)
    # check that device transfer works correctly
# model.cuda()
# print(model.qconv1.M.device)
# model.cpu()
# print(model.qconv1.M.device)
acc = quantize_inference(model, test_loader, device)
f = open('./project/p/' + args.model + '_ptq_acc' + '.txt', 'a')
f.write('bit ' + str(d1) + ': ' + str(acc) + '\n')
f.close()
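# Typical invocation (assuming this file is saved as e.g. ptq.py; the flags are defined by the argparse setup above):
#   python ptq.py -m resnet18 -n 8 -t 1 -e 4     # 8-bit INT PTQ
#   python ptq.py -m resnet50 -n 8 -t 2 -e 4     # PoT mode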
# -*- coding: utf-8 -*-
from model import *
# from easydict import EasyDict
# from cleverhans.torch.attacks.fast_gradient_method import fast_gradient_method
# from cleverhans.torch.attacks.projected_gradient_descent import (
# projected_gradient_descent,
# )
import argparse
import torch
import torch.nn as nn
import torch.optim as optim
from torchvision import datasets, transforms
from torchvision import datasets, transforms
import os
import os.path as osp
import sys
import time
# import matplotlib.pyplot as plt
# import matplotlib
# sys.path.append("./project/p")
from get_weight import *
from torch.utils.tensorboard import SummaryWriter
def quantize_aware_training(model, device, train_loader, optimizer, epoch):
lossLayer = torch.nn.CrossEntropyLoss()
flag = 0
cnt = 0
losses=[]
for batch_idx, (data, target) in enumerate(train_loader, 1):
data, target = data.to(device), target.to(device)
optimizer.zero_grad()
        output = model.quantize_forward(data)  # runs each quantized layer's forward
        loss = lossLayer(output, target)  # the loss is tied to the quantized layers here
loss.backward()
# cnt = cnt + 1
losses.append(loss)
histo, grads = (get_model_histogram(model))
if flag == 0:
flag = 1
grads_sum = grads
            # accumulate the gradients of every batch in this epoch
else:
for k, v in grads_sum.items():
grads_sum[k] += grads[k]
#print(k)
optimizer.step()
if batch_idx % 50 == 0:
print('Quantize Aware Training Epoch: {} [{}/{}]\tLoss: {:.6f}'.format(
epoch, batch_idx * len(data), len(train_loader.dataset), loss.item()
))
# print(grad_sum['conv_layers.conv1.weight'])
# sys.exit(0)
# print('batch_idx: ' +str(batch_idx))
# print('cnt: ' + str(cnt))
    # per-epoch average gradient (note: divided by the number of samples here, whereas train() in new_train.py divides by the number of batches)
    for k, v in grads_sum.items():
        grads_sum[k] = v / len(train_loader.dataset)
return grads_sum,losses
#
# print(grads_sum)
#
# histo = get_grad_histogram(grads_sum)
#
# for s,_ in grads_sum.items():
# data = histo[s]
# bins = data['bins']
# histogram = data['histogram']
# max_idx = np.argmax(histogram)
# min_idx = np.argmin(histogram)
# width = abs(bins[max_idx] - bins[min_idx])
#
# plt.figure(figsize=(9, 6))
# plt.bar(bins[:-1], histogram, width=width)
# #plt.show()
#
# plt.savefig('diff_fig/int'+ sys.argv[1] + '/' + s +'.jpg')
#
# np.save('diff_fig/int' + sys.argv[1] + '/grads_sum.npy', grads_sum)
# sys.exit(0)
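# Background for the QAT loop above (a brief note, not new functionality): quantize_forward runs the
# fake-quantization wrappers, so the loss is computed on quantize->dequantize'd activations and weights,
# while FakeQuantize's backward (typically a straight-through estimator, as assumed here) lets gradients
# still reach the underlying FP32 parameters that optimizer.step() updates.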
def full_inference(model, test_loader):
correct = 0
# report = EasyDict(nb_test=0, correct=0, correct_fgm=0, correct_pgd=0)
for i, (data, target) in enumerate(test_loader, 1):
data, target = data.to(device), target.to(device)
with torch.no_grad():
output = model(data)
pred = output.argmax(dim=1, keepdim=True)
correct += pred.eq(target.view_as(pred)).sum().item()
#x_fgm = fast_gradient_method(model, data, 0.01, np.inf)
#x_pgd = projected_gradient_descent(model, data, 0.01, 0.01, 40, np.inf)
# model prediction on clean examples
# _, y_pred = model(data).max(1)
# model prediction on FGM adversarial examples
#_, y_pred_fgm = model(x_fgm).max(1)
# model prediction on PGD adversarial examples
#_, y_pred_pgd = model(x_pgd).max(1)
# report.nb_test += target.size(0)
# report.correct += y_pred.eq(target).sum().item()
#report.correct_fgm += y_pred_fgm.eq(target).sum().item()
#report.correct_pgd += y_pred_pgd.eq(target).sum().item()
print('\nTest set: Full Model Accuracy: {:.0f}%\n'.format(100. * correct / len(test_loader.dataset)))
# print('\nTest set: Full Model Accuracy:')
# print(
# "test acc on clean examples (%): {:.3f}".format(
# report.correct / report.nb_test * 100.0
# )
# )
# print(
# "test acc on FGM adversarial examples (%): {:.3f}".format(
# report.correct_fgm / report.nb_test * 100.0
# )
# )
# print(
# "test acc on PGD adversarial examples (%): {:.3f}".format(
# report.correct_pgd / report.nb_test * 100.0
# )
# )
print('============================================')
def quantize_inference(model, test_loader):
correct = 0
acc=0
# report = EasyDict(nb_test=0, correct=0, correct_fgm=0, correct_pgd=0)
for i, (data, target) in enumerate(test_loader, 1):
data, target = data.to(device), target.to(device)
output = model.quantize_inference(data)
pred = output.argmax(dim=1, keepdim=True)
correct += pred.eq(target.view_as(pred)).sum().item()
acc = 100. * correct / len(test_loader.dataset)
print('\nTest set: Quant Model Accuracy: {:.0f}%\n'.format(acc))
# data, target = data.to(device), target.to(device)
# x_fgm = fast_gradient_method(model, data, 0.01, np.inf)
# x_pgd = projected_gradient_descent(model, data, 0.01, 0.01, 40, np.inf)
# model prediction on clean examples
# _, y_pred = model.quantize_inference(data).max(1)
# model prediction on FGM adversarial examples
# _, y_pred_fgm = model.quantize_inference(x_fgm).max(1)
# model prediction on PGD adversarial examples
#_, y_pred_pgd = model.quantize_inference(x_pgd).max(1)
# report.nb_test += target.size(0)
# report.correct += y_pred.eq(target).sum().item()
# report.correct_fgm += y_pred_fgm.eq(target).sum().item()
# report.correct_pgd += y_pred_pgd.eq(target).sum().item()
# acc = report.correct / report.nb_test * 100.0
# print(
# "test acc on clean examples (%): {:.3f}".format(acc
# )
# )
# print(
# "test acc on FGM adversarial examples (%): {:.3f}".format(
# report.correct_fgm / report.nb_test * 100.0
# )
# )
# print(
# "test acc on PGD adversarial examples (%): {:.3f}".format(
# report.correct_pgd / report.nb_test * 100.0
# )
# )
return acc
if __name__ == "__main__":
# d1=20
# d2=5
d1 = sys.argv[1] # num_bits
d2 = sys.argv[2] # epochs
d3 = sys.argv[3] # mode
d4 = sys.argv[4] # n_exp
batch_size = 32
    test_batch_size = 32  # keeping the test batch size equal to the training one is more reasonable here, since the model uses batch norm
seed = 1
epochs = int(d2)
lr = 0.001 # 1%*0.01
momentum = 0.5
    net = 'LeNet'
acc=0
using_bn = True
load_quant_model_file = None
# load_quant_model_file = "ckpt/mnist_cnnbn_qat.pt"
torch.manual_seed(seed)
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
print(device)
# datasets.imagenet
train_loader = torch.utils.data.DataLoader(
datasets.CIFAR10('./project/p/data', train=True, download=False,
transform=transforms.Compose([
transforms.ToTensor(),
transforms.Normalize((0.4914, 0.4822, 0.4465), (0.2023, 0.1994, 0.2010))
])),
batch_size=batch_size, shuffle=True, num_workers=1, pin_memory=False
)
test_loader = torch.utils.data.DataLoader(
datasets.CIFAR10('./project/p/data', train=False, download=False,transform=transforms.Compose([
transforms.ToTensor(),
transforms.Normalize((0.4914, 0.4822, 0.4465), (0.2023, 0.1994, 0.2010))
])),
batch_size=test_batch_size, shuffle=True, num_workers=1, pin_memory=False
)
#if using_bn:
#model = NetBN()
# if (net=='VGG19') == True:
# model = VGG_19().to(device)
# model.load_state_dict(torch.load('ckpt/cifar-10_vgg19_bn.pt', map_location='cpu'))
# save_file = "ckpt/cifar-10_vgg19_bn_qat.pt"
# elif (net=='LeNet') == True:
model = LeNet(n_exp=int(d4), mode = int(d3)).to(device)
    # when generating the gradient-distribution plots, training started from scratch
    # fine-tune QAT
#model.load_state_dict(torch.load('ckpt/cifar-10_lenet_bn.pt', map_location='cuda'))
# save_file = "ckpt/cifar-10_lenet_bn_qat.pt"
# else:
# model = Net().to(device)
# model.load_state_dict(torch.load('ckpt/cifar-10_vgg19.pt', map_location='cpu'))
# save_file = "ckpt/cifar-10_vgg19_qat.pt"
model.to(device)
optimizer = optim.SGD(model.parameters(), lr=lr, momentum=momentum)
    # consider using Adam instead
# INT
# writer = SummaryWriter(log_dir='./scratchlog/quant_bit_' + str(d1) + '_log')
writer = SummaryWriter(log_dir='./project/p/scratchlog/mode' + str(d3) + '_' + str(d4) + '/quant_bit_' + str(d1) + '_log')
    model.eval()  # evaluation mode (no gradient updates, no dropout)
full_inference(model, test_loader)
num_bits = int(d1)
    # first define the quantized layers inside the model
model.quantize(num_bits=num_bits)
print('Quantization bit: %d' % num_bits)
if load_quant_model_file is not None:
model.load_state_dict(torch.load(load_quant_model_file))
print("Successfully load quantized model %s" % load_quant_model_file)
# quantization-aware training
for epoch in range(1, epochs + 1):
model.train() # training mode
grads_sum, losses = quantize_aware_training(model, device, train_loader, optimizer, epoch)
print('epoch:', epoch)
checkpoint = {
'model': model.state_dict(),
'optimizer': optimizer.state_dict(),
'grads':grads_sum,
'epoch': epoch,
'losses': losses
}
for name, param in grads_sum.items():
# the grads here are accumulated sums, not averages
writer.add_histogram(tag=name + '_grad', values=param, global_step=epoch)
for name, param in model.named_parameters():
writer.add_histogram(tag=name + '_data', values=param.data, global_step=epoch)
# if (net == 'VGG19') == True:
# torch.save(checkpoint,
# 'checkpoint/cifar-10_vgg_19_bn_quant/ckpt_cifar-10_vgg19_bn_quant_%s.pth' % (str(epoch)))
#
#
# elif (net == 'LeNet') == True:
# INT
# dir_name = 'checkpoint/cifar-10_lenet_bn_quant/scratch/' + str(d1)
dir_name = './project/p/checkpoint/cifar-10_lenet_bn_quant/scratch/mode' + str(d3) + '_' + str(d4) + '/' + str(d1)
if not os.path.isdir(dir_name):
os.makedirs(dir_name,mode=0o777)
os.chmod(dir_name,mode=0o777)
# INT
# torch.save(checkpoint,'checkpoint/cifar-10_lenet_bn_quant/scratch/' + str(d1) + '/ckpt_cifar-10_lenet_bn_quant_' + str(epoch) + '.pth')
torch.save(checkpoint,
'./project/p/checkpoint/cifar-10_lenet_bn_quant/scratch/mode' + str(d3) + '_' + str(d4) + '/' + str(d1)+ '/ckpt_cifar-10_lenet_bn_quant_' + str(
epoch) + '.pth')
# quan_dict = torch.load('checkpoint/cifar-10_lenet_bn_quant/' + str(d1) + '/ckpt_cifar-10_lenet_bn_quant_%s.pth' % (str(epoch)))
# print(quan_dict['grads']['conv_layers.conv1.weight'].reshape(1,-1).shape)
#
#
# print('Saved all parameters!\n')
model.eval()
#torch.save(model.state_dict(), save_file)
model.freeze()
acc = quantize_inference(model, test_loader)
f = open('./project/p/lenet_qat_scratch_acc' + '.txt', 'a')
f.write('bit ' + str(d1) + ': ' + str(acc) + '\n')
f.close()
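# Editor's note: a hypothetical invocation of the script above (the file name is an assumption):
#   python qat_lenet_scratch.py 8 30 1 4
# i.e. num_bits=8, epochs=30, mode=1, n_exp=4, read from sys.argv[1..4].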
# -*- coding: utf-8 -*-
import torch
import torch.nn as nn
import torch.nn.functional as F
from new_train import get_children
from global_var import GlobalVariables
from module import *
class LeNet(nn.Module):
# CONV FLOPs: with bias:    (2 * C_in * K_h * K_w) * H_out * W_out * C_out
#             without bias: (2 * C_in * K_h * K_w - 1) * H_out * W_out * C_out
# FC  FLOPs:  with bias:    (2 * I) * O
#             without bias: (2 * I - 1) * O
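# Editor's worked example (added for clarity; numbers follow the layer shapes defined below):
# conv1 = Conv2d(3, 6, 5) on a 3x32x32 input produces a 6x28x28 output (no padding), so
#   FLOPs with bias = (2*3*5*5) * 28*28*6 = 705,600, of which the bias term is 28*28*6 = 4,704.
# fc1 = Linear(16*5*5, 120): FLOPs with bias = (2*400) * 120 = 96,000, of which the bias term is 120.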
def __init__(self, img_size=32, input_channel=3, num_class=10, n_exp=4, mode=1):
super().__init__()
self.conv_layers = nn.ModuleDict({
# block1
'conv1': nn.Conv2d(3,6,5), # (2*3*5*5) * 28*28*6 (bias accounts for 28*28*6 of it) 4704/705600
'bn1': nn.BatchNorm2d(6),
'reluc1': nn.ReLU(),
'pool1': nn.MaxPool2d(2,2),
# block2
'conv2': nn.Conv2d(6,16,5), # (2*6*5*5) * 10*10*16 (bias accounts for 10*10*16 of it) 1600/480000
'bn2': nn.BatchNorm2d(16),
'reluc2': nn.ReLU(),
'pool2': nn.MaxPool2d(2,2),
})
self.fc_layers = nn.ModuleDict({
# classifier
'fc1': nn.Linear(16*5*5,120), # (2*16*5*5)*120 (bias accounts for 120 of it) 120/96000
'reluf1': nn.ReLU(),
'fc2': nn.Linear(120,84), # (2*120)*84 (bias accounts for 84 of it) 84/20160
'reluf2': nn.ReLU(),
'fc3': nn.Linear(84, num_class)
})
self.mode = mode
self.n_exp = n_exp
def forward(self,x):
for _,layer in self.conv_layers.items():
x = layer(x)
output = x.view(-1,16*5*5)
for _,layer in self.fc_layers.items():
output = layer(output)
out = F.softmax(output, dim=1) # softmax here is optional; it barely affects the result
return out
def quantize(self, num_bits=8):
self.quantize_conv_layers=nn.ModuleDict({
# qi=True: the previous layer's output has not been quantized yet and must be quantized here. MaxPool and ReLU do not change the integer values or the min/max, so layers following them use qi=False.
# If the previous layer is a conv, the data's min/max has changed, so qi=True is needed to re-quantize.
'qconv1': QConv2d(self.conv_layers['conv1'], qi=True, qo=True, num_bits=num_bits, n_exp=self.n_exp, mode=self.mode),
'qbn1':QBN(self.conv_layers['bn1'],qi=False,qo=True,num_bits=num_bits,n_exp=self.n_exp,mode=self.mode),
'qreluc1': QReLU(n_exp=self.n_exp, mode=self.mode),
'qpool1': QMaxPooling2d(kernel_size=2,stride=2,padding=0, n_exp=self.n_exp, mode=self.mode),
'qconv2': QConv2d(self.conv_layers['conv2'], qi=False, qo=True, num_bits=num_bits, n_exp=self.n_exp, mode=self.mode),
'qbn2':QBN(self.conv_layers['bn2'],qi=False,qo=True,num_bits=num_bits,n_exp=self.n_exp,mode=self.mode),
'qreluc2': QReLU(n_exp=self.n_exp, mode=self.mode),
'qpool2': QMaxPooling2d(kernel_size=2, stride=2, padding=0, n_exp=self.n_exp, mode=self.mode)
})
self.quantize_fc_layers = nn.ModuleDict({
'qfc1': QLinear(self.fc_layers['fc1'],qi=False,qo=True,num_bits=num_bits, n_exp=self.n_exp, mode=self.mode),
'qreluf1': QReLU(n_exp=self.n_exp, mode=self.mode),
'qfc2': QLinear(self.fc_layers['fc2'],qi=False,qo=True,num_bits=num_bits, n_exp=self.n_exp, mode=self.mode),
'qreluf2': QReLU(n_exp=self.n_exp, mode=self.mode),
'qfc3': QLinear(self.fc_layers['fc3'],qi=False,qo=True,num_bits=num_bits, n_exp=self.n_exp, mode=self.mode)
})
def quantize_forward(self, x):
for s, layer in self.quantize_conv_layers.items():
# print(s)
# print(layer)
x = layer(x)
output = x.view(-1,16*5*5)
for s, layer in self.quantize_fc_layers.items():
output = layer(output)
out = F.softmax(output, dim=1) # softmax here is optional and barely affects things; the output is used for the loss
return out
def freeze(self):
self.quantize_conv_layers['qconv1'].freeze()
self.quantize_conv_layers['qbn1'].freeze(qi=self.quantize_conv_layers['qconv1'].qo)
# self.quantize_conv_layers['qreluc1'].freeze(self.quantize_conv_layers['qconv1'].qo)
# self.quantize_conv_layers['qpool1'].freeze(self.quantize_conv_layers['qconv1'].qo)
# self.quantize_conv_layers['qconv2'].freeze(self.quantize_conv_layers['qconv1'].qo)
self.quantize_conv_layers['qreluc1'].freeze(self.quantize_conv_layers['qbn1'].qo)
self.quantize_conv_layers['qpool1'].freeze(self.quantize_conv_layers['qbn1'].qo)
self.quantize_conv_layers['qconv2'].freeze(self.quantize_conv_layers['qbn1'].qo)
self.quantize_conv_layers['qbn2'].freeze(qi=self.quantize_conv_layers['qconv2'].qo)
# self.quantize_conv_layers['qreluc2'].freeze(self.quantize_conv_layers['qconv2'].qo)
# self.quantize_conv_layers['qpool2'].freeze(self.quantize_conv_layers['qconv2'].qo)
# self.quantize_fc_layers['qfc1'].freeze(qi=self.quantize_conv_layers['qconv2'].qo)
self.quantize_conv_layers['qreluc2'].freeze(self.quantize_conv_layers['qbn2'].qo)
self.quantize_conv_layers['qpool2'].freeze(self.quantize_conv_layers['qbn2'].qo)
self.quantize_fc_layers['qfc1'].freeze(qi=self.quantize_conv_layers['qbn2'].qo)
self.quantize_fc_layers['qreluf1'].freeze(self.quantize_fc_layers['qfc1'].qo)
self.quantize_fc_layers['qfc2'].freeze(qi=self.quantize_fc_layers['qfc1'].qo)
self.quantize_fc_layers['qreluf2'].freeze(self.quantize_fc_layers['qfc2'].qo)
self.quantize_fc_layers['qfc3'].freeze(qi=self.quantize_fc_layers['qfc2'].qo)
def fakefreeze(self):
self.quantize_conv_layers['qconv1'].fakefreeze()
self.quantize_conv_layers['qreluc1'].fakefreeze(self.quantize_conv_layers['qconv1'].qo)
self.quantize_conv_layers['qpool1'].fakefreeze(self.quantize_conv_layers['qconv1'].qo)
self.quantize_conv_layers['qconv2'].fakefreeze(self.quantize_conv_layers['qconv1'].qo)
self.quantize_conv_layers['qreluc2'].fakefreeze(self.quantize_conv_layers['qconv2'].qo)
self.quantize_conv_layers['qpool2'].fakefreeze(self.quantize_conv_layers['qconv2'].qo)
self.quantize_fc_layers['qfc1'].fakefreeze(qi=self.quantize_conv_layers['qconv2'].qo)
self.quantize_fc_layers['qreluf1'].fakefreeze(self.quantize_fc_layers['qfc1'].qo)
self.quantize_fc_layers['qfc2'].fakefreeze(qi=self.quantize_fc_layers['qfc1'].qo)
self.quantize_fc_layers['qreluf2'].fakefreeze(self.quantize_fc_layers['qfc2'].qo)
self.quantize_fc_layers['qfc3'].fakefreeze(qi=self.quantize_fc_layers['qfc2'].qo)
def quantize_inference(self, x):
x = self.quantize_conv_layers['qconv1'].qi.quantize_tensor(x, self.mode)
for s, layer in self.quantize_conv_layers.items():
# print(s)
x = layer.quantize_inference(x)
output = x.view( -1,16*5*5)
for s, layer in self.quantize_fc_layers.items():
output = layer.quantize_inference(output)
# only mode 1 needs this range mapping (dequantize) to bring the quantized data back to a range similar to the original; PoT quantization does not, since it restores the range inherently
if self.mode == 1:
output = self.quantize_fc_layers['qfc3'].qo.dequantize_tensor(output, self.mode)
out = F.softmax(output, dim=1) # a quantized softmax (QSoftmax) would probably be better here; to be changed later
return out
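# Editor's sketch (an addition, not part of the original code): a quick standalone check that
# the conv stack above reduces a 3x32x32 CIFAR-10 image to 16x5x5 before the view(-1, 16*5*5)
# used in forward()/quantize_forward(): 32 -> 28 (conv1) -> 14 (pool1) -> 10 (conv2) -> 5 (pool2).
def _check_lenet_shapes():
    m = LeNet()
    x = torch.randn(1, 3, 32, 32)
    for name, layer in m.conv_layers.items():
        x = layer(x)
        print(name, tuple(x.shape))
    assert tuple(x.shape[1:]) == (16, 5, 5)  # matches the flatten used by the fc layers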
class NetBN(nn.Module):
def __init__(self, num_channels=1):
super(NetBN, self).__init__()
self.conv1 = nn.Conv2d(num_channels, 40, 3, 1)
self.bn1 = nn.BatchNorm2d(40)
self.conv2 = nn.Conv2d(40, 40, 3, 1)
self.bn2 = nn.BatchNorm2d(40)
self.fc = nn.Linear(5 * 5 * 40, 10)
def forward(self, x):
x = self.conv1(x)
x = self.bn1(x)
x = F.relu(x)
x = F.max_pool2d(x, 2, 2)
x = self.conv2(x)
x = self.bn2(x)
x = F.relu(x)
x = F.max_pool2d(x, 2, 2)
x = x.view(-1, 5 * 5 * 40)
x = self.fc(x)
return x
def quantize(self, num_bits=8):
self.qconv1 = QConvBNReLU(self.conv1, self.bn1, qi=True, qo=True, num_bits=num_bits)
self.qmaxpool2d_1 = QMaxPooling2d(kernel_size=2, stride=2, padding=0)
self.qconv2 = QConvBNReLU(self.conv2, self.bn2, qi=False, qo=True, num_bits=num_bits)
self.qmaxpool2d_2 = QMaxPooling2d(kernel_size=2, stride=2, padding=0)
self.qfc = QLinear(self.fc, qi=False, qo=True, num_bits=num_bits)
def quantize_forward(self, x):
x = self.qconv1(x)
x = self.qmaxpool2d_1(x)
x = self.qconv2(x)
x = self.qmaxpool2d_2(x)
x = x.view(-1, 5*5*40)
x = self.qfc(x)
return x
def freeze(self):
self.qconv1.freeze()
self.qmaxpool2d_1.freeze(self.qconv1.qo)
self.qconv2.freeze(qi=self.qconv1.qo) # maxpool does not change min/max
self.qmaxpool2d_2.freeze(self.qconv2.qo)
self.qfc.freeze(qi=self.qconv2.qo) # maxpool does not change min/max
def quantize_inference(self, x):
qx = self.qconv1.qi.quantize_tensor(x)
qx = self.qconv1.quantize_inference(qx)
qx = self.qmaxpool2d_1.quantize_inference(qx)
qx = self.qconv2.quantize_inference(qx)
qx = self.qmaxpool2d_2.quantize_inference(qx)
qx = qx.view(-1, 5*5*40)
qx = self.qfc.quantize_inference(qx)
out = self.qfc.qo.dequantize_tensor(qx) # INT -> FP
return out
# ResNet model definition
# adapted for CIFAR-10
class ResNet(nn.Module):
def __init__(self, block, layers, num_classes=10, n_exp=4, mode=1): # the number of classes is set to 10 here
super(ResNet, self).__init__()
self.mode = mode
self.n_exp = n_exp
self.inplanes = 16 # CIFAR-10 images are small, so fewer channels are needed at the start
GlobalVariables.SELF_INPLANES = self.inplanes
# print('resnet init:'+ str(GlobalVariables.SELF_INPLANES))
# input layer
self.conv1 = nn.Conv2d(3, 16, kernel_size=3, stride=1, padding=1,
bias=False)
self.bn1 = nn.BatchNorm2d(16)
self.relu = nn.ReLU()
# residual layers (4 stages, each containing 6n+2 convolutional layers)
self.layer1 = MakeLayer(block, 16, layers[0], n_exp=self.n_exp, mode=self.mode)
self.layer2 = MakeLayer(block, 32, layers[1], stride=2, n_exp=self.n_exp, mode=self.mode)
self.layer3 = MakeLayer(block, 64, layers[2], stride=2, n_exp=self.n_exp, mode=self.mode)
self.layer4 = MakeLayer(block, 128, layers[3], stride=2, n_exp=self.n_exp, mode=self.mode)
# classification head
self.avgpool = nn.AdaptiveAvgPool2d((1, 1))
self.fc = nn.Linear(128 * block.expansion, num_classes)
# parameter initialization
for m in self.modules():
if isinstance(m, nn.Conv2d):
nn.init.kaiming_normal_(m.weight, mode='fan_out', nonlinearity='relu')
elif isinstance(m, nn.BatchNorm2d):
nn.init.constant_(m.weight, 1)
nn.init.constant_(m.bias, 0)
def forward(self, x):
# input layer
x = self.conv1(x)
x = self.bn1(x)
x = self.relu(x)
# compared with the ImageNet version, the maxpool is omitted: CIFAR-10 images are already small, and pooling again would make them too small
# residual layers
x = self.layer1(x)
x = self.layer2(x)
x = self.layer3(x)
x = self.layer4(x)
# classification head
x = self.avgpool(x) # output shape is (B, C, 1, 1)
x = x.view(x.size(0), -1)
x = self.fc(x)
out = F.softmax(x, dim=1) # softmax here is optional; it barely affects the result
return out
def quantize(self, num_bits=8):
self.qconvbnrelu1 = QConvBNReLU(self.conv1,self.bn1,qi=True,qo=True,num_bits=num_bits,n_exp=self.n_exp, mode=self.mode)
# num_bits was not passed in originally; needs revising
self.layer1.quantize(num_bits=num_bits)
self.layer2.quantize(num_bits=num_bits)
self.layer3.quantize(num_bits=num_bits)
self.layer4.quantize(num_bits=num_bits)
self.qavgpool1 = QAdaptiveAvgPool2d(qi=False,qo=True,num_bits=num_bits,n_exp=self.n_exp, mode=self.mode)
self.qfc1 = QLinear(self.fc,qi=False,qo=True,num_bits=num_bits,n_exp=self.n_exp, mode=self.mode)
def quantize_forward(self, x):
# for _, layer in self.quantize_layers.items():
# x = layer(x)
# out = F.softmax(x, dim=1)
# return out
x = self.qconvbnrelu1(x)
x = self.layer1.quantize_forward(x)
x = self.layer2.quantize_forward(x)
x = self.layer3.quantize_forward(x)
x = self.layer4.quantize_forward(x)
x = self.qavgpool1(x)
x = x.view(x.size(0), -1)
x = self.qfc1(x)
out = F.softmax(x, dim=1) # softmax here is optional; it barely affects the result
return out
def freeze(self):
self.qconvbnrelu1.freeze() # as the first layer it has its own qi, so no qi needs to be supplied when freezing
qo = self.layer1.freeze(qinput = self.qconvbnrelu1.qo)
qo = self.layer2.freeze(qinput = qo)
qo = self.layer3.freeze(qinput = qo)
qo = self.layer4.freeze(qinput = qo)
self.qavgpool1.freeze(qo)
self.qfc1.freeze(qi = self.qavgpool1.qo)
def fakefreeze(self):
pass
def quantize_inference(self, x):
qx = self.qconvbnrelu1.qi.quantize_tensor(x,mode=self.mode)
qx = self.qconvbnrelu1.quantize_inference(qx)
qx = self.layer1.quantize_inference(qx)
qx = self.layer2.quantize_inference(qx)
qx = self.layer3.quantize_inference(qx)
qx = self.layer4.quantize_inference(qx)
qx = self.qavgpool1.quantize_inference(qx)
qx = qx.view(qx.size(0), -1)
qx = self.qfc1.quantize_inference(qx)
if self.mode == 1:
qx = self.qfc1.qo.dequantize_tensor(qx,mode=self.mode)
out = F.softmax(qx, dim=1) # softmax here is optional; it barely affects the result
return out
# BasicBlock class
class BasicBlock(nn.Module):
expansion = 1
def __init__(self, inplanes, planes, stride=1, downsample=None, n_exp=4, mode=1):
super(BasicBlock, self).__init__()
# first conv layer
self.conv1 = nn.Conv2d(inplanes, planes, kernel_size=3, stride=stride,
padding=1, bias=False)
self.bn1 = nn.BatchNorm2d(planes)
# second conv layer
self.conv2 = nn.Conv2d(planes, planes, kernel_size=3, stride=1,
padding=1, bias=False)
self.bn2 = nn.BatchNorm2d(planes)
# shortcut
self.relu = nn.ReLU()
self.downsample = downsample
self.stride = stride
self.mode = mode
self.n_exp = n_exp
def forward(self, x):
identity = x
out = self.conv1(x)
out = self.bn1(out)
out = self.relu(out)
out = self.conv2(out)
out = self.bn2(out)
if self.downsample is not None:
identity = self.downsample(identity)
out += identity
out = self.relu(out)
return out
def quantize(self, num_bits=8):
self.qconvbnrelu1 = QConvBNReLU(self.conv1,self.bn1,qi=False,qo=True,num_bits=num_bits,n_exp=self.n_exp,mode=self.mode)
self.qconvbn1 = QConvBN(self.conv2,self.bn2,qi=False,qo=True,num_bits=num_bits,n_exp=self.n_exp,mode=self.mode)
if self.downsample is not None:
self.qconvbn2 = QConvBN(self.downsample[0],self.downsample[1],qi=False,qo=True,num_bits=num_bits,n_exp=self.n_exp,mode=self.mode)
self.qrelu1 = QReLU(qi=True,num_bits=num_bits,n_exp=self.n_exp,mode=self.mode) # needs its own qi
def quantize_forward(self, x):
identity = x
out = self.qconvbnrelu1(x)
out = self.qconvbn1(out)
if self.downsample is not None:
identity = self.qconvbn2(identity)
# residual add
out = identity + out # a proper quantized elementwise-add transform is needed here; to be fixed later
out = self.qrelu1(out)
return out
def freeze(self, qinput):
# qconvbnrelu1 could in principle reuse the previous layer's qo, but passing it around is awkward, so that is not done here
# still needs careful checking
self.qconvbnrelu1.freeze(qi=qinput) # must take the last qo of the previous module
self.qconvbn1.freeze(qi = self.qconvbnrelu1.qo)
if self.downsample is not None:
self.qconvbn2.freeze(qi = self.qconvbn1.qo)
self.qrelu1.freeze()
return self.qrelu1.qi # the input is the sum of two branches, so a single layer's qo cannot be used directly; the statistics collected in qrelu1.qi give the output range after the ReLU
else:
self.qrelu1.freeze()
return self.qrelu1.qi # the input is the sum of two branches, so a single layer's qo cannot be used directly; the statistics collected in qrelu1.qi give the output range after the ReLU
def quantize_inference(self, x):
# no initial quantize_tensor/dequantize_tensor is needed here: this is not the first/last layer, and as long as every intermediate layer stays in the quantized domain, no such conversion is required.
identity = x
out = self.qconvbnrelu1.quantize_inference(x)
out = self.qconvbn1.quantize_inference(out)
if self.downsample is not None:
identity = self.qconvbn2.quantize_inference(identity)
out = identity + out # a proper quantized elementwise-add transform is needed here (see the sketch after this class); to be fixed later
out = self.qrelu1.quantize_inference(out)
return out
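# Editor's sketch of the quantized elementwise add mentioned above (an assumption, not the
# repository's implementation): rescale both branches to the output quantization parameters
# before adding, so tensors with different (scale, zero_point) pairs can be summed safely.
def quantized_add_sketch(xq, x_scale, x_zero, yq, y_scale, y_zero, o_scale, o_zero):
    # fold each branch's scale into the output scale, shift by the zero points, then round
    x_term = (x_scale / o_scale) * (xq - x_zero)
    y_term = (y_scale / o_scale) * (yq - y_zero)
    return torch.round(x_term + y_term + o_zero)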
# Bottleneck class
class Bottleneck(nn.Module):
expansion = 4
def __init__(self, inplanes, planes, stride=1, downsample=None):
super(Bottleneck, self).__init__()
# 1x1 conv layer
self.conv1 = nn.Conv2d(inplanes, planes, kernel_size=1, bias=False)
self.bn1 = nn.BatchNorm2d(planes)
# 3x3 conv layer
self.conv2 = nn.Conv2d(planes, planes, kernel_size=3, stride=stride,
padding=1, bias=False)
self.bn2 = nn.BatchNorm2d(planes)
# 1x1 conv layer
self.conv3 = nn.Conv2d(planes, planes * self.expansion, kernel_size=1,
bias=False)
self.bn3 = nn.BatchNorm2d(planes * self.expansion)
# shortcut
self.relu = nn.ReLU()
self.downsample = downsample
self.stride = stride
def forward(self, x):
identity = x
out = self.conv1(x)
out = self.bn1(out)
out = self.relu(out)
out = self.conv2(out)
out = self.bn2(out)
out = self.relu(out)
out = self.conv3(out)
out = self.bn3(out)
if self.downsample is not None:
identity = self.downsample(x)
out += identity # the residual addition is handled here
out = self.relu(out)
return out
class MakeLayer(nn.Module):
def __init__(self, block, planes, blocks, stride=1, n_exp=4, mode=1):
super(MakeLayer, self).__init__()
# print('makelayer init:'+ str(GlobalVariables.SELF_INPLANES))
self.downsample = None
if stride != 1 or GlobalVariables.SELF_INPLANES != planes * block.expansion:
self.downsample = nn.Sequential(
nn.Conv2d(GlobalVariables.SELF_INPLANES, planes * block.expansion,kernel_size=1, stride=stride, bias=False),
nn.BatchNorm2d(planes * block.expansion)
)
self.n_exp = n_exp
self.mode = mode
self.blockdict = nn.ModuleDict()
self.blockdict['block1'] = block(inplanes=GlobalVariables.SELF_INPLANES, planes=planes, stride=stride, downsample=self.downsample,n_exp=self.n_exp,mode=self.mode)
GlobalVariables.SELF_INPLANES = planes * block.expansion
for i in range(1, blocks): # number of blocks; a ModuleDict has to be used here
self.blockdict['block' + str(i+1)] = block(inplanes=GlobalVariables.SELF_INPLANES, planes=planes,n_exp=self.n_exp, mode=self.mode) # the blocks are instantiated here
# def _make_layer(self, block, planes, blocks, stride=1):
# downsample = None
# # stride is the conv stride, and self.inplanes is the number of input channels of the current residual block,
# # while planes * block.expansion is its number of output channels. So when stride != 1 or self.inplanes != planes * block.expansion, a downsample branch is needed.
# # Except for the first residual block, all blocks in this stage have equal input and output channel counts and the same stride (1 or 2); the output height/width shrinks gradually as blocks are stacked.
# if stride != 1 or SELF_INPLANES != planes * block.expansion:
# downsample = nn.Sequential(
# nn.Conv2d(SELF_INPLANES, planes * block.expansion,
# kernel_size=1, stride=stride, bias=False),
# nn.BatchNorm2d(planes * block.expansion),
# )
# layers = []
# layers.append(block(SELF_INPLANES, planes, stride, downsample))
# SELF_INPLANES = planes * block.expansion
# for _ in range(1, blocks): # number of blocks
# layers.append(block(SELF_INPLANES, planes))
# return nn.Sequential(*layers)
def forward(self,x):
for _, layer in self.blockdict.items():
x = layer(x)
return x
def quantize(self, num_bits=8):
# needs checking
for _, layer in self.blockdict.items():
layer.quantize(num_bits=num_bits) # each entry is a block, and the block defines the concrete quantize policy; n_exp and mode were already assigned in __init__
def quantize_forward(self, x):
for _, layer in self.blockdict.items():
x = layer.quantize_forward(x) # each block defines its own quantize_forward
return x
def freeze(self, qinput): # qinput is passed in from ResNet.freeze()
# qconvbnrelu1 could in principle reuse the previous layer's qo, but passing it around is awkward, so that is not done here
# still needs careful checking
cnt = 0
for _, layer in self.blockdict.items():
if cnt == 0:
qo = layer.freeze(qinput = qinput)
cnt = 1
else:
qo = layer.freeze(qinput = qo) # each block defines its own freeze
return qo # returned for the layers that follow
def quantize_inference(self, x):
# no initial quantize_tensor/dequantize_tensor is needed here: this is not the first/last layer, and as long as every intermediate layer stays in the quantized domain, no such conversion is required.
for _, layer in self.blockdict.items():
x = layer.quantize_inference(x) # each block defines its own quantize_inference
return x
# build a ResNet-18 model
def resnet18(**kwargs):
model = ResNet(BasicBlock, [2, 2, 2, 2], **kwargs)
return model
# build a ResNet-50 model
def resnet50(**kwargs):
model = ResNet(Bottleneck, [3, 4, 6, 3], **kwargs)
return model
# build a ResNet-152 model
def resnet152(**kwargs):
model = ResNet(Bottleneck, [3, 8, 36, 3], **kwargs)
return model
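# Editor's sketch (an addition; hyperparameters are illustrative): a minimal use of the
# quantization API defined above, mirroring the flow of the training scripts
# (quantize -> calibrate with quantize_forward -> freeze -> quantize_inference).
def _resnet18_quant_demo():
    model = resnet18(n_exp=4, mode=1)
    model.eval()
    model.quantize(num_bits=8)           # attach the quantized wrappers
    x = torch.randn(2, 3, 32, 32)
    _ = model.quantize_forward(x)        # collect qi/qo statistics on sample data
    model.freeze()                       # fix the quantization parameters layer by layer
    out = model.quantize_inference(x)    # integer-domain inference
    print(out.shape)                     # expected: torch.Size([2, 10])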
# -*- coding: utf-8 -*-
from model import *
from get_weight import *
import torch
import torch.nn as nn
import torchvision
import torch.optim as optim
from torchvision import datasets, transforms
import os
import os.path as osp
import sys
import time
# import matplotlib.pyplot as plt
# import matplotlib
from torchvision.datasets import ImageFolder
from torch.utils.tensorboard import SummaryWriter
from absl import app, flags
# from easydict import EasyDict
# from cleverhans.torch.attacks.fast_gradient_method import fast_gradient_method
# from cleverhans.torch.attacks.projected_gradient_descent import (
# projected_gradient_descent,
# )
def train(model, device, train_loader, optimizer, epoch):
model.train()
lossLayer = torch.nn.CrossEntropyLoss()
flag = 0
cnt = 0
for batch_idx, (data, target) in enumerate(train_loader):
cnt = cnt + 1
data, target = data.to(device), target.to(device)
optimizer.zero_grad()
output = model(data)
loss = lossLayer(output, target)
loss.backward()
histo, grads = get_model_histogram(model)
if flag == 0:
flag = 1
grads_sum = grads
else:
for k,v in grads_sum.items():
grads_sum[k] += grads[k]
optimizer.step()
if batch_idx % 50 == 0:
print('Train Epoch: {} [{}/{}]\tLoss: {:.6f}'.format(
epoch, batch_idx * len(data), len(train_loader.dataset), loss.item()
))
for k, v in grads_sum.items():
grads_sum[k] = v / len(train_loader.dataset)
return grads_sum
def test(model, device, test_loader):
model.eval()
test_loss = 0
correct = 0
acc=0
lossLayer = torch.nn.CrossEntropyLoss(reduction='sum')
# report = EasyDict(nb_test=0, correct=0, correct_fgm=0, correct_pgd=0)
with torch.no_grad():
for data, target in test_loader:
data, target = data.to(device), target.to(device)
output = model(data)
# x_fgm = fast_gradient_method(model, data, 0.01, np.inf)
# x_pgd = projected_gradient_descent(model, data, 0.01, 0.01, 40, np.inf)
# model prediction on clean examples
# _, y_pred = model(data).max(1)
# # model prediction on FGM adversarial examples
# _, y_pred_fgm = model(x_fgm).max(1)
#
# # model prediction on PGD adversarial examples
# _, y_pred_pgd = model(x_pgd).max(1)
# report.nb_test += target.size(0)
# report.correct += y_pred.eq(target).sum().item()
# report.correct_fgm += y_pred_fgm.eq(target).sum().item()
# report.correct_pgd += y_pred_pgd.eq(target).sum().item()
test_loss += lossLayer(output, target).item()
pred = output.argmax(dim=1, keepdim=True)
correct += pred.eq(target.view_as(pred)).sum().item()
test_loss /= len(test_loader.dataset)
acc=100. * correct / len(test_loader.dataset)
print('\nTest set: Average loss: {:.4f}, Accuracy: {:.0f}%\n'.format(
test_loss, acc
))
# print(
# "test acc on clean examples (%): {:.3f}".format(
# report.correct / report.nb_test * 100.0
# )
# )
# print(
# "test acc on FGM adversarial examples (%): {:.3f}".format(
# report.correct_fgm / report.nb_test * 100.0
# )
# )
# print(
# "test acc on PGD adversarial examples (%): {:.3f}".format(
# report.correct_pgd / report.nb_test * 100.0
# )
# )
return acc
batch_size = 32
test_batch_size = 32
seed = 1
# epochs = 15
d1 = sys.argv[1]
epochs = int(d1)
lr = 0.001
momentum = 0.5
save_model = False
using_bn = True
net = 'LeNet'
torch.manual_seed(seed)
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
print(device)
train_loader = torch.utils.data.DataLoader(
datasets.CIFAR10('data', train=True, download=True,
transform=transforms.Compose([
transforms.ToTensor(),
transforms.Normalize((0.4914, 0.4822, 0.4465), (0.2023, 0.1994, 0.2010))
])),
batch_size=batch_size, shuffle=True, num_workers=1, pin_memory=True
)
test_loader = torch.utils.data.DataLoader(
datasets.CIFAR10('data', train=False, transform=transforms.Compose([
transforms.ToTensor(),
transforms.Normalize((0.4914, 0.4822, 0.4465), (0.2023, 0.1994, 0.2010))
])),
batch_size=test_batch_size, shuffle=True, num_workers=1, pin_memory=True
)
#if using_bn:
if net == 'VGG19':
model = VGG_19().to(device)
elif net == 'LeNet':
model = LeNet().to(device)
# else:
# model = Net().to(device)
optimizer = optim.SGD(model.parameters(), lr=lr, momentum=momentum)
writer = SummaryWriter(log_dir='./fullprecision_log')
#optimizer = optim.Adam(model.parameters(), lr=lr, betas=(0.9,0.999),eps=1e-08,weight_decay=0,amsgrad=False)
for epoch in range(1, epochs + 1):
grads_sum = train(model, device, train_loader, optimizer, epoch)
acc = test(model, device, test_loader)
print('epoch:', epoch)
checkpoint = {
'model': model.state_dict(),
'optimizer': optimizer.state_dict(),
'epoch': epoch,
'grads': grads_sum,
'accuracy':acc
}
# for name, param in model.named_parameters():
# writer.add_histogram(tag=name + '_grad', values=param.grad, global_step=epoch)
# writer.add_histogram(tag=name + '_data', values=param.data, global_step=epoch)
for name, param in grads_sum.items():
# the grads here are accumulated sums, not averages
writer.add_histogram(tag=name + '_grad', values=param, global_step=epoch)
# take the weights after the last batch of this epoch
for name, param in model.named_parameters():
writer.add_histogram(tag=name + '_data', values=param.data, global_step=epoch)
if net == 'LeNet':
torch.save(checkpoint, 'checkpoint/cifar-10_lenet_bn/full/ckpt_cifar-10_lenet_bn_%s.pth' % (str(epoch)))
# save parameters
# if (net == 'VGG19') == True:
# torch.save(checkpoint, 'checkpoint/cifar-10_vgg19_bn/ckpt_cifar-10_vgg19_bn_%s.pth' % (str(epoch)))
# elif (net == 'LeNet') == True:
# torch.save(checkpoint, 'checkpoint/cifar-10_lenet_bn/ckpt_cifar-10_lenet_bn_%s.pth' % (str(epoch)))
#print('Saved all parameters!\n')
if save_model:
if not osp.exists('ckpt'):
os.makedirs('ckpt')
#if using_bn:
if net == 'VGG19':
torch.save(model.state_dict(), 'ckpt/cifar-10_vgg19_bn.pt')
elif net == 'LeNet':
torch.save(model.state_dict(), 'ckpt/cifar-10_lenet_bn.pt')
# else:
# torch.save(model.state_dict(), 'ckpt/cifar-10_vgg19.pt')
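# Editor's note: this full-precision training script takes the epoch count as its only
# command-line argument, e.g. (script name is hypothetical): python train_full.py 30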