Commit bc4d19f0 by Zhihong Ma
parents 6183244f 3e850e16
# AlexNet_BN Quantization Notes
+ The architecture is essentially the same as AlexNet, with a BN layer added after each conv for training; at quantization time each conv/bn/relu triple is fused into a QConvBNReLU layer.
## PTQ
+ Quantization results:
FP32 accuracy: 87.09
![image-20230409143615651](image/image-20230409143615651.png)
+ Curve fitting:
Import the data into MATLAB as column vectors.
+ Before adding the FP3-FP7 data points:
+ js_flops - acc_loss
Rational: Numerator degree 2 / Denominator degree 2
- [ ] center and scale
![image-20230409133159255](image/image-20230409133159255.png)
- [x] center and scale
![image-20230409133216797](image/image-20230409133216797.png)
+ js_param - acc_loss
Rational: Numerator degree 2 / Denominator degree 2
- [ ] center and scale
![image-20230409133305267](image/image-20230409133305267.png)
- [x] center and scale
![image-20230409133331030](image/image-20230409133331030.png)
+ After adding the FP3-FP7 data points:
+ js_flops - acc_loss
Rational: Numerator degree 2 / Denominator degree 2
- [ ] center and scale
![image-20230409133616671](image/image-20230409133616671.png)
- [x] center and scale
![image-20230409143802907](image/image-20230409143802907.png)
+ js_param - acc_loss
Rational: Numerator degree 2 / Denominator degree 2
- [ ] center and scale
![image-20230409143907871](image/image-20230409143907871.png)
- [x] center and scale
![image-20230409143845569](image/image-20230409143845569.png)
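+ For reference, the same degree-2/degree-2 rational fit can be reproduced outside MATLAB. A minimal Python sketch (assuming numpy/scipy are available; `js_flops_list`/`acc_loss_list` are the columns recorded in ptq_result.txt):

```python
import numpy as np
from scipy.optimize import curve_fit

def rational22(x, p0, p1, p2, q1, q2):
    # Numerator degree 2 / Denominator degree 2 (denominator normalized to 1 + ...)
    return (p0 + p1 * x + p2 * x ** 2) / (1.0 + q1 * x + q2 * x ** 2)

x = np.asarray(js_flops_list)       # or js_param_list
y = np.asarray(acc_loss_list)
z = (x - x.mean()) / x.std()        # MATLAB's "center and scale" option
popt, _ = curve_fit(rational22, z, y, maxfev=10000)
```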
import sys
import os
# Extract per-layer parameter and FLOP ratios from param_flops.txt (the redirected output of get_param.py)
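# Each matching line comes from ptflops' per-layer stats, e.g.
#   (conv1): Conv2d(896, 0.023% Params, 917.5 KMac, 1.306% MACs, 3, 32, kernel_size=(3, 3), ...)
# from which the layer name, the Params percentage, and the MACs percentage are parsed.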
def extract_ratio():
fr = open('param_flops.txt','r')
lines = fr.readlines()
fr.close()
layer = []
par_ratio = []
flop_ratio = []
for line in lines:
if '(' in line and ')' in line:
layer.append(line.split(')')[0].split('(')[1])
r1 = line.split('%')[0].split(',')[-1]
r1 = float(r1)
par_ratio.append(r1)
r2 = line.split('%')[-2].split(',')[-1]
r2 = float(r2)
flop_ratio.append(r2)
return layer, par_ratio, flop_ratio
if __name__ == "__main__":
layer, par_ratio, flop_ratio = extract_ratio()
print(layer)
print(par_ratio)
print(flop_ratio)
from torch.autograd import Function
# Fake quantization: quantize then immediately dequantize, so the forward pass
# sees the quantization error while values stay in floating point.
class FakeQuantize(Function):
@staticmethod
def forward(ctx, x, qparam):
x = qparam.quantize_tensor(x)
x = qparam.dequantize_tensor(x)
return x
@staticmethod
def backward(ctx, grad_output):
# straight-through estimator: pass the gradient through unchanged
return grad_output, None
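# Typical use (see module.py): w_q = FakeQuantize.apply(weight, self.qw) puts w_q
# on the quantization grid while d(w_q)/d(weight) == 1 during backprop.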
from model import *
import torch
from ptflops import get_model_complexity_info
if __name__ == "__main__":
model = AlexNet_BN()
full_file = 'ckpt/cifar10_AlexNet_BN.pt'
model.load_state_dict(torch.load(full_file))
flops, params = get_model_complexity_info(model, (3, 32, 32), as_strings=True, print_per_layer_stat=True)
# -*- coding: utf-8 -*-
# Shares global state (the quantization value grids) across modules
def _init(): # initialize the global dict
global _global_dict
_global_dict = {}
def set_value(value,is_bias=False):
# store a global value; the bias grid is kept separately from the weight/activation grid
if is_bias:
_global_dict[0] = value
else:
_global_dict[1] = value
def get_value(is_bias=False): # bias gets its own precision, independent of the other tensors
if is_bias:
return _global_dict[0]
else:
return _global_dict[1]
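# Typical usage (see ptq.py): call gol._init() once at startup, then per quantization type
#   gol.set_value(build_bias_list(quant_type), is_bias=True)   # bias grid
#   gol.set_value(build_list(quant_type, num_bits, e_bits))    # weight/activation grid
# get_nearest_val() in module.py reads the grids back via gol.get_value(is_bias).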
import torch
import torch.nn as nn
import torch.nn.functional as F
from module import *
import module
class AlexNet_BN(nn.Module):
def __init__(self, num_channels=3, num_classes=10):
super(AlexNet_BN, self).__init__()
# original size 32x32
self.conv1 = nn.Conv2d(num_channels, 32, kernel_size=3, padding=1, bias=True)
self.bn1 = nn.BatchNorm2d(32)
self.relu1 = nn.ReLU(inplace=True)
self.pool1 = nn.MaxPool2d(kernel_size=2, stride=2) # output[32, 16, 16]
self.conv2 = nn.Conv2d(32, 64, kernel_size=3, padding=1, bias=True) # output[64, 16, 16]
self.bn2 = nn.BatchNorm2d(64)
self.relu2 = nn.ReLU(inplace=True)
self.pool2 = nn.MaxPool2d(kernel_size=2, stride=2) # output[64, 8, 8]
self.conv3 = nn.Conv2d(64, 128, kernel_size=3, padding=1, bias=True) # output[128, 8, 8]
self.bn3 = nn.BatchNorm2d(128)
self.relu3 = nn.ReLU(inplace=True)
self.conv4 = nn.Conv2d(128, 256, kernel_size=3, padding=1, bias=True) # output[256, 8, 8]
self.bn4 = nn.BatchNorm2d(256)
self.relu4 = nn.ReLU(inplace=True)
self.conv5 = nn.Conv2d(256, 256, kernel_size=3, padding=1, bias=True) # output[256, 8, 8]
self.bn5 = nn.BatchNorm2d(256)
self.relu5 = nn.ReLU(inplace=True)
self.pool5 = nn.MaxPool2d(kernel_size=3, stride=2) # output[256, 3, 3]
self.drop1 = nn.Dropout(p=0.5)
self.fc1 = nn.Linear(256 * 3 * 3, 1024, bias=True)
self.relu6 = nn.ReLU(inplace=True)
self.drop2 = nn.Dropout(p=0.5)
self.fc2 = nn.Linear(1024, 512, bias=True)
self.relu7 = nn.ReLU(inplace=True)
self.fc3 = nn.Linear(512, num_classes, bias=True)
def forward(self, x):
x = self.conv1(x)
x = self.bn1(x)
x = self.relu1(x)
x = self.pool1(x)
x = self.conv2(x)
x = self.bn2(x)
x = self.relu2(x)
x = self.pool2(x)
x = self.conv3(x)
x = self.bn3(x)
x = self.relu3(x)
x = self.conv4(x)
x = self.bn4(x)
x = self.relu4(x)
x = self.conv5(x)
x = self.bn5(x)
x = self.relu5(x)
x = self.pool5(x)
x = torch.flatten(x, start_dim=1)
x = self.drop1(x)
x = self.fc1(x)
x = self.relu6(x)
x = self.drop2(x)
x = self.fc2(x)
x = self.relu7(x)
x = self.fc3(x)
return x
def quantize(self, quant_type, num_bits=8, e_bits=3):
# e_bits is only used for FLOAT quantization
self.qconv1 = QConvBNReLU(quant_type, self.conv1, self.bn1, qi=True, qo=True, num_bits=num_bits, e_bits=e_bits)
self.qpool1 = QMaxPooling2d(quant_type, kernel_size=2, stride=2, padding=0, num_bits=num_bits, e_bits=e_bits)
self.qconv2 = QConvBNReLU(quant_type, self.conv2, self.bn2, qi=False, qo=True, num_bits=num_bits, e_bits=e_bits)
self.qpool2 = QMaxPooling2d(quant_type, kernel_size=2, stride=2, padding=0, num_bits=num_bits, e_bits=e_bits)
self.qconv3 = QConvBNReLU(quant_type, self.conv3, self.bn3, qi=False, qo=True, num_bits=num_bits, e_bits=e_bits)
self.qconv4 = QConvBNReLU(quant_type, self.conv4, self.bn4, qi=False, qo=True, num_bits=num_bits, e_bits=e_bits)
self.qconv5 = QConvBNReLU(quant_type, self.conv5, self.bn5, qi=False, qo=True, num_bits=num_bits, e_bits=e_bits)
self.qpool5 = QMaxPooling2d(quant_type, kernel_size=3, stride=2, padding=0, num_bits=num_bits, e_bits=e_bits)
self.qfc1 = QLinear(quant_type, self.fc1, qi=False, qo=True, num_bits=num_bits, e_bits=e_bits)
self.qrelu6 = QReLU(quant_type, num_bits=num_bits, e_bits=e_bits)
self.qfc2 = QLinear(quant_type, self.fc2, qi=False, qo=True, num_bits=num_bits, e_bits=e_bits)
self.qrelu7 = QReLU(quant_type, num_bits=num_bits, e_bits=e_bits)
self.qfc3 = QLinear(quant_type, self.fc3, qi=False, qo=True, num_bits=num_bits, e_bits=e_bits)
def quantize_forward(self, x):
x = self.qconv1(x)
x = self.qpool1(x)
x = self.qconv2(x)
x = self.qpool2(x)
x = self.qconv3(x)
x = self.qconv4(x)
x = self.qconv5(x)
x = self.qpool5(x)
x = torch.flatten(x, start_dim=1)
x = self.drop1(x)
x = self.qfc1(x)
x = self.qrelu6(x)
x = self.drop2(x)
x = self.qfc2(x)
x = self.qrelu7(x)
x = self.qfc3(x)
return x
def freeze(self):
self.qconv1.freeze()
self.qpool1.freeze(self.qconv1.qo)
self.qconv2.freeze(self.qconv1.qo)
self.qpool2.freeze(self.qconv2.qo)
self.qconv3.freeze(self.qconv2.qo)
self.qconv4.freeze(self.qconv3.qo)
self.qconv5.freeze(self.qconv4.qo)
self.qpool5.freeze(self.qconv5.qo)
self.qfc1.freeze(self.qconv5.qo)
self.qrelu6.freeze(self.qfc1.qo)
self.qfc2.freeze(self.qfc1.qo)
self.qrelu7.freeze(self.qfc2.qo)
self.qfc3.freeze(self.qfc2.qo)
def quantize_inference(self, x):
x = self.qconv1.qi.quantize_tensor(x)
x = self.qconv1.quantize_inference(x)
x = self.qpool1.quantize_inference(x)
x = self.qconv2.quantize_inference(x)
x = self.qpool2.quantize_inference(x)
x = self.qconv3.quantize_inference(x)
x = self.qconv4.quantize_inference(x)
x = self.qconv5.quantize_inference(x)
x = self.qpool5.quantize_inference(x)
x = torch.flatten(x, start_dim=1)
x = self.qfc1.quantize_inference(x)
x = self.qrelu6.quantize_inference(x)
x = self.qfc2.quantize_inference(x)
x = self.qrelu7.quantize_inference(x)
x = self.qfc3.quantize_inference(x)
x = self.qfc3.qo.dequantize_tensor(x)
return x
import math
import numpy as np
import gol
import torch
import torch.nn as nn
import torch.nn.functional as F
from function import FakeQuantize
# snap each element of x to the nearest value in the quantization grid
def get_nearest_val(quant_type,x,is_bias=False):
if quant_type=='INT':
return x.round_()
plist = gol.get_value(is_bias)
shape = x.shape
xhard = x.view(-1)
plist = plist.type_as(x)
# index of the nearest grid value for each element
idx = (xhard.unsqueeze(0) - plist.unsqueeze(1)).abs().min(dim=0)[1]
xhard = plist[idx].view(shape)
xout = (xhard - x).detach() + x
return xout
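# Example: with a grid plist = [-1, -0.5, 0, 0.5, 1], inputs [0.3, 0.8] map to [0.5, 1.0];
# the (xhard - x).detach() + x trick above makes the snap a straight-through estimator,
# so gradients flow through x unchanged.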
# maximum of the quantization range under symmetric signed quantization
def get_qmax(quant_type,num_bits=None, e_bits=None):
if quant_type == 'INT':
qmax = 2. ** (num_bits - 1) - 1
elif quant_type == 'POT':
qmax = 1
else: #FLOAT
m_bits = num_bits - 1 - e_bits
dist_m = 2 ** (-m_bits)
e = 2 ** (e_bits - 1)
expo = 2 ** e
m = 2 ** m_bits -1
frac = 1. + m * dist_m
qmax = frac * expo
return qmax
# symmetric signed quantization throughout, so zero_point is fixed at 0
def calcScaleZeroPoint(min_val, max_val, qmax):
scale = torch.max(max_val.abs(),min_val.abs()) / qmax
zero_point = torch.tensor(0.)
return scale, zero_point
# quantize the input; both input and output are tensors
def quantize_tensor(quant_type, x, scale, zero_point, qmax, is_bias=False):
# the quantized range is determined directly by the bit width
qmin = -qmax
q_x = zero_point + x / scale
q_x.clamp_(qmin, qmax)
q_x = get_nearest_val(quant_type, q_x, is_bias)
return q_x
# bias uses a different precision; num_bits/e_bits are chosen per quantization type
def bias_qmax(quant_type):
if quant_type == 'INT':
return get_qmax(quant_type, 64)
elif quant_type == 'POT':
return get_qmax(quant_type)
else:
return get_qmax(quant_type, 16, 5)
# dequantize back to FP32; no further clamping needed
def dequantize_tensor(q_x, scale, zero_point):
return scale * (q_x - zero_point)
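# Worked example (symmetric INT8): for min=-0.2, max=0.4, qmax=127,
# scale = max(|min|, |max|) / qmax = 0.4 / 127 ≈ 0.00315 and zero_point = 0,
# so x = 0.1 quantizes to round(0.1 / scale) = 32 and dequantizes to ≈ 0.1008.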
class QParam(nn.Module):
def __init__(self,quant_type, num_bits=8, e_bits=3):
super(QParam, self).__init__()
self.quant_type = quant_type
self.num_bits = num_bits
self.e_bits = e_bits
self.qmax = get_qmax(quant_type, num_bits, e_bits)
scale = torch.tensor([], requires_grad=False)
zero_point = torch.tensor([], requires_grad=False)
min = torch.tensor([], requires_grad=False)
max = torch.tensor([], requires_grad=False)
# registering as buffers lets them be recorded in the state_dict
self.register_buffer('scale', scale)
self.register_buffer('zero_point', zero_point)
self.register_buffer('min', min)
self.register_buffer('max', max)
# update the observed range and the quantization parameters
def update(self, tensor):
if self.max.nelement() == 0 or self.max.data < tensor.max().data:
self.max.data = tensor.max().data
self.max.clamp_(min=0)
if self.min.nelement() == 0 or self.min.data > tensor.min().data:
self.min.data = tensor.min().data
self.min.clamp_(max=0)
self.scale, self.zero_point = calcScaleZeroPoint(self.min, self.max, self.qmax)
def quantize_tensor(self, tensor):
return quantize_tensor(self.quant_type, tensor, self.scale, self.zero_point, self.qmax)
def dequantize_tensor(self, q_x):
return dequantize_tensor(q_x, self.scale, self.zero_point)
# this method allows the parameters to be restored from a state_dict
def _load_from_state_dict(self, state_dict, prefix, local_metadata, strict, missing_keys, unexpected_keys,
error_msgs):
key_names = ['scale', 'zero_point', 'min', 'max']
for key in key_names:
value = getattr(self, key)
value.data = state_dict[prefix + key].data
state_dict.pop(prefix + key)
# the return value is what gets printed for this object
def __str__(self):
info = 'scale: %.10f ' % self.scale
info += 'zp: %.6f ' % self.zero_point
info += 'min: %.6f ' % self.min
info += 'max: %.6f' % self.max
return info
# base class of the concrete quantized layers; qi/qo are the input/output quantizers
class QModule(nn.Module):
def __init__(self,quant_type, qi=True, qo=True, num_bits=8, e_bits=3):
super(QModule, self).__init__()
if qi:
self.qi = QParam(quant_type,num_bits, e_bits)
if qo:
self.qo = QParam(quant_type,num_bits, e_bits)
self.quant_type = quant_type
self.num_bits = num_bits
self.e_bits = e_bits
self.bias_qmax = bias_qmax(quant_type)
def freeze(self):
pass # no-op; subclasses override freeze
def quantize_inference(self, x):
raise NotImplementedError('quantize_inference should be implemented.')
"""
QModule 量化卷积
:quant_type: 量化类型
:conv_module: 卷积模块
:qi: 是否量化输入特征图
:qo: 是否量化输出特征图
:num_bits: 8位bit数
"""
class QConv2d(QModule):
def __init__(self, quant_type, conv_module, qi=True, qo=True, num_bits=8, e_bits=3):
super(QConv2d, self).__init__(quant_type, qi, qo, num_bits, e_bits)
self.conv_module = conv_module
self.qw = QParam(quant_type, num_bits,e_bits)
self.register_buffer('M', torch.tensor([], requires_grad=False)) # register M as a buffer
# freeze fixes the truly-quantized weight parameters and writes them back into the original full-precision layer, which simplifies the later divergence computation
def freeze(self, qi=None, qo=None):
if hasattr(self, 'qi') and qi is not None:
raise ValueError('qi was already provided in the init function.')
if not hasattr(self, 'qi') and qi is None:
raise ValueError('qi does not exist and should be provided.')
if hasattr(self, 'qo') and qo is not None:
raise ValueError('qo was already provided in the init function.')
if not hasattr(self, 'qo') and qo is None:
raise ValueError('qo does not exist and should be provided.')
# pooling and activation inputs share the previous layer's output quantizer, so no extra min/max statistics are needed for them
if qi is not None:
self.qi = qi
if qo is not None:
self.qo = qo
# per https://zhuanlan.zhihu.com/p/156835141, this is the coefficient in Eq. (3)
self.M.data = (self.qw.scale * self.qi.scale / self.qo.scale).data
self.conv_module.weight.data = self.qw.quantize_tensor(self.conv_module.weight.data)
self.conv_module.weight.data = self.conv_module.weight.data - self.qw.zero_point
self.conv_module.bias.data = quantize_tensor(self.quant_type,
self.conv_module.bias.data, scale=self.qi.scale * self.qw.scale,
zero_point=0.,qmax=self.bias_qmax, is_bias=True)
def forward(self, x): # forward pass; x is floating-point data
if hasattr(self, 'qi'):
self.qi.update(x)
x = FakeQuantize.apply(x, self.qi) # fake-quantize the input tensor
# update qw before the forward so the weight is quantized with the correct scale
self.qw.update(self.conv_module.weight.data)
# note: this only collects the per-layer ranges of x and weight; the bias is not quantized here
tmp_wgt = FakeQuantize.apply(self.conv_module.weight, self.qw)
x = F.conv2d(x, tmp_wgt, self.conv_module.bias,
stride=self.conv_module.stride,
padding=self.conv_module.padding, dilation=self.conv_module.dilation,
groups=self.conv_module.groups)
if hasattr(self, 'qo'):
self.qo.update(x)
x = FakeQuantize.apply(x, self.qo)
return x
# uses q_a = M * (sum (q_w - Z_w)(q_x - Z_x) + q_b), with M = S_w * S_x / S_o
def quantize_inference(self, x): # the input x here is already quantized
x = x - self.qi.zero_point
x = self.conv_module(x)
x = self.M * x
x = get_nearest_val(self.quant_type,x)
x = x + self.qo.zero_point
return x
class QLinear(QModule):
def __init__(self, quant_type, fc_module, qi=True, qo=True, num_bits=8, e_bits=3):
super(QLinear, self).__init__(quant_type, qi, qo, num_bits, e_bits)
self.fc_module = fc_module
self.qw = QParam(quant_type, num_bits, e_bits)
self.register_buffer('M', torch.tensor([], requires_grad=False)) # register M as a buffer
def freeze(self, qi=None, qo=None):
if hasattr(self, 'qi') and qi is not None:
raise ValueError('qi was already provided in the init function.')
if not hasattr(self, 'qi') and qi is None:
raise ValueError('qi does not exist and should be provided.')
if hasattr(self, 'qo') and qo is not None:
raise ValueError('qo was already provided in the init function.')
if not hasattr(self, 'qo') and qo is None:
raise ValueError('qo does not exist and should be provided.')
if qi is not None:
self.qi = qi
if qo is not None:
self.qo = qo
self.M.data = (self.qw.scale * self.qi.scale / self.qo.scale).data
self.fc_module.weight.data = self.qw.quantize_tensor(self.fc_module.weight.data)
self.fc_module.weight.data = self.fc_module.weight.data - self.qw.zero_point
self.fc_module.bias.data = quantize_tensor(self.quant_type,
self.fc_module.bias.data, scale=self.qi.scale * self.qw.scale,
zero_point=0., qmax=self.bias_qmax, is_bias=True)
def forward(self, x):
if hasattr(self, 'qi'):
self.qi.update(x)
x = FakeQuantize.apply(x, self.qi)
self.qw.update(self.fc_module.weight.data)
tmp_wgt = FakeQuantize.apply(self.fc_module.weight, self.qw)
x = F.linear(x, tmp_wgt, self.fc_module.bias)
if hasattr(self, 'qo'):
self.qo.update(x)
x = FakeQuantize.apply(x, self.qo)
return x
def quantize_inference(self, x):
x = x - self.qi.zero_point
x = self.fc_module(x)
x = self.M * x
x = get_nearest_val(self.quant_type,x)
x = x + self.qo.zero_point
return x
class QReLU(QModule):
def __init__(self,quant_type, qi=False, qo=True, num_bits=8, e_bits=3):
super(QReLU, self).__init__(quant_type, qi, qo, num_bits, e_bits)
def freeze(self, qi=None):
if hasattr(self, 'qi') and qi is not None:
raise ValueError('qi was already provided in the init function.')
if not hasattr(self, 'qi') and qi is None:
raise ValueError('qi does not exist and should be provided.')
if qi is not None:
self.qi = qi
def forward(self, x):
if hasattr(self, 'qi'):
self.qi.update(x)
x = FakeQuantize.apply(x, self.qi)
x = F.relu(x)
return x
def quantize_inference(self, x):
x = x.clone()
x[x < self.qi.zero_point] = self.qi.zero_point
return x
class QMaxPooling2d(QModule):
def __init__(self, quant_type, kernel_size=3, stride=1, padding=0, qi=False, qo=True, num_bits=8,e_bits=3):
super(QMaxPooling2d, self).__init__(quant_type, qi, qo, num_bits, e_bits)
self.kernel_size = kernel_size
self.stride = stride
self.padding = padding
def freeze(self, qi=None):
if hasattr(self, 'qi') and qi is not None:
raise ValueError('qi was already provided in the init function.')
if not hasattr(self, 'qi') and qi is None:
raise ValueError('qi does not exist and should be provided.')
if qi is not None:
self.qi = qi
def forward(self, x):
if hasattr(self, 'qi'):
self.qi.update(x)
x = FakeQuantize.apply(x, self.qi)
x = F.max_pool2d(x, self.kernel_size, self.stride, self.padding)
return x
def quantize_inference(self, x):
return F.max_pool2d(x, self.kernel_size, self.stride, self.padding)
class QConvBNReLU(QModule):
def __init__(self, quant_type, conv_module, bn_module, qi=True, qo=True, num_bits=8, e_bits=3):
super(QConvBNReLU, self).__init__(quant_type, qi, qo, num_bits, e_bits)
self.conv_module = conv_module
self.bn_module = bn_module
self.qw = QParam(quant_type, num_bits,e_bits)
self.register_buffer('M', torch.tensor([], requires_grad=False)) # register M as a buffer
def fold_bn(self, mean, std):
if self.bn_module.affine:
gamma_ = self.bn_module.weight / std
weight = self.conv_module.weight * gamma_.view(self.conv_module.out_channels, 1, 1, 1)
if self.conv_module.bias is not None:
bias = gamma_ * self.conv_module.bias - gamma_ * mean + self.bn_module.bias
else:
bias = self.bn_module.bias - gamma_ * mean
else:
gamma_ = 1 / std
weight = self.conv_module.weight * gamma_
if self.conv_module.bias is not None:
bias = gamma_ * self.conv_module.bias - gamma_ * mean
else:
bias = -gamma_ * mean
return weight, bias
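# Folding identity used above: for y = gamma * (conv(x) - mean) / std + beta, the folded
# layer with weight' = weight * gamma / std (per output channel) and
# bias' = (bias - mean) * gamma / std + beta reproduces bn(conv(x)) exactly
# when mean/std are the BN statistics used at inference.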
def freeze(self, qi=None, qo=None):
if hasattr(self, 'qi') and qi is not None:
raise ValueError('qi was already provided in the init function.')
if not hasattr(self, 'qi') and qi is None:
raise ValueError('qi does not exist and should be provided.')
if hasattr(self, 'qo') and qo is not None:
raise ValueError('qo was already provided in the init function.')
if not hasattr(self, 'qo') and qo is None:
raise ValueError('qo does not exist and should be provided.')
if qi is not None:
self.qi = qi
if qo is not None:
self.qo = qo
self.M.data = (self.qw.scale * self.qi.scale / self.qo.scale).data
std = torch.sqrt(self.bn_module.running_var + self.bn_module.eps)
weight, bias = self.fold_bn(self.bn_module.running_mean, std)
self.conv_module.weight.data = self.qw.quantize_tensor(weight.data)
self.conv_module.weight.data = self.conv_module.weight.data - self.qw.zero_point
self.conv_module.bias.data = quantize_tensor(self.quant_type,
bias, scale=self.qi.scale * self.qw.scale,
zero_point=0., qmax=self.bias_qmax,is_bias=True)
def forward(self, x):
if hasattr(self, 'qi'):
self.qi.update(x)
x = FakeQuantize.apply(x, self.qi)
if self.training:
y = F.conv2d(x, self.conv_module.weight, self.conv_module.bias,
stride=self.conv_module.stride,
padding=self.conv_module.padding,
dilation=self.conv_module.dilation,
groups=self.conv_module.groups)
y = y.permute(1, 0, 2, 3) # NCHW -> CNHW
y = y.contiguous().view(self.conv_module.out_channels, -1) # CNHW -> C,NHW
# mean = y.mean(1)
# var = y.var(1)
mean = y.mean(1).detach()
var = y.var(1).detach()
self.bn_module.running_mean = \
(1 - self.bn_module.momentum) * self.bn_module.running_mean + \
self.bn_module.momentum * mean
self.bn_module.running_var = \
(1 - self.bn_module.momentum) * self.bn_module.running_var + \
self.bn_module.momentum * var
else:
mean = self.bn_module.running_mean
var = self.bn_module.running_var
std = torch.sqrt(var + self.bn_module.eps)
weight, bias = self.fold_bn(mean, std)
self.qw.update(weight.data)
x = F.conv2d(x, FakeQuantize.apply(weight, self.qw), bias,
stride=self.conv_module.stride,
padding=self.conv_module.padding, dilation=self.conv_module.dilation,
groups=self.conv_module.groups)
x = F.relu(x)
if hasattr(self, 'qo'):
self.qo.update(x)
x = FakeQuantize.apply(x, self.qo)
return x
def quantize_inference(self, x):
x = x - self.qi.zero_point
x = self.conv_module(x)
x = self.M * x
x = get_nearest_val(self.quant_type,x)
x = x + self.qo.zero_point
x.clamp_(min=0)
return x
AlexNet_BN(
3.87 M, 100.000% Params, 70.26 MMac, 100.000% MACs,
(conv1): Conv2d(896, 0.023% Params, 917.5 KMac, 1.306% MACs, 3, 32, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
(bn1): BatchNorm2d(64, 0.002% Params, 65.54 KMac, 0.093% MACs, 32, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
(relu1): ReLU(0, 0.000% Params, 32.77 KMac, 0.047% MACs, inplace=True)
(pool1): MaxPool2d(0, 0.000% Params, 32.77 KMac, 0.047% MACs, kernel_size=2, stride=2, padding=0, dilation=1, ceil_mode=False)
(conv2): Conv2d(18.5 k, 0.478% Params, 4.73 MMac, 6.739% MACs, 32, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
(bn2): BatchNorm2d(128, 0.003% Params, 32.77 KMac, 0.047% MACs, 64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
(relu2): ReLU(0, 0.000% Params, 16.38 KMac, 0.023% MACs, inplace=True)
(pool2): MaxPool2d(0, 0.000% Params, 16.38 KMac, 0.023% MACs, kernel_size=2, stride=2, padding=0, dilation=1, ceil_mode=False)
(conv3): Conv2d(73.86 k, 1.908% Params, 4.73 MMac, 6.727% MACs, 64, 128, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
(bn3): BatchNorm2d(256, 0.007% Params, 16.38 KMac, 0.023% MACs, 128, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
(relu3): ReLU(0, 0.000% Params, 8.19 KMac, 0.012% MACs, inplace=True)
(conv4): Conv2d(295.17 k, 7.627% Params, 18.89 MMac, 26.886% MACs, 128, 256, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
(bn4): BatchNorm2d(512, 0.013% Params, 32.77 KMac, 0.047% MACs, 256, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
(relu4): ReLU(0, 0.000% Params, 16.38 KMac, 0.023% MACs, inplace=True)
(conv5): Conv2d(590.08 k, 15.247% Params, 37.77 MMac, 53.748% MACs, 256, 256, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
(bn5): BatchNorm2d(512, 0.013% Params, 32.77 KMac, 0.047% MACs, 256, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
(relu5): ReLU(0, 0.000% Params, 16.38 KMac, 0.023% MACs, inplace=True)
(pool5): MaxPool2d(0, 0.000% Params, 16.38 KMac, 0.023% MACs, kernel_size=3, stride=2, padding=0, dilation=1, ceil_mode=False)
(drop1): Dropout(0, 0.000% Params, 0.0 Mac, 0.000% MACs, p=0.5, inplace=False)
(fc1): Linear(2.36 M, 60.987% Params, 2.36 MMac, 3.359% MACs, in_features=2304, out_features=1024, bias=True)
(relu6): ReLU(0, 0.000% Params, 1.02 KMac, 0.001% MACs, inplace=True)
(drop2): Dropout(0, 0.000% Params, 0.0 Mac, 0.000% MACs, p=0.5, inplace=False)
(fc2): Linear(524.8 k, 13.560% Params, 524.8 KMac, 0.747% MACs, in_features=1024, out_features=512, bias=True)
(relu7): ReLU(0, 0.000% Params, 512.0 Mac, 0.001% MACs, inplace=True)
(fc3): Linear(5.13 k, 0.133% Params, 5.13 KMac, 0.007% MACs, in_features=512, out_features=10, bias=True)
)
from torch.serialization import load
from model import *
from extract_ratio import *
from utils import *
import gol
import openpyxl
import sys
import argparse
import torch
import torch.nn as nn
import torch.nn.functional as F
import torch.optim as optim
from torchvision import datasets, transforms
from torchvision.transforms.functional import InterpolationMode
import os
import os.path as osp
from torch.utils.tensorboard import SummaryWriter
def direct_quantize(model, test_loader,device):
for i, (data, target) in enumerate(test_loader, 1):
data = data.to(device)
output = model.quantize_forward(data).cpu()
if i % 500 == 0:
# calibration: run 500 batches through the fake-quantized model, then stop
break
print('direct quantization finished')
def full_inference(model, test_loader, device):
correct = 0
for i, (data, target) in enumerate(test_loader, 1):
data = data.to(device)
output = model(data).cpu()
pred = output.argmax(dim=1, keepdim=True)
correct += pred.eq(target.view_as(pred)).sum().item()
print('\nTest set: Full Model Accuracy: {:.2f}%'.format(100. * correct / len(test_loader.dataset)))
return 100. * correct / len(test_loader.dataset)
def quantize_inference(model, test_loader, device):
correct = 0
for i, (data, target) in enumerate(test_loader, 1):
data = data.to(device)
output = model.quantize_inference(data).cpu()
pred = output.argmax(dim=1, keepdim=True)
correct += pred.eq(target.view_as(pred)).sum().item()
print('Test set: Quant Model Accuracy: {:.2f}%'.format(100. * correct / len(test_loader.dataset)))
return 100. * correct / len(test_loader.dataset)
def js_div(p_output, q_output, get_softmax=True):
"""
Measures the JS divergence between two sets of logits.
"""
KLDivLoss = nn.KLDivLoss(reduction='sum')
if get_softmax:
p_output = F.softmax(p_output, dim=-1)
q_output = F.softmax(q_output, dim=-1)
log_mean_output = ((p_output + q_output)/2).log()
return (KLDivLoss(log_mean_output, p_output) + KLDivLoss(log_mean_output, q_output))/2
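# JS(P, Q) = KL(P || M)/2 + KL(Q || M)/2 with M = (P + Q)/2; it is symmetric and
# non-negative, which is why small negative numerical results are clipped to 0 below.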
if __name__ == "__main__":
batch_size = 32
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
print(device)
train_loader = torch.utils.data.DataLoader(
datasets.CIFAR10('data', train=True, download=True,
transform=transforms.Compose([
transforms.Resize((32, 32), interpolation=InterpolationMode.BICUBIC),
transforms.RandomHorizontalFlip(),
transforms.ToTensor(),
transforms.Normalize((0.4914, 0.4822, 0.4465), (0.2023, 0.1994, 0.2010))
])),
batch_size=batch_size, shuffle=True, num_workers=1, pin_memory=True
)
test_loader = torch.utils.data.DataLoader(
datasets.CIFAR10('data', train=False, transform=transforms.Compose([
transforms.Resize((32, 32), interpolation=InterpolationMode.BICUBIC),
transforms.ToTensor(),
transforms.Normalize((0.4914, 0.4822, 0.4465), (0.2023, 0.1994, 0.2010))
])),
batch_size=batch_size, shuffle=True, num_workers=1, pin_memory=True
)
model = AlexNet_BN()
writer = SummaryWriter(log_dir='./log')
full_file = 'ckpt/cifar10_AlexNet_BN.pt'
model.load_state_dict(torch.load(full_file))
model.to(device)
load_ptq = True
ptq_file_prefix = 'ckpt/cifar10_AlexNet_BN_ptq_'
model.eval()
full_acc = full_inference(model, test_loader, device)
full_params = []
layer, par_ratio, flop_ratio = extract_ratio()
for name, param in model.named_parameters():
param_norm = F.normalize(param.data.cpu(),p=2,dim=-1)
full_params.append(param_norm)
writer.add_histogram(tag='Full_' + name + '_data', values=param.data)
gol._init()
quant_type_list = ['INT','POT','FLOAT']
title_list = []
js_flops_list = []
js_param_list = []
ptq_acc_list = []
acc_loss_list = []
for quant_type in quant_type_list:
num_bit_list = numbit_list(quant_type)
# For each quantization type, the bias grid only needs to be set once.
# INT bit widths are large, so a lookup grid would be too costly; rounding (round_) is used directly.
if quant_type != 'INT':
bias_list = build_bias_list(quant_type)
gol.set_value(bias_list, is_bias=True)
for num_bits in num_bit_list:
e_bit_list = ebit_list(quant_type,num_bits)
for e_bits in e_bit_list:
model_ptq = AlexNet_BN()
if quant_type == 'FLOAT':
title = '%s_%d_E%d' % (quant_type, num_bits, e_bits)
else:
title = '%s_%d' % (quant_type, num_bits)
print('\nPTQ: '+title)
title_list.append(title)
# set the quantization grid
if quant_type != 'INT':
plist = build_list(quant_type, num_bits, e_bits)
gol.set_value(plist)
# check whether a cached PTQ model can be loaded
if load_ptq is True and osp.exists(ptq_file_prefix + title + '.pt'):
model_ptq.quantize(quant_type,num_bits,e_bits)
model_ptq.load_state_dict(torch.load(ptq_file_prefix + title + '.pt'))
model_ptq.to(device)
print('Successfully load ptq model: ' + title)
else:
model_ptq.load_state_dict(torch.load(full_file))
model_ptq.to(device)
model_ptq.quantize(quant_type,num_bits,e_bits)
model_ptq.eval()
direct_quantize(model_ptq, train_loader, device)
torch.save(model_ptq.state_dict(), ptq_file_prefix + title + '.pt')
model_ptq.freeze()
ptq_acc = quantize_inference(model_ptq, test_loader, device)
ptq_acc_list.append(ptq_acc)
acc_loss = (full_acc - ptq_acc) / full_acc
acc_loss_list.append(acc_loss)
idx = -1
# accumulate JS divergence weighted by FLOP/parameter ratios
js_flops = 0.
js_param = 0.
for name, param in model_ptq.named_parameters():
if '.' not in name:
continue
idx = idx + 1
prefix = name.split('.')[0]
if prefix in layer:
layer_idx = layer.index(prefix)
ptq_param = param.data.cpu()
# L2-normalize along the last dimension
ptq_norm = F.normalize(ptq_param,p=2,dim=-1)
writer.add_histogram(tag=title +':'+ name + '_data', values=ptq_param)
js = js_div(ptq_norm,full_params[idx])
js = js.item()
if js < 0.:
js = 0.
js_flops = js_flops + js * flop_ratio[layer_idx]
js_param = js_param + js * par_ratio[layer_idx]
js_flops_list.append(js_flops)
js_param_list.append(js_param)
print(title + ': js_flops: %f js_param: %f acc_loss: %f' % (js_flops, js_param, acc_loss))
# write results to the xlsx sheet
workbook = openpyxl.Workbook()
worksheet = workbook.active
worksheet.cell(row=1,column=1,value='FP32-acc')
worksheet.cell(row=1,column=2,value=full_acc)
worksheet.cell(row=3,column=1,value='title')
worksheet.cell(row=3,column=2,value='js_flops')
worksheet.cell(row=3,column=3,value='js_param')
worksheet.cell(row=3,column=4,value='ptq_acc')
worksheet.cell(row=3,column=5,value='acc_loss')
for i in range(len(title_list)):
worksheet.cell(row=i+4, column=1, value=title_list[i])
worksheet.cell(row=i+4, column=2, value=js_flops_list[i])
worksheet.cell(row=i+4, column=3, value=js_param_list[i])
worksheet.cell(row=i+4, column=4, value=ptq_acc_list[i])
worksheet.cell(row=i+4, column=5, value=acc_loss_list[i])
workbook.save('ptq_result.xlsx')
writer.close()
ft = open('ptq_result.txt','w')
print('title_list:',file=ft)
print(" ".join(title_list),file=ft)
print('js_flops_list:',file=ft)
print(" ".join(str(i) for i in js_flops_list), file=ft)
print('js_param_list:',file=ft)
print(" ".join(str(i) for i in js_param_list), file=ft)
print('ptq_acc_list:',file=ft)
print(" ".join(str(i) for i in ptq_acc_list), file=ft)
print('acc_loss_list:',file=ft)
print(" ".join(str(i) for i in acc_loss_list), file=ft)
ft.close()
title_list:
INT_2 INT_3 INT_4 INT_5 INT_6 INT_7 INT_8 INT_9 INT_10 INT_11 INT_12 INT_13 INT_14 INT_15 INT_16 POT_2 POT_3 POT_4 POT_5 POT_6 POT_7 POT_8 FLOAT_3_E1 FLOAT_4_E1 FLOAT_4_E2 FLOAT_5_E1 FLOAT_5_E2 FLOAT_5_E3 FLOAT_6_E1 FLOAT_6_E2 FLOAT_6_E3 FLOAT_6_E4 FLOAT_7_E1 FLOAT_7_E2 FLOAT_7_E3 FLOAT_7_E4 FLOAT_7_E5 FLOAT_8_E1 FLOAT_8_E2 FLOAT_8_E3 FLOAT_8_E4 FLOAT_8_E5 FLOAT_8_E6
js_flops_list:
7387.838496104369 2625.646833355205 589.9163523840543 139.95492844215636 33.898346427925894 8.359296461665465 2.1216004520452856 0.5949235039400762 0.21309794043284996 0.11825646935172719 0.09429331800099429 0.08841095083362506 0.08699678964334162 0.08663305654197168 0.08647598591627721 7387.827067868839 1617.97485666175 133.61810656803314 131.5187461175448 131.5184480272645 131.52048525420298 131.520379017506 1067.9230850082097 255.60428668878217 239.45983640568826 94.13499025246169 85.9906047976941 36.80986958674102 54.05786859221271 47.867086055234125 9.63279058044633 36.55164430227947 42.24566761329685 35.471179319270334 2.5217779013551413 9.531897021062111 36.55105201198746 38.2852725131297 30.667455239210526 0.7272006067102655 2.4588845270355084 9.531607285329187 36.55102319987964
js_param_list:
7387.838496104369 2625.646833355205 589.9163523840543 139.95492844215636 33.898346427925894 8.359296461665465 2.1216004520452856 0.5949235039400762 0.21309794043284996 0.11825646935172719 0.09429331800099429 0.08841095083362506 0.08699678964334162 0.08663305654197168 0.08647598591627721 7387.827067868839 1617.97485666175 133.61810656803314 131.5187461175448 131.5184480272645 131.52048525420298 131.520379017506 1067.9230850082097 255.60428668878217 239.45983640568826 94.13499025246169 85.9906047976941 36.80986958674102 54.05786859221271 47.867086055234125 9.63279058044633 36.55164430227947 42.24566761329685 35.471179319270334 2.5217779013551413 9.531897021062111 36.55105201198746 38.2852725131297 30.667455239210526 0.7272006067102655 2.4588845270355084 9.531607285329187 36.55102319987964
ptq_acc_list:
10.0 19.92 48.84 81.62 85.89 86.9 87.03 87.1 87.13 87.09 87.08 87.1 87.08 87.09 87.08 10.0 18.75 39.19 40.72 43.55 41.77 41.42 16.37 58.14 70.31 75.27 81.85 82.79 79.64 84.63 86.46 79.42 81.14 85.76 86.84 78.98 37.06 81.02 85.85 86.88 74.87 41.3 36.46
acc_loss_list:
0.8851762544494202 0.771271098863245 0.43920082673096794 0.06280858881616717 0.013778849466069614 0.002181651165460991 0.0006889424733035052 -0.00011482374555047542 -0.00045929498220222804 0.0 0.0001148237455506386 -0.00011482374555047542 0.0001148237455506386 0.0 0.0001148237455506386 0.8851762544494202 0.7847054770926628 0.5500057411872775 0.5324377081180388 0.49994258812722475 0.5203812148352279 0.5244000459294982 0.8120335285337007 0.33241474336892873 0.192674245033873 0.13572166724078547 0.06016764266850395 0.049374210586749304 0.08554369043518202 0.028246641405442734 0.0072338959696866415 0.08806981283729477 0.06832012860259505 0.015271558158227101 0.0028705936387644964 0.09312205764152026 0.574463198989551 0.06969801354920206 0.014238144448272006 0.002411298656562268 0.14031461706280857 0.5257779308761052 0.5813526237225859
from model import *
import torch
import torch.nn as nn
import torch.optim as optim
from torchvision import datasets, transforms
from torchvision.transforms.functional import InterpolationMode
import os
import os.path as osp
def train(model, device, train_loader, optimizer, epoch):
model.train()
lossLayer = torch.nn.CrossEntropyLoss()
for batch_idx, (data, target) in enumerate(train_loader):
data, target = data.to(device), target.to(device)
optimizer.zero_grad()
output = model(data)
loss = lossLayer(output, target)
loss.backward()
optimizer.step()
if batch_idx % 50 == 0:
print('Train Epoch: {} [{}/{}]\tLoss: {:.6f}'.format(
epoch, batch_idx * len(data), len(train_loader.dataset), loss.item()
))
def test(model, device, test_loader):
model.eval()
test_loss = 0
correct = 0
lossLayer = torch.nn.CrossEntropyLoss(reduction='sum')
for data, target in test_loader:
data, target = data.to(device), target.to(device)
output = model(data)
test_loss += lossLayer(output, target).item()
pred = output.argmax(dim=1, keepdim=True)
correct += pred.eq(target.view_as(pred)).sum().item()
test_loss /= len(test_loader.dataset)
print('\nTest set: Average loss: {:.4f}, Accuracy: {:.2f}%\n'.format(
test_loss, 100. * correct / len(test_loader.dataset)
))
if __name__ == "__main__":
batch_size = 32
test_batch_size = 32
seed = 1
epochs1 = 15
epochs2 = epochs1+10
epochs3 = epochs2+10
lr1 = 0.01
lr2 = 0.001
lr3 = 0.0001
momentum = 0.5
save_model = True
torch.manual_seed(seed)
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
train_loader = torch.utils.data.DataLoader(
datasets.CIFAR10('data', train=True, download=True,
transform=transforms.Compose([
transforms.Resize((32, 32), interpolation=InterpolationMode.BICUBIC),
transforms.RandomHorizontalFlip(),
transforms.ToTensor(),
transforms.Normalize((0.4914, 0.4822, 0.4465), (0.2023, 0.1994, 0.2010))
])),
batch_size=batch_size, shuffle=True, num_workers=1, pin_memory=True
)
test_loader = torch.utils.data.DataLoader(
datasets.CIFAR10('data', train=False, transform=transforms.Compose([
transforms.Resize((32, 32), interpolation=InterpolationMode.BICUBIC),
transforms.ToTensor(),
transforms.Normalize((0.4914, 0.4822, 0.4465), (0.2023, 0.1994, 0.2010))
])),
batch_size=test_batch_size, shuffle=True, num_workers=1, pin_memory=True
)
model = AlexNet_BN().to(device)
optimizer1 = optim.SGD(model.parameters(), lr=lr1, momentum=momentum)
optimizer2 = optim.SGD(model.parameters(), lr=lr2, momentum=momentum)
optimizer3 = optim.SGD(model.parameters(), lr=lr3, momentum=momentum)
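# Manual three-stage step decay: epochs 1-15 at lr=0.01, 16-25 at lr=0.001, 26-35 at lr=0.0001
# (equivalent to optim.lr_scheduler.MultiStepLR with milestones=[15, 25] and gamma=0.1).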
for epoch in range(1, epochs1 + 1):
train(model, device, train_loader, optimizer1, epoch)
test(model, device, test_loader)
for epoch in range(epochs1 + 1, epochs2 + 1):
train(model, device, train_loader, optimizer2, epoch)
test(model, device, test_loader)
for epoch in range(epochs2 + 1, epochs3 + 1):
train(model, device, train_loader, optimizer3, epoch)
test(model, device, test_loader)
if save_model:
if not osp.exists('ckpt'):
os.makedirs('ckpt')
torch.save(model.state_dict(), 'ckpt/cifar10_AlexNet_BN.pt')
import torch
def ebit_list(quant_type, num_bits):
if quant_type == 'FLOAT':
e_bit_list = list(range(1,num_bits-1))
else:
e_bit_list = [0]
return e_bit_list
def numbit_list(quant_type):
if quant_type == 'INT':
num_bit_list = list(range(2,17))
elif quant_type == 'POT':
num_bit_list = list(range(2,9))
else:
num_bit_list = list(range(2,9))
# num_bit_list = [8]
return num_bit_list
def build_bias_list(quant_type):
if quant_type == 'POT':
return build_pot_list(8)
else:
return build_float_list(16,7)
def build_list(quant_type, num_bits, e_bits):
if quant_type == 'POT':
return build_pot_list(num_bits)
else:
return build_float_list(num_bits,e_bits)
def build_pot_list(num_bits):
plist = [0.]
for i in range(-2 ** (num_bits-1) + 2, 1):
# i goes up to 0, so the maximum POT value is 1
plist.append(2. ** i)
plist.append(-2. ** i)
plist = torch.Tensor(list(set(plist)))
# plist = plist.mul(1.0 / torch.max(plist))
return plist
def build_float_list(num_bits,e_bits):
m_bits = num_bits - 1 - e_bits
plist = [0.]
# spacing between adjacent mantissa values
dist_m = 2 ** (-m_bits)
e = -2 ** (e_bits - 1) + 1
for m in range(1, 2 ** m_bits):
frac = m * dist_m # mantissa part
expo = 2 ** e # exponent part
flt = frac * expo
plist.append(flt)
plist.append(-flt)
for e in range(-2 ** (e_bits - 1) + 2, 2 ** (e_bits - 1) + 1):
expo = 2 ** e
for m in range(0, 2 ** m_bits):
frac = 1. + m * dist_m
flt = frac * expo
plist.append(flt)
plist.append(-flt)
plist = torch.Tensor(list(set(plist)))
return plist
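# Example: build_float_list(4, 2) (i.e. FLOAT_4_E2, one mantissa bit) yields the grid
# {0, ±0.25, ±1.0, ±1.5, ±2.0, ±3.0, ±4.0, ±6.0}; its largest magnitude, 6.0,
# matches get_qmax('FLOAT', 4, 2) in module.py.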