Commit b965e176 by Klin

feat: VGG_19: ptq and fit

parent 15882bd1
# VGG_19 Quantization Notes
+ Compared with VGG_16, the only change is the cfg in model.py.
## PTQ
+ Quantization results
![image-20230415132704998](image/VGG19_table.png)
+ Fit results
+ The low R² is most likely caused by the FLOAT quantization points whose JS divergence is small yet whose accuracy collapses to 10% (see the fitting sketch below).
![flops](image/flops.png)
![param](image/param.png)
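+ The fitting script is not part of this commit; the sketch below shows one way the curves in flops.png / param.png could be reproduced from ptq_result.txt. The quadratic model and the reuse of `js_flops_list` / `acc_loss_list` from ptq.py are assumptions, not the exact procedure.

```python
# Hypothetical fitting sketch: regress acc_loss on js_flops and report R^2.
import numpy as np

js_flops = np.array(js_flops_list)   # values recorded in ptq_result.txt
acc_loss = np.array(acc_loss_list)

coef = np.polyfit(js_flops, acc_loss, deg=2)   # assumed second-order fit
pred = np.polyval(coef, js_flops)
ss_res = ((acc_loss - pred) ** 2).sum()
ss_tot = ((acc_loss - acc_loss.mean()) ** 2).sum()
print('R^2 = %.4f' % (1 - ss_res / ss_tot))
```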
import sys
import os
# Extract per-layer parameter and FLOPs ratios from param_flops.txt (the redirected output of the get_param script)
def extract_ratio():
fr = open('param_flops.txt','r')
lines = fr.readlines()
layer = []
par_ratio = []
flop_ratio = []
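# Each relevant line of param_flops.txt looks like
#   (conv1): Conv2d(1.79 k, 0.005% Params, 1.84 MMac, 0.439% MACs, 3, 64, ...)
# The layer name sits between the first '(' and ')', the parameter share is the
# number just before the first '%', and the MACs share is the number just
# before the last '%'.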
for line in lines:
if '(' in line and ')' in line:
layer.append(line.split(')')[0].split('(')[1])
r1 = line.split('%')[0].split(',')[-1]
r1 = float(r1)
par_ratio.append(r1)
r2 = line.split('%')[-2].split(',')[-1]
r2 = float(r2)
flop_ratio.append(r2)
return layer, par_ratio, flop_ratio
if __name__ == "__main__":
layer, par_ratio, flop_ratio = extract_ratio()
print(layer)
print(par_ratio)
print(flop_ratio)
from torch.autograd import Function
class FakeQuantize(Function):
@staticmethod
def forward(ctx, x, qparam):
x = qparam.quantize_tensor(x)
x = qparam.dequantize_tensor(x)
return x
@staticmethod
def backward(ctx, grad_output):
return grad_output, None
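if __name__ == "__main__":
    # Illustrative straight-through check with a stand-in quantizer; the real
    # QParam objects (exposing quantize_tensor / dequantize_tensor) live in module.py.
    import torch

    class _DummyQParam:
        scale = 0.1
        def quantize_tensor(self, x):
            return torch.round(x / self.scale)
        def dequantize_tensor(self, x):
            return x * self.scale

    x = torch.randn(4, 8, requires_grad=True)
    x_fq = FakeQuantize.apply(x, _DummyQParam())
    x_fq.sum().backward()
    # The backward pass is an identity w.r.t. x, so the gradient is all ones.
    print(bool(torch.all(x.grad == 1.0)))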
from model import *
import torch
from ptflops import get_model_complexity_info
if __name__ == "__main__":
model = VGG_19()
full_file = 'ckpt/cifar10_VGG_19.pt'
model.load_state_dict(torch.load(full_file))
flops, params = get_model_complexity_info(model, (3, 32, 32), as_strings=True, print_per_layer_stat=True)
# -*- coding: utf-8 -*-
# Shares global variables (the quantization lookup tables) across modules
def _init(): # initialize the shared dict
global _global_dict
_global_dict = {}
def set_value(value,is_bias=False):
# store a value in the shared dict
if is_bias:
_global_dict[0] = value
else:
_global_dict[1] = value
def get_value(is_bias=False): # bias keeps its own precision, independent of the other values
if is_bias:
return _global_dict[0]
else:
return _global_dict[1]
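if __name__ == "__main__":
    # Minimal self-test of the shared-dictionary behaviour; ptq.py uses the same
    # calls to publish the bias table and the per-configuration quantization table.
    _init()
    set_value([2. ** i for i in range(-6, 1)], is_bias=True)   # e.g. a bias table
    set_value([0.25, 0.5, 1.0])                                # e.g. a weight/activation table
    print(get_value(is_bias=True))
    print(get_value())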
import torch
import torch.nn as nn
import torch.nn.functional as F
from module import *
import module
# cfg = {
# 'A' : [64, 'M', 128, 'M', 256, 256, 'M', 512, 512, 'M', 512, 512, 'M'],
# 'B' : [64, 64, 'M', 128, 128, 'M', 256, 256, 'M', 512, 512, 'M', 512, 512, 'M'],
# 'D' : [64, 64, 'M', 128, 128, 'M', 256, 256, 256, 'M', 512, 512, 512, 'M', 512, 512, 512, 'M'],
# 'E' : [64, 64, 'M', 128, 128, 'M', 256, 256, 256, 256, 'M', 512, 512, 512, 512, 'M', 512, 512, 512, 512, 'M']
# }
feature_cfg = [64, 64, 'M', 128, 128, 'M', 256, 256, 256, 256, 'M', 512, 512, 512, 512, 'M', 512, 512, 512, 512, 'M']
classifier_cfg = [4096, 4096, 'LF']
def make_feature_layers(cfg, batch_norm=False):
layers = []
names = []
input_channel = 3
idx = 0
for l in cfg:
if l == 'M':
names.append('pool%d'%idx)
layers.append(nn.MaxPool2d(kernel_size=2, stride=2))
continue
idx += 1
names.append('conv%d'%idx)
layers.append(nn.Conv2d(input_channel, l, kernel_size=3, padding=1))
if batch_norm:
names.append('bn%d'%idx)
layers.append(nn.BatchNorm2d(l))
names.append('relu%d'%idx)
layers.append(nn.ReLU(inplace=True))
input_channel = l
return names,layers
def make_classifier_layers(cfg, in_features, num_classes):
layers=[]
names=[]
idx = 0
for l in cfg:
idx += 1
if l=='LF': #last fc
names.append('fc%d'%idx)
layers.append(nn.Linear(in_features,num_classes))
continue
names.append('fc%d'%idx)
layers.append(nn.Linear(in_features,l))
in_features=l
names.append('crelu%d'%idx) # classifier relu
layers.append(nn.ReLU(inplace=True))
names.append('drop%d'%idx)
layers.append(nn.Dropout())
return names,layers
def quantize_feature_layers(model,name_list,quant_type,num_bits,e_bits):
layers=[]
names=[]
last_conv = None
last_bn = None
idx = 0
for name in name_list:
if 'pool' in name:
names.append('qpool%d'%idx)
layers.append(QMaxPooling2d(quant_type, kernel_size=2, stride=2, padding=0, num_bits=num_bits, e_bits=e_bits))
elif 'conv' in name:
last_conv = getattr(model,name)
elif 'bn' in name:
last_bn = getattr(model,name)
elif 'relu' in name:
idx += 1
names.append('qconv%d'%idx)
if idx == 1:
layers.append(QConvBNReLU(quant_type, last_conv, last_bn, qi=True, qo=True, num_bits=num_bits, e_bits=e_bits))
else:
layers.append(QConvBNReLU(quant_type, last_conv, last_bn, qi=False, qo=True, num_bits=num_bits, e_bits=e_bits))
return names,layers
def quantize_classifier_layers(model,name_list,quant_type,num_bits,e_bits):
layers=[]
names=[]
idx=0
for name in name_list:
layer = getattr(model,name)
if 'fc' in name:
idx+=1
names.append('qfc%d'%idx)
layers.append(QLinear(quant_type, layer, qi=False, qo=True, num_bits=num_bits, e_bits=e_bits))
elif 'crelu' in name:
names.append('qcrelu%d'%idx)
layers.append(QReLU(quant_type, num_bits=num_bits, e_bits=e_bits))
elif 'drop' in name:
names.append(name)
layers.append(layer)
return names, layers
def quantize_utils(model,qfeature_name,qclassifier_name,func, x=None):
if func == 'inference':
layer=getattr(model,qfeature_name[0])
x = layer.qi.quantize_tensor(x)
last_qo = None
for name in qfeature_name:
layer = getattr(model,name)
if func == 'forward':
x = layer(x)
elif func == 'inference':
x = layer.quantize_inference(x)
else: #freeze
layer.freeze(last_qo)
if 'conv' in name:
last_qo = layer.qo
if func != 'freeze':
x = torch.flatten(x, start_dim=1)
for name in qclassifier_name:
layer = getattr(model,name)
if func == 'forward':
x = layer(x)
elif 'drop' not in name:
if func == 'inference':
x = layer.quantize_inference(x)
else: # freeze
layer.freeze(last_qo)
if 'fc' in name:
last_qo = layer.qo
if func == 'inference':
x = last_qo.dequantize_tensor(x)
return x
class VGG_19(nn.Module):
def __init__(self, num_class=10):
super().__init__()
feature_name,feature_layer = make_feature_layers(feature_cfg,batch_norm=True)
self.feature_name = feature_name
for name,layer in zip(feature_name,feature_layer):
self.add_module(name,layer)
classifier_name,classifier_layer = make_classifier_layers(classifier_cfg,512,num_class)
self.classifier_name = classifier_name
for name,layer in zip(classifier_name,classifier_layer):
self.add_module(name,layer)
# self.fc1 = nn.Linear(512, 4096)
# self.crelu1 = nn.ReLU(inplace=True)
# self.drop1 = nn.Dropout()
# self.fc2 = nn.Linear(4096, 4096)
# self.crelu2 = nn.ReLU(inplace=True)
# self.drop2 = nn.Dropout()
# self.fc3 = nn.Linear(4096, num_class)
def forward(self, x):
#feature
for name in self.feature_name:
layer = getattr(self,name)
x = layer(x)
x = torch.flatten(x, start_dim=1)
#classifier
for name in self.classifier_name:
layer = getattr(self,name)
x = layer(x)
# x = self.fc1(x)
# x = self.crelu1(x)
# x = self.drop1(x)
# x = self.fc2(x)
# x = self.crelu2(x)
# x = self.drop2(x)
# x = self.fc3(x)
return x
def quantize(self, quant_type, num_bits=8, e_bits=3):
# feature
qfeature_name,qfeature_layer = quantize_feature_layers(self,self.feature_name,quant_type,num_bits,e_bits)
self.qfeature_name = qfeature_name
for name,layer in zip(qfeature_name,qfeature_layer):
self.add_module(name,layer)
# classifier
qclassifier_name,qclassifier_layer = quantize_classifier_layers(self,self.classifier_name,quant_type,num_bits,e_bits)
self.qclassifier_name = qclassifier_name
for name,layer in zip(qclassifier_name,qclassifier_layer):
if 'drop' not in name:
self.add_module(name,layer)
# self.qfc1 = QLinear(quant_type, self.fc1, qi=False, qo=True, num_bits=num_bits, e_bits=e_bits)
# self.qcrelu1 = QReLU(quant_type, num_bits=num_bits, e_bits=e_bits)
# self.qfc2 = QLinear(quant_type, self.fc2, qi=False, qo=True, num_bits=num_bits, e_bits=e_bits)
# self.qcrelu2 = QReLU(quant_type, num_bits=num_bits, e_bits=e_bits)
# self.qfc3 = QLinear(quant_type, self.fc3, qi=False, qo=True, num_bits=num_bits, e_bits=e_bits)
def quantize_forward(self,x):
x = quantize_utils(self, self.qfeature_name, self.qclassifier_name,
func='forward', x=x)
return x
def freeze(self):
quantize_utils(self, self.qfeature_name, self.qclassifier_name,
func='freeze', x=None)
def quantize_inference(self,x):
x = quantize_utils(self, self.qfeature_name, self.qclassifier_name,
func='inference', x=x)
return x
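# End-to-end PTQ sketch for this model (illustrative only; ptq.py performs the
# full procedure, including the gol lookup-table setup that must precede
# quantize() for POT/FLOAT types). `calib_loader` and `images` are placeholders.
#
#   model = VGG_19()
#   model.load_state_dict(torch.load('ckpt/cifar10_VGG_19.pt'))
#   model.eval()
#   model.quantize('INT', num_bits=8, e_bits=3)
#   with torch.no_grad():
#       for data, _ in calib_loader:           # calibration pass collects qi/qo ranges
#           model.quantize_forward(data)
#   model.freeze()                             # fix the quantization parameters
#   out = model.quantize_inference(images)     # inference along the quantized path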
VGG_19(
38.96 M, 100.000% Params, 418.4 MMac, 100.000% MACs,
(conv1): Conv2d(1.79 k, 0.005% Params, 1.84 MMac, 0.439% MACs, 3, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
(bn1): BatchNorm2d(128, 0.000% Params, 131.07 KMac, 0.031% MACs, 64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
(relu1): ReLU(0, 0.000% Params, 65.54 KMac, 0.016% MACs, inplace=True)
(conv2): Conv2d(36.93 k, 0.095% Params, 37.81 MMac, 9.038% MACs, 64, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
(bn2): BatchNorm2d(128, 0.000% Params, 131.07 KMac, 0.031% MACs, 64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
(relu2): ReLU(0, 0.000% Params, 65.54 KMac, 0.016% MACs, inplace=True)
(pool2): MaxPool2d(0, 0.000% Params, 65.54 KMac, 0.016% MACs, kernel_size=2, stride=2, padding=0, dilation=1, ceil_mode=False)
(conv3): Conv2d(73.86 k, 0.190% Params, 18.91 MMac, 4.519% MACs, 64, 128, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
(bn3): BatchNorm2d(256, 0.001% Params, 65.54 KMac, 0.016% MACs, 128, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
(relu3): ReLU(0, 0.000% Params, 32.77 KMac, 0.008% MACs, inplace=True)
(conv4): Conv2d(147.58 k, 0.379% Params, 37.78 MMac, 9.030% MACs, 128, 128, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
(bn4): BatchNorm2d(256, 0.001% Params, 65.54 KMac, 0.016% MACs, 128, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
(relu4): ReLU(0, 0.000% Params, 32.77 KMac, 0.008% MACs, inplace=True)
(pool4): MaxPool2d(0, 0.000% Params, 32.77 KMac, 0.008% MACs, kernel_size=2, stride=2, padding=0, dilation=1, ceil_mode=False)
(conv5): Conv2d(295.17 k, 0.758% Params, 18.89 MMac, 4.515% MACs, 128, 256, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
(bn5): BatchNorm2d(512, 0.001% Params, 32.77 KMac, 0.008% MACs, 256, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
(relu5): ReLU(0, 0.000% Params, 16.38 KMac, 0.004% MACs, inplace=True)
(conv6): Conv2d(590.08 k, 1.515% Params, 37.77 MMac, 9.026% MACs, 256, 256, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
(bn6): BatchNorm2d(512, 0.001% Params, 32.77 KMac, 0.008% MACs, 256, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
(relu6): ReLU(0, 0.000% Params, 16.38 KMac, 0.004% MACs, inplace=True)
(conv7): Conv2d(590.08 k, 1.515% Params, 37.77 MMac, 9.026% MACs, 256, 256, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
(bn7): BatchNorm2d(512, 0.001% Params, 32.77 KMac, 0.008% MACs, 256, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
(relu7): ReLU(0, 0.000% Params, 16.38 KMac, 0.004% MACs, inplace=True)
(conv8): Conv2d(590.08 k, 1.515% Params, 37.77 MMac, 9.026% MACs, 256, 256, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
(bn8): BatchNorm2d(512, 0.001% Params, 32.77 KMac, 0.008% MACs, 256, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
(relu8): ReLU(0, 0.000% Params, 16.38 KMac, 0.004% MACs, inplace=True)
(pool8): MaxPool2d(0, 0.000% Params, 16.38 KMac, 0.004% MACs, kernel_size=2, stride=2, padding=0, dilation=1, ceil_mode=False)
(conv9): Conv2d(1.18 M, 3.029% Params, 18.88 MMac, 4.513% MACs, 256, 512, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
(bn9): BatchNorm2d(1.02 k, 0.003% Params, 16.38 KMac, 0.004% MACs, 512, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
(relu9): ReLU(0, 0.000% Params, 8.19 KMac, 0.002% MACs, inplace=True)
(conv10): Conv2d(2.36 M, 6.057% Params, 37.76 MMac, 9.024% MACs, 512, 512, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
(bn10): BatchNorm2d(1.02 k, 0.003% Params, 16.38 KMac, 0.004% MACs, 512, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
(relu10): ReLU(0, 0.000% Params, 8.19 KMac, 0.002% MACs, inplace=True)
(conv11): Conv2d(2.36 M, 6.057% Params, 37.76 MMac, 9.024% MACs, 512, 512, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
(bn11): BatchNorm2d(1.02 k, 0.003% Params, 16.38 KMac, 0.004% MACs, 512, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
(relu11): ReLU(0, 0.000% Params, 8.19 KMac, 0.002% MACs, inplace=True)
(conv12): Conv2d(2.36 M, 6.057% Params, 37.76 MMac, 9.024% MACs, 512, 512, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
(bn12): BatchNorm2d(1.02 k, 0.003% Params, 16.38 KMac, 0.004% MACs, 512, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
(relu12): ReLU(0, 0.000% Params, 8.19 KMac, 0.002% MACs, inplace=True)
(pool12): MaxPool2d(0, 0.000% Params, 8.19 KMac, 0.002% MACs, kernel_size=2, stride=2, padding=0, dilation=1, ceil_mode=False)
(conv13): Conv2d(2.36 M, 6.057% Params, 9.44 MMac, 2.256% MACs, 512, 512, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
(bn13): BatchNorm2d(1.02 k, 0.003% Params, 4.1 KMac, 0.001% MACs, 512, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
(relu13): ReLU(0, 0.000% Params, 2.05 KMac, 0.000% MACs, inplace=True)
(conv14): Conv2d(2.36 M, 6.057% Params, 9.44 MMac, 2.256% MACs, 512, 512, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
(bn14): BatchNorm2d(1.02 k, 0.003% Params, 4.1 KMac, 0.001% MACs, 512, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
(relu14): ReLU(0, 0.000% Params, 2.05 KMac, 0.000% MACs, inplace=True)
(conv15): Conv2d(2.36 M, 6.057% Params, 9.44 MMac, 2.256% MACs, 512, 512, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
(bn15): BatchNorm2d(1.02 k, 0.003% Params, 4.1 KMac, 0.001% MACs, 512, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
(relu15): ReLU(0, 0.000% Params, 2.05 KMac, 0.000% MACs, inplace=True)
(conv16): Conv2d(2.36 M, 6.057% Params, 9.44 MMac, 2.256% MACs, 512, 512, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
(bn16): BatchNorm2d(1.02 k, 0.003% Params, 4.1 KMac, 0.001% MACs, 512, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
(relu16): ReLU(0, 0.000% Params, 2.05 KMac, 0.000% MACs, inplace=True)
(pool16): MaxPool2d(0, 0.000% Params, 2.05 KMac, 0.000% MACs, kernel_size=2, stride=2, padding=0, dilation=1, ceil_mode=False)
(fc1): Linear(2.1 M, 5.393% Params, 2.1 MMac, 0.502% MACs, in_features=512, out_features=4096, bias=True)
(crelu1): ReLU(0, 0.000% Params, 4.1 KMac, 0.001% MACs, inplace=True)
(drop1): Dropout(0, 0.000% Params, 0.0 Mac, 0.000% MACs, p=0.5, inplace=False)
(fc2): Linear(16.78 M, 43.074% Params, 16.78 MMac, 4.011% MACs, in_features=4096, out_features=4096, bias=True)
(crelu2): ReLU(0, 0.000% Params, 4.1 KMac, 0.001% MACs, inplace=True)
(drop2): Dropout(0, 0.000% Params, 0.0 Mac, 0.000% MACs, p=0.5, inplace=False)
(fc3): Linear(40.97 k, 0.105% Params, 40.97 KMac, 0.010% MACs, in_features=4096, out_features=10, bias=True)
)
from torch.serialization import load
from model import *
from extract_ratio import *
from utils import *
import gol
import openpyxl
import sys
import argparse
import torch
import torch.nn as nn
import torch.nn.functional as F
import torch.optim as optim
from torchvision import datasets, transforms
from torchvision.transforms.functional import InterpolationMode
import torch.utils.bottleneck as bn
import os
import os.path as osp
from torch.utils.tensorboard import SummaryWriter
def direct_quantize(model, test_loader,device):
for i, (data, target) in enumerate(test_loader, 1):
data = data.to(device)
output = model.quantize_forward(data).cpu()
if i % 500 == 0:
break
print('direct quantization finish')
def full_inference(model, test_loader, device):
correct = 0
for i, (data, target) in enumerate(test_loader, 1):
data = data.to(device)
output = model(data).cpu()
pred = output.argmax(dim=1, keepdim=True)
# print(pred)
correct += pred.eq(target.view_as(pred)).sum().item()
print('\nTest set: Full Model Accuracy: {:.2f}%'.format(100. * correct / len(test_loader.dataset)))
return 100. * correct / len(test_loader.dataset)
def quantize_inference(model, test_loader, device):
correct = 0
for i, (data, target) in enumerate(test_loader, 1):
data = data.to(device)
output = model.quantize_inference(data).cpu()
pred = output.argmax(dim=1, keepdim=True)
correct += pred.eq(target.view_as(pred)).sum().item()
print('Test set: Quant Model Accuracy: {:.2f}%'.format(100. * correct / len(test_loader.dataset)))
return 100. * correct / len(test_loader.dataset)
def js_div(p_output, q_output, get_softmax=True):
"""
Function that measures JS divergence between target and output logits:
"""
KLDivLoss = nn.KLDivLoss(reduction='sum')
if get_softmax:
p_output = F.softmax(p_output, dim=-1)
q_output = F.softmax(q_output, dim=-1)
log_mean_output = ((p_output + q_output)/2).log()
return (KLDivLoss(log_mean_output, p_output) + KLDivLoss(log_mean_output, q_output))/2
if __name__ == "__main__":
batch_size = 32
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
print(device)
train_loader = torch.utils.data.DataLoader(
datasets.CIFAR10('../data', train=True, download=True,
transform=transforms.Compose([
transforms.Resize((32, 32), interpolation=InterpolationMode.BICUBIC),
transforms.RandomHorizontalFlip(),
transforms.ToTensor(),
transforms.Normalize((0.4914, 0.4822, 0.4465), (0.2023, 0.1994, 0.2010))
])),
batch_size=batch_size, shuffle=True, num_workers=1, pin_memory=True
)
test_loader = torch.utils.data.DataLoader(
datasets.CIFAR10('../data', train=False, transform=transforms.Compose([
transforms.Resize((32, 32), interpolation=InterpolationMode.BICUBIC),
transforms.ToTensor(),
transforms.Normalize((0.4914, 0.4822, 0.4465), (0.2023, 0.1994, 0.2010))
])),
batch_size=batch_size, shuffle=True, num_workers=1, pin_memory=True
)
model = VGG_19()
writer = SummaryWriter(log_dir='./log')
full_file = 'ckpt/cifar10_VGG_19.pt'
model.load_state_dict(torch.load(full_file))
model.to(device)
load_ptq = True
store_ptq = False
ptq_file_prefix = 'ckpt/cifar10_VGG_19_ptq_'
model.eval()
full_acc = full_inference(model, test_loader, device)
model_fold = fold_model(model)
full_params = []
layer, par_ratio, flop_ratio = extract_ratio()
par_ratio, flop_ratio = fold_ratio(layer, par_ratio, flop_ratio)
for name, param in model_fold.named_parameters():
if 'bn' in name:
continue
param_norm = F.normalize(param.data.cpu(),p=2,dim=-1)
full_params.append(param_norm)
writer.add_histogram(tag='Full_' + name + '_data', values=param.data)
gol._init()
quant_type_list = ['INT','POT','FLOAT']
title_list = []
js_flops_list = []
js_param_list = []
ptq_acc_list = []
acc_loss_list = []
for quant_type in quant_type_list:
num_bit_list = numbit_list(quant_type)
# For each quantization type the bias quantization table only needs to be set once.
# INT bit widths are large, so a lookup table would be too costly; plain rounding (_round) is used instead.
if quant_type != 'INT':
bias_list = build_bias_list(quant_type)
gol.set_value(bias_list, is_bias=True)
for num_bits in num_bit_list:
e_bit_list = ebit_list(quant_type,num_bits)
for e_bits in e_bit_list:
model_ptq = VGG_19()
if quant_type == 'FLOAT':
title = '%s_%d_E%d' % (quant_type, num_bits, e_bits)
else:
title = '%s_%d' % (quant_type, num_bits)
print('\nPTQ: '+title)
title_list.append(title)
# set the quantization lookup table
if quant_type != 'INT':
plist = build_list(quant_type, num_bits, e_bits)
gol.set_value(plist)
# check whether a cached PTQ model can be loaded
if load_ptq is True and osp.exists(ptq_file_prefix + title + '.pt'):
model_ptq.quantize(quant_type,num_bits,e_bits)
model_ptq.load_state_dict(torch.load(ptq_file_prefix + title + '.pt'))
model_ptq.to(device)
print('Successfully load ptq model: ' + title)
else:
model_ptq.load_state_dict(torch.load(full_file))
model_ptq.to(device)
model_ptq.quantize(quant_type,num_bits,e_bits)
model_ptq.eval()
direct_quantize(model_ptq, train_loader, device)
if store_ptq:
torch.save(model_ptq.state_dict(), ptq_file_prefix + title + '.pt')
model_ptq.freeze()
ptq_acc = quantize_inference(model_ptq, test_loader, device)
ptq_acc_list.append(ptq_acc)
acc_loss = (full_acc - ptq_acc) / full_acc
acc_loss_list.append(acc_loss)
idx = -1
# accumulate JS divergence weighted by the FLOPs / parameter ratios
js_flops = 0.
js_param = 0.
for name, param in model_ptq.named_parameters():
if '.' not in name or 'bn' in name:
continue
idx = idx + 1
prefix = name.split('.')[0]
if prefix in layer:
layer_idx = layer.index(prefix)
ptq_param = param.data.cpu()
# L2-normalize
ptq_norm = F.normalize(ptq_param,p=2,dim=-1)
writer.add_histogram(tag=title +':'+ name + '_data', values=ptq_param)
js = js_div(ptq_norm,full_params[idx])
js = js.item()
if js < 0.:
js = 0.
js_flops = js_flops + js * flop_ratio[layer_idx]
js_param = js_param + js * par_ratio[layer_idx]
js_flops_list.append(js_flops)
js_param_list.append(js_param)
print(title + ': js_flops: %f js_param: %f acc_loss: %f' % (js_flops, js_param, acc_loss))
# write the results to an xlsx workbook
workbook = openpyxl.Workbook()
worksheet = workbook.active
worksheet.cell(row=1,column=1,value='FP32-acc')
worksheet.cell(row=1,column=2,value=full_acc)
worksheet.cell(row=3,column=1,value='title')
worksheet.cell(row=3,column=2,value='js_flops')
worksheet.cell(row=3,column=3,value='js_param')
worksheet.cell(row=3,column=4,value='ptq_acc')
worksheet.cell(row=3,column=5,value='acc_loss')
for i in range(len(title_list)):
worksheet.cell(row=i+4, column=1, value=title_list[i])
worksheet.cell(row=i+4, column=2, value=js_flops_list[i])
worksheet.cell(row=i+4, column=3, value=js_param_list[i])
worksheet.cell(row=i+4, column=4, value=ptq_acc_list[i])
worksheet.cell(row=i+4, column=5, value=acc_loss_list[i])
workbook.save('ptq_result.xlsx')
writer.close()
ft = open('ptq_result.txt','w')
print('title_list:',file=ft)
print(" ".join(title_list),file=ft)
print('js_flops_list:',file=ft)
print(" ".join(str(i) for i in js_flops_list), file=ft)
print('js_param_list:',file=ft)
print(" ".join(str(i) for i in js_param_list), file=ft)
print('ptq_acc_list:',file=ft)
print(" ".join(str(i) for i in ptq_acc_list), file=ft)
print('acc_loss_list:',file=ft)
print(" ".join(str(i) for i in acc_loss_list), file=ft)
ft.close()
title_list:
INT_2 INT_3 INT_4 INT_5 INT_6 INT_7 INT_8 INT_9 INT_10 INT_11 INT_12 INT_13 INT_14 INT_15 INT_16 POT_2 POT_3 POT_4 POT_5 POT_6 POT_7 POT_8 FLOAT_3_E1 FLOAT_4_E1 FLOAT_4_E2 FLOAT_5_E1 FLOAT_5_E2 FLOAT_5_E3 FLOAT_6_E1 FLOAT_6_E2 FLOAT_6_E3 FLOAT_6_E4 FLOAT_7_E1 FLOAT_7_E2 FLOAT_7_E3 FLOAT_7_E4 FLOAT_7_E5 FLOAT_8_E1 FLOAT_8_E2 FLOAT_8_E3 FLOAT_8_E4 FLOAT_8_E5 FLOAT_8_E6
js_flops_list:
10125.068753278441 2125.8746406732002 448.16842882616936 102.722270627284 24.664131971463988 6.028952298047002 1.480885231764123 0.3654118682297422 0.09188685026974831 0.022861955175454297 0.0056817018373599695 0.0014282899203300254 0.0003389232698071482 0.00010617853089969107 3.493483722190392e-05 10125.05842558445 1275.5849171285142 202.11000563785234 200.95521018550525 200.95533928139716 200.95570148700347 200.95545858339625 1204.7518158597543 367.27809546334674 207.5115420225552 188.4010740524998 71.7702690201597 55.97867545957229 135.41876706982143 37.94656896957748 14.343459354637504 55.840083188361184 116.40261377054755 27.25354105300921 3.6060440027863243 14.295083000520314 55.85538279966775 108.58905210346933 23.270040495614047 0.9179187100968745 3.580000955703707 14.308950799466206 55.85538134242835
js_param_list:
10125.068753278441 2125.8746406732002 448.16842882616936 102.722270627284 24.664131971463988 6.028952298047002 1.480885231764123 0.3654118682297422 0.09188685026974831 0.022861955175454297 0.0056817018373599695 0.0014282899203300254 0.0003389232698071482 0.00010617853089969107 3.493483722190392e-05 10125.05842558445 1275.5849171285142 202.11000563785234 200.95521018550525 200.95533928139716 200.95570148700347 200.95545858339625 1204.7518158597543 367.27809546334674 207.5115420225552 188.4010740524998 71.7702690201597 55.97867545957229 135.41876706982143 37.94656896957748 14.343459354637504 55.840083188361184 116.40261377054755 27.25354105300921 3.6060440027863243 14.295083000520314 55.85538279966775 108.58905210346933 23.270040495614047 0.9179187100968745 3.580000955703707 14.308950799466206 55.85538134242835
ptq_acc_list:
10.0 11.92 51.46 86.95 89.09 89.19 89.24 89.36 89.26 89.29 89.26 89.26 89.25 89.25 89.25 10.0 17.52 65.19 66.51 66.7 65.97 68.61 12.15 58.19 80.31 79.1 87.07 86.3 82.96 88.53 88.68 74.65 84.34 88.82 89.27 65.12 10.0 84.66 88.8 89.31 61.04 10.0 10.0
acc_loss_list:
0.8879551820728291 0.8664425770308123 0.42341736694677873 0.025770308123249267 0.0017927170868346958 0.0006722689075630507 0.00011204481792722819 -0.0012324929971988731 -0.00011204481792722819 -0.0004481792717087535 -0.00011204481792722819 -0.00011204481792722819 0.0 0.0 0.0 0.8879551820728291 0.8036974789915967 0.26957983193277313 0.2547899159663865 0.2526610644257703 0.2608403361344538 0.23126050420168068 0.8638655462184873 0.34801120448179274 0.10016806722689073 0.1137254901960785 0.024425770308123325 0.03305322128851544 0.07047619047619054 0.00806722689075629 0.006386554621848663 0.16358543417366941 0.05501400560224086 0.004817927170868424 -0.00022408963585429716 0.2703641456582633 0.8879551820728291 0.05142857142857147 0.005042016806722721 -0.0006722689075630507 0.31607843137254904 0.8879551820728291 0.8879551820728291
from model import *
import torch
import torch.nn as nn
import torch.optim as optim
from torch.optim.lr_scheduler import CosineAnnealingLR
from torchvision import datasets, transforms
from torchvision.transforms.functional import InterpolationMode
import os
import os.path as osp
def train(model, device, train_loader, optimizer, epoch):
model.train()
lossLayer = torch.nn.CrossEntropyLoss()
for batch_idx, (data, target) in enumerate(train_loader):
data, target = data.to(device), target.to(device)
optimizer.zero_grad()
output = model(data)
loss = lossLayer(output, target)
loss.backward()
optimizer.step()
if batch_idx % 50 == 0:
print('Train Epoch: {} [{}/{}]\tLoss: {:.6f}'.format(
epoch, batch_idx * len(data), len(train_loader.dataset), loss.item()
))
def test(model, device, test_loader):
model.eval()
test_loss = 0
correct = 0
lossLayer = torch.nn.CrossEntropyLoss(reduction='sum')
for data, target in test_loader:
data, target = data.to(device), target.to(device)
output = model(data)
test_loss += lossLayer(output, target).item()
pred = output.argmax(dim=1, keepdim=True)
correct += pred.eq(target.view_as(pred)).sum().item()
test_loss /= len(test_loader.dataset)
print('\nTest set: Average loss: {:.4f}, Accuracy: {:.2f}%\n'.format(
test_loss, 100. * correct / len(test_loader.dataset)
))
if __name__ == "__main__":
batch_size = 32
test_batch_size = 32
seed = 1
# epoch = 35
# lr = 0.01
epochs1 = 15
epochs2 = epochs1+10
epochs3 = epochs2+10
lr1 = 0.01
lr2 = 0.001
lr3 = 0.0001
momentum = 0.5
save_model = True
torch.manual_seed(seed)
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
train_loader = torch.utils.data.DataLoader(
datasets.CIFAR10('../data', train=True, download=True,
transform=transforms.Compose([
transforms.Resize((32, 32), interpolation=InterpolationMode.BICUBIC),
transforms.RandomHorizontalFlip(),
transforms.ToTensor(),
transforms.Normalize((0.4914, 0.4822, 0.4465), (0.2023, 0.1994, 0.2010))
])),
batch_size=batch_size, shuffle=True, num_workers=1, pin_memory=True
)
test_loader = torch.utils.data.DataLoader(
datasets.CIFAR10('../data', train=False, transform=transforms.Compose([
transforms.Resize((32, 32), interpolation=InterpolationMode.BICUBIC),
transforms.ToTensor(),
transforms.Normalize((0.4914, 0.4822, 0.4465), (0.2023, 0.1994, 0.2010))
])),
batch_size=test_batch_size, shuffle=True, num_workers=1, pin_memory=True
)
model = VGG_19().to(device)
# optimizer = optim.Adam(model.parameters(), lr=lr)
# lr_scheduler = CosineAnnealingLR(optimizer, T_max=epoch)
# for epoch in range(1, epoch + 1):
# train(model, device, train_loader, optimizer, epoch)
# # lr_scheduler.step()
# test(model, device, test_loader)
optimizer1 = optim.SGD(model.parameters(), lr=lr1, momentum=momentum)
optimizer2 = optim.SGD(model.parameters(), lr=lr2, momentum=momentum)
optimizer3 = optim.SGD(model.parameters(), lr=lr3, momentum=momentum)
for epoch in range(1, epochs1 + 1):
train(model, device, train_loader, optimizer1, epoch)
test(model, device, test_loader)
for epoch in range(epochs1 + 1, epochs2 + 1):
train(model, device, train_loader, optimizer2, epoch)
test(model, device, test_loader)
for epoch in range(epochs2 + 1, epochs3 + 1):
train(model, device, train_loader, optimizer3, epoch)
test(model, device, test_loader)
if save_model:
if not osp.exists('ckpt'):
os.makedirs('ckpt')
torch.save(model.state_dict(), 'ckpt/cifar10_VGG_19.pt')
import torch
import torch.nn as nn
def ebit_list(quant_type, num_bits):
if quant_type == 'FLOAT':
e_bit_list = list(range(1,num_bits-1))
else:
e_bit_list = [0]
return e_bit_list
def numbit_list(quant_type):
if quant_type == 'INT':
num_bit_list = list(range(2,17))
elif quant_type == 'POT':
num_bit_list = list(range(2,9))
else:
num_bit_list = list(range(2,9))
# num_bit_list = [8]
return num_bit_list
def build_bias_list(quant_type):
if quant_type == 'POT':
return build_pot_list(8)
else:
return build_float_list(16,7)
def build_list(quant_type, num_bits, e_bits):
if quant_type == 'POT':
return build_pot_list(num_bits)
else:
return build_float_list(num_bits,e_bits)
def build_pot_list(num_bits):
plist = [0.]
for i in range(-2 ** (num_bits-1) + 2, 1):
# i goes up to 0, so the largest POT value is 1
plist.append(2. ** i)
plist.append(-2. ** i)
plist = torch.Tensor(list(set(plist)))
# plist = plist.mul(1.0 / torch.max(plist))
return plist
def build_float_list(num_bits,e_bits):
m_bits = num_bits - 1 - e_bits
plist = [0.]
# spacing between adjacent mantissa values
dist_m = 2 ** (-m_bits)
e = -2 ** (e_bits - 1) + 1
for m in range(1, 2 ** m_bits):
frac = m * dist_m # mantissa part
expo = 2 ** e # exponent part
flt = frac * expo
plist.append(flt)
plist.append(-flt)
for e in range(-2 ** (e_bits - 1) + 2, 2 ** (e_bits - 1) + 1):
expo = 2 ** e
for m in range(0, 2 ** m_bits):
frac = 1. + m * dist_m
flt = frac * expo
plist.append(flt)
plist.append(-flt)
plist = torch.Tensor(list(set(plist)))
return plist
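# Example outputs (these values follow directly from the constructions above):
#   sorted(build_pot_list(3).tolist())
#     -> [-1.0, -0.5, -0.25, 0.0, 0.25, 0.5, 1.0]
#   sorted(build_float_list(4, 2).tolist())   # i.e. the FLOAT_4_E2 value set
#     -> [-6.0, -4.0, -3.0, -2.0, -1.5, -1.0, -0.25, 0.0, 0.25, 1.0, 1.5, 2.0, 3.0, 4.0, 6.0]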
def fold_ratio(layer, par_ratio, flop_ratio):
idx = -1
for name in layer:
idx = idx + 1
if 'bn' in name:
par_ratio[idx-1] += par_ratio[idx]
flop_ratio[idx-1] += flop_ratio[idx]
return par_ratio,flop_ratio
def fold_model(model):
idx = -1
module_list = []
for name, module in model.named_modules():
idx += 1
module_list.append(module)
if 'bn' in name:
module_list[idx-1] = fold_bn(module_list[idx-1],module)
return model
# def fold_model(model):
# last_conv = None
# last_bn = None
# for name, module in model.named_modules():
# if isinstance(module, nn.Conv2d):
# # when a new conv layer is reached, fold any pending BN into the previous conv first
# if last_bn is not None:
# last_conv = fold_bn(last_conv, last_bn)
# last_bn = None
# last_conv = module
# elif isinstance(module, nn.BatchNorm2d):
# # when a BN layer is reached, fold it into the previous conv layer
# last_bn = module
# if last_conv is not None:
# last_conv = fold_bn(last_conv, last_bn)
# last_bn = None
# # handle the last BN layer
# if last_bn is not None:
# last_conv = fold_bn(last_conv, last_bn)
# return model
def fold_bn(conv, bn):
# BN layer parameters
gamma = bn.weight.data
beta = bn.bias.data
mean = bn.running_mean
var = bn.running_var
eps = bn.eps
std = torch.sqrt(var + eps)
feat = bn.num_features
# convolution layer parameters
weight = conv.weight.data
bias = conv.bias.data if conv.bias is not None else None
if bn.affine:
gamma_ = gamma / std
weight = weight * gamma_.view(feat, 1, 1, 1)
if bias is not None:
bias = gamma_ * bias - gamma_ * mean + beta
else:
bias = beta - gamma_ * mean
else:
gamma_ = 1 / std
weight = weight * gamma_
if bias is not None:
bias = gamma_ * bias - gamma_ * mean
else:
bias = -gamma_ * mean
# write back the folded weight and bias
conv.weight.data = weight
conv.bias.data = bias
return conv
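if __name__ == "__main__":
    # Numerical sanity check for fold_bn (illustrative): after populating the BN
    # running statistics, the folded conv should reproduce conv -> bn in eval mode.
    conv = nn.Conv2d(3, 8, kernel_size=3, padding=1, bias=True)
    bn = nn.BatchNorm2d(8)
    x = torch.randn(4, 3, 32, 32)
    with torch.no_grad():
        bn.train()
        bn(conv(x))          # update running_mean / running_var
        bn.eval()
        ref = bn(conv(x))
        fold_bn(conv, bn)    # folds the BN parameters into conv in place
        out = conv(x)
    print(bool(torch.allclose(ref, out, atol=1e-4)))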