Commit b78e8fd4 by Zhihong Ma

feat: old version for LeNet (ResNet, Module not finished)

parent 32783663
# -*- coding: utf-8 -*-
import numpy
import numpy as np
import torch
import sys
from mmd_loss import *
from collections import OrderedDict
d1 = sys.argv[1] # bit
d2 = sys.argv[2] # epoch
# d1=4
# d2=5
sum=0
flag=0
total_quan_list=list()
total_base_list=list()
# CNN FLOPs = Cout * Hout * Wout * (2 * Cin * K * K) when bias is counted, otherwise -1 inside the parentheses
# FCN (fully-connected) FLOPs = Cout * Cin when bias is counted, otherwise -1
# the related relu/pool ops are also taken into account
# MAdd
# weight0 =np.array( [ 705600.0+4704.0+ 3528.0 , 480000.0+ 1600.0 + 1200.0 , 95880.0 + 120.0,
# 20076.0 + 84.0 , 1670.0 ])
# weight1=np.array([705,600.0 , 480,000.0,+ 95,880.0 ,
# 20,076.0 , 1,670.0 ])
# flops
weight_f0= np.array([357504+4704+4704, 241600+1600+1600,48000+120,10080+84,840])
weight_f1=np.array([357504, 241600,48000,10080,840])
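# A quick worked check of the per-layer weights above (a sketch; assumes the LeNet used elsewhere in
# this commit: conv1 = Conv2d(3,6,5), conv2 = Conv2d(6,16,5) on 32x32 inputs, so the conv outputs are
# 28x28 and 10x10). Conv MACs incl. bias: (Cin*K*K + 1) * Hout * Wout * Cout; FC MACs: in_features * out_features.
assert weight_f1.tolist() == [(3*5*5+1)*28*28*6, (6*5*5+1)*10*10*16, 16*5*5*120, 120*84, 84*10]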
summary_quan_dict=OrderedDict()
summary_base_dict=OrderedDict()
losses=[]
# outer level: one dict per epoch; inner level: the grads of each network layer
for i in range(int(d2)):
total_quan_list.append(torch.load('./project/p/checkpoint/cifar-10_lenet_bn_quant/' + str(d1) + '/ckpt_cifar-10_lenet_bn_quant_'+str(i+1)+'.pth'))
#total_quan_list.append(torch.load('checkpoint/cifar-10_lenet_bn/full' + '/ckpt_cifar-10_lenet_bn_' + str(d2) + '.pth'))
total_base_list.append(torch.load('./project/p/checkpoint/cifar-10_lenet_bn/full' + '/ckpt_cifar-10_lenet_bn_' + str(i+1) + '.pth'))
for k, _ in total_base_list[i]['grads'].items():
if flag == 0:
summary_quan_dict[k] = total_quan_list[i]['grads'][k].reshape(1,-1)
summary_base_dict[k] = total_base_list[i]['grads'][k].reshape(1,-1)
else :
# entries in the dict cannot be modified in place; reassign them instead
a=summary_quan_dict[k]
b=total_quan_list[i]['grads'][k].reshape(1,-1)
c=np.vstack((a,b))
summary_quan_dict[k] = c
a = summary_base_dict[k]
b = total_base_list[i]['grads'][k].reshape(1,-1)
c = np.vstack((a, b))
summary_base_dict[k] = c
flag = 1
cnt = 0
flag = 0
for k, _ in summary_quan_dict.items():
if flag == 0:
sum += 0.99*weight_f1[cnt] * MK_MMD(source=summary_base_dict[k], target=summary_quan_dict[k]) # weight
else:
sum += 0.01*weight_f1[cnt] * MK_MMD(source=summary_base_dict[k], target=summary_quan_dict[k]) #bias
if flag == 1:
cnt = cnt + 1
flag = 0
else:
flag=1
sum=sum/(weight_f0.sum()*2)
print(sum)
f = open('./project/p/lenet_ptq_similarity.txt','a')
f.write('bit:' + str(d1) + ' epoch_num:' + str(d2) +': '+str(sum)+'\n')
f.close()
# for k,v in summary_base_dict.items():
# if k== 'conv_layers.conv1.weight':
# print(v)
# print('===========')
# print(summary_quan_dict[k])
# -*- coding: utf-8 -*-
import numpy
import numpy as np
import torch
import sys
from collections import OrderedDict
import scipy.stats
import pandas as pd
import os
# Overall idea: for a given bit width, this script computes the gradient-distribution similarity at
# different epoch checkpoints (5, 10, ...); since each checkpoint covers a range of epochs, the
# similarity is averaged over the epochs within that range.
# External usage: the script is invoked once per bit width.
# Each csv row records, for that quantization bit width, the average weighted gradient-distribution
# similarity at the different epoch checkpoints.
#
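# A minimal sketch of the per-layer metric used below (hypothetical variable names, just to show how
# scipy.stats.wasserstein_distance is applied to the flattened gradients of matching layers):
#   g_full  = full_ckpt['grads']['conv_layers.conv1.weight'].reshape(-1)
#   g_quant = quant_ckpt['grads']['conv_layers.conv1.weight'].reshape(-1)
#   d = scipy.stats.wasserstein_distance(g_full, g_quant)   # smaller distance = more similar distributions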
d1 = sys.argv[1] # bit
d2 = sys.argv[2] # mode
d3 = sys.argv[3] # n_exp
# d2 = sys.argv[2] # epoch
# d1=4
# d2=5
tag = 0
dirpath = './project/p/qat_analysis_data/mode' + str(d2)
if not os.path.isdir(dirpath):
os.makedirs(dirpath, mode=0o777)
os.chmod(dirpath, mode=0o777)
# if int(d2) == 1:
# csvpath = './project/p/qat_analysis_data/wasserstein_distance.csv'
# else:
if int(d2) != 3:
csvpath = './project/p/qat_analysis_data/mode' + str(d2) + '/wasserstein_distance.csv'
else:
csvpath = './project/p/qat_analysis_data/mode' + str(d2) + '/wasserstein_distance_' + str(d3) + '.csv'
# if os.path.exists("./qat_analysis_data/wasserstein_distance.csv"):
if os.path.exists(csvpath):
tag = 1
if tag == 0: # no csv yet
df = pd.DataFrame()
else: # the csv already exists
# df = pd.read_csv("./qat_analysis_data/wasserstein_distance.csv", index_col=0)
df = pd.read_csv(csvpath, index_col=0)
df2 = pd.DataFrame()
# CNN FLOPs = Cout * Hout * Wout * (2 * Cin * K * K) when bias is counted, otherwise -1 inside the parentheses
# FCN (fully-connected) FLOPs = Cout * Cin when bias is counted, otherwise -1
# the related relu/pool ops are also taken into account
# MAdd
# weight0 =np.array( [ 705600.0+4704.0+ 3528.0 , 480000.0+ 1600.0 + 1200.0 , 95880.0 + 120.0,
# 20076.0 + 84.0 , 1670.0 ])
# weight1=np.array([705,600.0 , 480,000.0,+ 95,880.0 ,
# 20,076.0 , 1,670.0 ])
# flops
weight_f0= np.array([357504+4704+4704, 241600+1600+1600,48000+120,10080+84,840])
weight_f1=np.array([357504, 241600,48000,10080,840])
# for each epoch checkpoint
for epoch in [5, 10, 15, 20, 25, 30]:
total_quan_list = []
total_base_list = []
summary_quan_dict = OrderedDict()
summary_base_dict = OrderedDict()
flag = 0
result = 0
# outer level: one dict per epoch; inner level: the grads of each network layer
# iterate over the epochs within this checkpoint and collect the gradient information
for i in range(epoch):
if int(d2) == 1:
total_quan_list.append(torch.load(
'./project/p/checkpoint/cifar-10_lenet_bn_quant/scratch/' + str(d1) + '/ckpt_cifar-10_lenet_bn_quant_' + str(
i + 1) + '.pth'))
elif int(d2) == 2:
total_quan_list.append(torch.load(
'./project/p/checkpoint/cifar-10_lenet_bn_quant/scratch/mode' + str(d2) + '/' + str(d1)+ '/ckpt_cifar-10_lenet_bn_quant_' + str(
epoch) + '.pth'))
else:
total_quan_list.append(torch.load(
'./project/p/checkpoint/cifar-10_lenet_bn_quant/scratch/mode' + str(d2) + '_' + str(d3) + '/' + str(d1)+ '/ckpt_cifar-10_lenet_bn_quant_' + str(
epoch) + '.pth'))
# total_quan_list.append(torch.load('checkpoint/cifar-10_lenet_bn/full' + '/ckpt_cifar-10_lenet_bn_' + str(d2) + '.pth'))
# the full-precision data does not cover enough epochs
total_base_list.append(
torch.load('./project/p/checkpoint/cifar-10_lenet_bn/full' + '/ckpt_cifar-10_lenet_bn_' + str(i + 1) + '.pth'))
for k, _ in total_base_list[i]['grads'].items(): # get each layer's gradients for epoch i
if flag == 0: # the first epoch i read creates the first row of the data matrix; later epochs i are stacked below it
summary_quan_dict[k] = total_quan_list[i]['grads'][k].reshape(1, -1)
summary_base_dict[k] = total_base_list[i]['grads'][k].reshape(1, -1)
else:
# entries in the dict cannot be modified in place; reassign them instead
a = summary_quan_dict[k]
b = total_quan_list[i]['grads'][k].reshape(1, -1)
c = np.vstack((a, b))
summary_quan_dict[k] = c
a = summary_base_dict[k]
b = total_base_list[i]['grads'][k].reshape(1, -1)
c = np.vstack((a, b))
summary_base_dict[k] = c
flag = 1
# loss = total_quan_list[i]['losses']
# print(loss)
# df = pd.read_csv('./data_analysis_folder/data.csv', index_col=0)
# # df = pd.DataFrame()
# df2 = pd.DataFrame()
# the code above only collects the data; the actual summation happens below
for j in range(epoch):
flag0 = 0 # alternates between each layer's weight and bias
cnt = 0 # walks through the layers in order
sum = 0 # sum records the weighted gradient-distribution similarity for a single epoch j only
for k, _ in summary_quan_dict.items():
w = summary_base_dict[k][j, :] # this is not ideal here and needs to be reworked
v = summary_quan_dict[k][j, :]
if flag0 == 0:
cur_weight = weight_f1[cnt] * scipy.stats.wasserstein_distance(w, v) # weight
# not convenient to store; it would need a third dimension (sheets)
# if tag == 1:
# df2[k] = [cur_weight]
# else:
# df[k] = [cur_weight]
sum += 0.99 * cur_weight
else:
cur_bias = weight_f1[cnt] * scipy.stats.wasserstein_distance(w, v) # bias
# if tag == 1:
# df2[k] = [cur_bias]
# else:
# df[k] = [cur_bias]
sum += 0.01 * cur_bias
if flag0 == 1:
cnt = cnt + 1
flag0 = 0
else:
flag0 = 1
sum = sum / (weight_f1.sum() * 2)
result += sum # accumulate the weighted gradient similarity over the epochs i
print(sum)
result /= epoch # average the gradient similarity over the epochs within this checkpoint range
if tag == 1:
df2[str(epoch)] = [result]
else :
df[str(epoch)] = [result]
result = 0
if tag == 1 :
df = df.append(df2)
# df.to_csv('./qat_analysis_data/wasserstein_distance.csv')
df.to_csv(csvpath)
else :
# df.to_csv('./qat_analysis_data/wasserstein_distance.csv')
df.to_csv(csvpath)
# f = open('lenet_ptq_wasserstein_similarity.txt','a')
# f.write('bit:' + str(d1) + ' epoch_num:' + str(d2) +': '+str(sum)+'\n')
# f.close()
# -*- coding: utf-8 -*-
import numpy
import numpy as np
import torch
import sys
from collections import OrderedDict
import scipy.stats
import pandas as pd
from model import *
# from audtorch.metrics.functional import pearsonr
import math
# This script reads out the weight and bias values of the full-precision and quantized models for inspection
if __name__ == "__main__":
d1 = sys.argv[1]
# d2 = sys.argv[2]
# d1=8
# df = pd.read_csv('./ptq_analysis_data/seperate_data.csv', index_col=0)
df = pd.DataFrame()
# df2 = pd.DataFrame()
base_data = torch.load('./project/p/ckpt/trail/model_trail.pt')
# checkpoint_data = torch.load('./project/p/ckpt/trail/model_trail.pt')
print('full_precision weight/bias loaded!')
checkpoint_dir = './project/p/checkpoint/cifar-10_trail_model'
# quan_data = torch.load('ckpt/cifar-10_lenet_bn_ptq_' + str(d1) + '_.pt')
# print('quantization bit ' + str(d1) + ' weight/bias loaded!')
sum=0
if int(d1) == 1:
print(base_data)
# for k, _ in base_data.items():
# base_data[k] = base_data[k].reshape(1, -1)
# # quan_data[k] = quan_data[k].reshape(1, -1)
# print(base_data[k])
else:
for i in [4,9,14,19]:
check_data = torch.load(checkpoint_dir + '/ckpt_cifar-10_trail_model%s.pt' % (str(i)))
print(check_data)
# if int(d2) == 1:
# print(base_data[k])
# else:
# print(quan_data[k])
# -*- coding: utf-8 -*-
from torch.autograd import Function
class FakeQuantize(Function):
@staticmethod
def forward(ctx, x, qparam): # qparam (i.e. self) already carries the mode, scale, zero point, n_exp, etc., so no extra arguments are needed
x = qparam.quantize_tensor(x, qparam.mode) # INT
x = qparam.dequantize_tensor(x, qparam.mode) # FP(int)
return x
@staticmethod
def backward(ctx, grad_output): # straight-through estimator (STE): roughly approximate the quantizer as linear/identity
return grad_output, None
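# Minimal usage sketch (hypothetical; assumes a QParam-like object that exposes quantize_tensor /
# dequantize_tensor and a mode attribute, as used above):
#   x_fq = FakeQuantize.apply(x, qparam)  # forward: quantize then dequantize ("fake" quantization)
#   x_fq.sum().backward()                 # backward: gradients pass through unchanged (STE)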
import os
import torch
import torch.nn as nn
import torch.nn.functional as F
import numpy as np
from collections import OrderedDict
def get_model_histogram(model):
"""
Description:
- collect each parameter's gradient histogram (and the raw gradient values) from the model, stored in OrderedDicts
Args:
- model: (torch.nn.Module), torch model
Returns:
- (gradshisto, grads): two OrderedDicts, histograms and raw gradients
"""
gradshisto = OrderedDict()
grads = OrderedDict()
for name, params in model.named_parameters():
grad = params.grad
if grad is not None:
tmp = {}
params_np = grad.cpu().numpy()
histogram, bins = np.histogram(params_np.flatten(),bins=20)
tmp['histogram'] = list(histogram)
tmp['bins'] = list(bins)
gradshisto[name] = tmp
grads[name] = params_np
return gradshisto,grads
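# Usage sketch (hypothetical layer name, shown only to illustrate the returned structure): after
# loss.backward(),
#   histo, grads = get_model_histogram(model)
#   histo['conv1.weight']['histogram']  # 20 bin counts from np.histogram
#   histo['conv1.weight']['bins']       # 21 bin edges
#   grads['conv1.weight']               # the raw gradient array itself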
def get_model_norm_gradient(model):
"""
Description:
- get the gradient norm of each parameter from the model, stored in an OrderedDict
Args:
- model: (torch.nn.Module), torch model
Returns:
- grads: OrderedDict mapping parameter name to gradient norm
"""
grads = OrderedDict()
for name, params in model.named_parameters():
grad = params.grad
if grad is not None:
grads[name] = grad.norm().item()
return grads
def get_grad_histogram(grads_sum):
gradshisto = OrderedDict()
# grads = OrderedDict()
for name, params in grads_sum.items():
grad = params
if grad is not None:
tmp = {}
#params_np = grad.cpu().numpy()
params_np = grad
histogram, bins = np.histogram(params_np.flatten(),bins=20)
tmp['histogram'] = list(histogram)
tmp['bins'] = list(bins)
gradshisto[name] = tmp # one histogram per layer (tmp holds the information describing the histogram)
# grads[name] = params_np
return gradshisto
# -*- coding: utf-8 -*-
import numpy
import numpy as np
import torch
import sys
from collections import OrderedDict
import scipy.stats
import pandas as pd
import os
import os.path
#
d1 = sys.argv[1] # bit
d2 = sys.argv[2] # mode
d3 = sys.argv[3] # n_exp
# d1=2
# d2 = sys.argv[2] # epoch
# d1=2
# d2=3
sum=0
flag=0
# CNN FLOPs = Cout * Hout * Wout * (2 * Cin * K * K) when bias is counted, otherwise -1 inside the parentheses
# FCN (fully-connected) FLOPs = Cout * Cin when bias is counted, otherwise -1
# the related relu/pool ops are also taken into account
# MAdd
# weight0 =np.array( [ 705600.0+4704.0+ 3528.0 , 480000.0+ 1600.0 + 1200.0 , 95880.0 + 120.0,
# 20076.0 + 84.0 , 1670.0 ])
# weight1=np.array([705,600.0 , 480,000.0,+ 95,880.0 ,
# 20,076.0 , 1,670.0 ])
# flops
weight_f0= np.array([357504+4704+4704, 241600+1600+1600,48000+120,10080+84,840])
weight_f1=np.array([357504, 241600,48000,10080,840])
summary_quan_dict=OrderedDict()
summary_base_dict=OrderedDict()
# outer level: one dict per epoch; inner level: the grads of each network layer
flag = 0
dirpath = './project/p/qat_analysis_data/mode' + str(d2)
if not os.path.isdir(dirpath):
os.makedirs(dirpath, mode=0o777)
os.chmod(dirpath, mode=0o777)
if int(d2) == 1 or int(d2) == 2:
csvpath = dirpath + '/scratch_loss.csv'
else:
csvpath = dirpath + '/scratch_loss_' + str(d3) + '.csv'
if os.path.exists(csvpath):
flag = 1
if flag == 0: # no csv yet
df = pd.DataFrame()
else: # the csv already exists
df = pd.read_csv(csvpath, index_col=0)
df2 = pd.DataFrame()
for epoch in ([5, 10, 15, 20, 25, 30]):
sums = []
total_quan_list = []
total_base_list = []
for i in range(int(epoch)):
if int(d2) == 1:
total_quan_list.append(torch.load(
'./project/p/checkpoint/cifar-10_lenet_bn_quant/scratch/' + str(d1) + '/ckpt_cifar-10_lenet_bn_quant_' + str(
i + 1) + '.pth'))
elif int(d2) == 2:
total_quan_list.append(torch.load(
'./project/p/checkpoint/cifar-10_lenet_bn_quant/scratch/mode' + str(d2) + '/' + str(
d1) + '/ckpt_cifar-10_lenet_bn_quant_' + str(
i + 1) + '.pth'))
else:
total_quan_list.append(torch.load(
'./project/p/checkpoint/cifar-10_lenet_bn_quant/scratch/mode' + str(d2) + '_' + str(d3) + '/' + str(
d1) + '/ckpt_cifar-10_lenet_bn_quant_' + str(
i + 1) + '.pth'))
sum_loss = 0
loss = total_quan_list[i]['losses']
# print(len(loss))
# per-batch losses within this epoch
for j in range(len(loss)):
sum_loss += loss[j].cpu()
# print(sum_loss)
sum_loss /= len(loss) # average loss over the batches of this epoch (dividing by j would be off by one)
sums.append(sum_loss)
# print(sums)
#print(sums[0] - sums[int(d1) - 1])
if flag == 0:
df[str(epoch)] = [(sums[0] - sums[int(epoch) - 1]).detach().numpy()]
else:
df2[str(epoch)] = [(sums[0] - sums[int(epoch) - 1]).detach().numpy()]
if flag == 0:
# df.to_csv('./qat_analysis_data/scratch_loss.csv')
df.to_csv(csvpath)
else:
df = df.append(df2)
# df.to_csv('./qat_analysis_data/scratch_loss.csv')
df.to_csv(csvpath)
# -*- coding: utf-8 -*-
import torch
import torch.nn as nn
import torch.nn.functional as F
from module import *
# class VGG_19(nn.Module):
# def __init__(self, img_size=32, input_channel=3, num_class=10):
# super().__init__()
# self.conv_param_layer_name = (
# 'conv1_1', 'relu1_1', 'conv1_2', 'bn1_1', 'relu1_2', 'pool1',
# 'conv2_1', 'bn2_1', 'relu2_1', 'conv2_2', 'bn2_2', 'relu2_2', 'pool2',
# 'conv3_1', 'bn3_1', 'relu3_1', 'conv3_2', 'bn3_2', 'relu3_2', 'conv3_3', 'bn3_3', 'relu3_3', 'conv3_4',
# 'bn3_4', 'relu3_4', 'pool3',
# 'conv4_1', 'bn4_1', 'relu4_1', 'conv4_2', 'bn4_2', 'relu4_2', 'conv4_3', 'bn4_3', 'relu4_3', 'conv4_4',
# 'bn4_4', 'relu4_4', 'pool4',
# 'conv5_1', 'bn5_1', 'relu5_1', 'conv5_2', 'bn5_2', 'relu5_2', 'conv5_3', 'bn5_3', 'relu5_3', 'conv5_4',
# 'bn5_4', 'relu5_4', 'pool5'
# )
# self.fc_param_layer_name = (
# 'fc1','relu1','drop1','fc2','relu2','drop2','fc3'
# )
# self.conv_layers = nn.ModuleDict({
# # block1
# 'conv1_1': nn.Conv2d(in_channels=3, out_channels=64, kernel_size=(3, 3), stride=(1, 1), padding=1),
# 'relu1_1': nn.ReLU(),
# 'conv1_2': nn.Conv2d(in_channels=64, out_channels=64, kernel_size=(3, 3), stride=(1, 1), padding=1),
# 'bn1_1': nn.BatchNorm2d(num_features=64),
# 'relu1_2': nn.ReLU(),
# 'pool1': nn.MaxPool2d(kernel_size=(2, 2), stride=(2, 2)),
# # block2
# 'conv2_1': nn.Conv2d(in_channels=64, out_channels=128, kernel_size=(3, 3), stride=(1, 1), padding=1),
# 'bn2_1': nn.BatchNorm2d(num_features=128),
# 'relu2_1': nn.ReLU(),
# 'conv2_2': nn.Conv2d(in_channels=128, out_channels=128, kernel_size=(3, 3), stride=(1, 1), padding=1),
# 'bn2_2': nn.BatchNorm2d(num_features=128),
# 'relu2_2': nn.ReLU(),
# 'pool2': nn.MaxPool2d(kernel_size=(2, 2), stride=(2, 2)),
# # block3
# 'conv3_1': nn.Conv2d(in_channels=128, out_channels=256, kernel_size=(3, 3), stride=(1, 1), padding=1),
# 'bn3_1': nn.BatchNorm2d(num_features=256),
# 'relu3_1': nn.ReLU(),
# 'conv3_2': nn.Conv2d(in_channels=256, out_channels=256, kernel_size=(3, 3), stride=(1, 1), padding=1),
# 'bn3_2':nn.BatchNorm2d(num_features=256),
# 'relu3_2': nn.ReLU(),
# 'conv3_3': nn.Conv2d(in_channels=256, out_channels=256, kernel_size=(3, 3), stride=(1, 1), padding=1),
# 'bn3_3': nn.BatchNorm2d(num_features=256),
# 'relu3_3': nn.ReLU(),
# 'conv3_4': nn.Conv2d(in_channels=256, out_channels=256, kernel_size=(3, 3), stride=(1, 1), padding=1),
# 'bn3_4': nn.BatchNorm2d(num_features=256),
# 'relu3_4': nn.ReLU(),
# 'pool3': nn.MaxPool2d(kernel_size=(2, 2), stride=(2, 2)),
# # block4
# 'conv4_1': nn.Conv2d(in_channels=256, out_channels=512, kernel_size=(3, 3), stride=(1, 1), padding=1),
# 'bn4_1': nn.BatchNorm2d(num_features=512),
# 'relu4_1': nn.ReLU(),
# 'conv4_2': nn.Conv2d(in_channels=512, out_channels=512, kernel_size=(3, 3), stride=(1, 1), padding=1),
# 'bn4_2': nn.BatchNorm2d(num_features=512),
# 'relu4_2': nn.ReLU(),
# 'conv4_3': nn.Conv2d(in_channels=512, out_channels=512, kernel_size=(3, 3), stride=(1, 1), padding=1),
# 'bn4_3': nn.BatchNorm2d(num_features=512),
# 'relu4_3': nn.ReLU(),
# 'conv4_4': nn.Conv2d(in_channels=512, out_channels=512, kernel_size=(3, 3), stride=(1, 1), padding=1),
# 'bn4_4': nn.BatchNorm2d(num_features=512),
# 'relu4_4': nn.ReLU(),
# 'pool4': nn.MaxPool2d(kernel_size=(2, 2), stride=(2, 2)),
# # block5
# 'conv5_1': nn.Conv2d(in_channels=512, out_channels=512, kernel_size=(3, 3), stride=(1, 1), padding=1),
# 'bn5_1': nn.BatchNorm2d(num_features=512),
# 'relu5_1': nn.ReLU(),
# 'conv5_2': nn.Conv2d(in_channels=512, out_channels=512, kernel_size=(3, 3), stride=(1, 1), padding=1),
# 'bn5_2': nn.BatchNorm2d(num_features=512),
# 'relu5_2': nn.ReLU(),
# 'conv5_3': nn.Conv2d(in_channels=512, out_channels=512, kernel_size=(3, 3), stride=(1, 1), padding=1),
# 'bn5_3': nn.BatchNorm2d(num_features=512),
# 'relu5_3': nn.ReLU(),
# 'conv5_4': nn.Conv2d(in_channels=512, out_channels=512, kernel_size=(3, 3), stride=(1, 1), padding=1),
# 'bn5_4': nn.BatchNorm2d(num_features=512),
# 'relu5_4': nn.ReLU(),
# 'pool5': nn.MaxPool2d(kernel_size=(2, 2), stride=(2, 2))
# })
# self.fc_layers = nn.ModuleDict({
# # classifier
# 'fc1': nn.Linear(512 * (int)(img_size * img_size / 32 / 32), 4096),
# 'relu1': nn.ReLU(),
# 'drop1': nn.Dropout(0.5),
# 'fc2': nn.Linear(4096, 4096),
# 'relu2': nn.ReLU(),
# 'drop2': nn.Dropout(0.5),
# 'fc3': nn.Linear(4096, num_class)
# })
# def forward(self,x):
# for _,layer in self.conv_layers.items():
# x = layer(x)
# output = x.view(x.size()[0], -1)
# for _,layer in self.fc_layers.items():
# output = layer(output)
# out = F.softmax(output,dim = 1) # the softmax here is optional; it has little effect
# return out
# def quantize(self, num_bits=8):
# self.quantize_conv_layers=nn.ModuleDict({
# qi=True: the previous layer's output has not been quantized yet and must be quantized here. MaxPool and ReLU do not change the INT values or the min/max, so the qi of layers right after them is False.
# If the previous layer is a conv, the data min/max has changed, so qi=True is needed to quantize.
# 'qconv1_1': QConv2d(self.conv_layers['conv1_1'], qi=True, qo=True, num_bits=num_bits),
# 'qrelu1_1': QReLU(),
# 'qconvbnrelu1_1': QConvBNReLU(self.conv_layers['conv1_2'],self.conv_layers['bn1_1'],qi=False,qo=True,num_bits=num_bits),
# 'qpool1': QMaxPooling2d(kernel_size=2,stride=2,padding=0),
# # block2
# 'qconvbnrelu2_1': QConvBNReLU(self.conv_layers['conv2_1'], self.conv_layers['bn2_1'], qi=False, qo=True, num_bits=num_bits),
# 'qconvbnrelu2_2': QConvBNReLU(self.conv_layers['conv2_2'], self.conv_layers['bn2_2'], qi=False, qo=True, num_bits=num_bits),
# 'qpool2': QMaxPooling2d(kernel_size=2,stride=2,padding=0),
# # block3
# 'qconvbnrelu3_1': QConvBNReLU(self.conv_layers['conv3_1'], self.conv_layers['bn3_1'], qi=False, qo=True,
# num_bits=num_bits),
# 'qconvbnrelu3_2': QConvBNReLU(self.conv_layers['conv3_2'], self.conv_layers['bn3_2'], qi=False, qo=True,
# num_bits=num_bits),
# 'qconvbnrelu3_3': QConvBNReLU(self.conv_layers['conv3_3'], self.conv_layers['bn3_3'], qi=False, qo=True,
# num_bits=num_bits),
# 'qconvbnrelu3_4': QConvBNReLU(self.conv_layers['conv3_4'], self.conv_layers['bn3_4'], qi=False, qo=True,
# num_bits=num_bits),
# 'qpool3': QMaxPooling2d(kernel_size=2,stride=2,padding=0),
# # block4
# 'qconvbnrelu4_1': QConvBNReLU(self.conv_layers['conv4_1'], self.conv_layers['bn4_1'], qi=False, qo=True,
# num_bits=num_bits),
# 'qconvbnrelu4_2': QConvBNReLU(self.conv_layers['conv4_2'], self.conv_layers['bn4_2'], qi=False, qo=True,
# num_bits=num_bits),
# 'qconvbnrelu4_3': QConvBNReLU(self.conv_layers['conv4_3'], self.conv_layers['bn4_3'], qi=False, qo=True,
# num_bits=num_bits),
# 'qconvbnrelu4_4': QConvBNReLU(self.conv_layers['conv4_4'], self.conv_layers['bn4_4'], qi=False, qo=True,
# num_bits=num_bits),
# 'qpool4': QMaxPooling2d(kernel_size=2,stride=2,padding=0),
# # block5
# 'qconvbnrelu5_1': QConvBNReLU(self.conv_layers['conv5_1'], self.conv_layers['bn5_1'], qi=False, qo=True,
# num_bits=num_bits),
# 'qconvbnrelu5_2': QConvBNReLU(self.conv_layers['conv5_2'], self.conv_layers['bn5_2'], qi=False, qo=True,
# num_bits=num_bits),
# 'qconvbnrelu5_3': QConvBNReLU(self.conv_layers['conv5_3'], self.conv_layers['bn5_3'], qi=False, qo=True,
# num_bits=num_bits),
# 'qconvbnrelu5_4': QConvBNReLU(self.conv_layers['conv5_4'], self.conv_layers['bn5_4'], qi=False, qo=True,
# num_bits=num_bits),
# 'qpool5': QMaxPooling2d(kernel_size=2,stride=2,padding=0)
# })
# self.quantize_fc_layers = nn.ModuleDict({
# 'qfc1': QLinear(self.fc_layers['fc1'],qi=False,qo=True,num_bits=num_bits),
# 'qrelu1': QReLU(),
# 'qdrop1': nn.Dropout(0.5),
# 'qfc2': QLinear(self.fc_layers['fc2'],qi=False,qo=True,num_bits=num_bits),
# 'qrelu2': QReLU(),
# 'qdrop2': nn.Dropout(0.5),
# 'qfc3': QLinear(self.fc_layers['fc3'],qi=False,qo=True,num_bits=num_bits)
# })
# def quantize_forward(self, x):
# for _, layer in self.quantize_conv_layers.items():
# x = layer(x)
# output = x.view(x.size()[0],-1)
# for s, layer in self.quantize_fc_layers.items():
# # if (s=='qrelu1') == True or (s=='qrelu2')==True:
# # output = nn.Dropout(0.5)
# # else:
# output = layer(output)
# out = F.softmax(output, dim=1) # the softmax here is optional, little effect; used when computing the loss
# return out
# def freeze(self):
# self.quantize_conv_layers['qconv1_1'].freeze()
# self.quantize_conv_layers['qrelu1_1'].freeze(self.quantize_conv_layers['qconv1_1'].qo)
# self.quantize_conv_layers['qconvbnrelu1_1'].freeze(qi=self.quantize_conv_layers['qconv1_1'].qo)
# #self.quantize_conv_layers['qconvbnrelu1_1'].freeze(qi=self.quantize_conv_layers['qrelu1_1'].qo)
# self.quantize_conv_layers['qpool1'].freeze(self.quantize_conv_layers['qconvbnrelu1_1'].qo)
# self.quantize_conv_layers['qconvbnrelu2_1'].freeze(qi=self.quantize_conv_layers['qconvbnrelu1_1'].qo)
# self.quantize_conv_layers['qconvbnrelu2_2'].freeze(qi=self.quantize_conv_layers['qconvbnrelu2_1'].qo)
# self.quantize_conv_layers['qpool2'].freeze(self.quantize_conv_layers['qconvbnrelu2_2'].qo)
# self.quantize_conv_layers['qconvbnrelu3_1'].freeze(qi=self.quantize_conv_layers['qconvbnrelu2_2'].qo)
# #self.quantize_conv_layers['qconvbnrelu3_1'].freeze(qi=self.quantize_conv_layers['qpool2'].qo)
# self.quantize_conv_layers['qconvbnrelu3_2'].freeze(qi=self.quantize_conv_layers['qconvbnrelu3_1'].qo)
# self.quantize_conv_layers['qconvbnrelu3_3'].freeze(qi=self.quantize_conv_layers['qconvbnrelu3_2'].qo)
# self.quantize_conv_layers['qconvbnrelu3_4'].freeze(qi=self.quantize_conv_layers['qconvbnrelu3_3'].qo)
# self.quantize_conv_layers['qpool3'].freeze(self.quantize_conv_layers['qconvbnrelu3_4'].qo)
# self.quantize_conv_layers['qconvbnrelu4_1'].freeze(qi=self.quantize_conv_layers['qconvbnrelu3_4'].qo)
# #self.quantize_conv_layers['qconvbnrelu4_1'].freeze(qi=self.quantize_conv_layers['qpool3'].qo)
# self.quantize_conv_layers['qconvbnrelu4_2'].freeze(qi=self.quantize_conv_layers['qconvbnrelu4_1'].qo)
# self.quantize_conv_layers['qconvbnrelu4_3'].freeze(qi=self.quantize_conv_layers['qconvbnrelu4_2'].qo)
# self.quantize_conv_layers['qconvbnrelu4_4'].freeze(qi=self.quantize_conv_layers['qconvbnrelu4_3'].qo)
# self.quantize_conv_layers['qpool4'].freeze(self.quantize_conv_layers['qconvbnrelu4_4'].qo)
# self.quantize_conv_layers['qconvbnrelu5_1'].freeze(qi=self.quantize_conv_layers['qconvbnrelu4_4'].qo)
# #self.quantize_conv_layers['qconvbnrelu5_1'].freeze(qi=self.quantize_conv_layers['qpool4'].qo)
# self.quantize_conv_layers['qconvbnrelu5_2'].freeze(qi=self.quantize_conv_layers['qconvbnrelu5_1'].qo)
# self.quantize_conv_layers['qconvbnrelu5_3'].freeze(qi=self.quantize_conv_layers['qconvbnrelu5_2'].qo)
# self.quantize_conv_layers['qconvbnrelu5_4'].freeze(qi=self.quantize_conv_layers['qconvbnrelu5_3'].qo)
# self.quantize_conv_layers['qpool5'].freeze(self.quantize_conv_layers['qconvbnrelu5_4'].qo)
# self.quantize_fc_layers['qfc1'].freeze(qi=self.quantize_conv_layers['qconvbnrelu5_4'].qo)
# #self.quantize_fc_layers['qfc1'].freeze(qi=self.quantize_conv_layers['qpool5'].qo)
# self.quantize_fc_layers['qrelu1'].freeze(self.quantize_fc_layers['qfc1'].qo)
# self.quantize_fc_layers['qfc2'].freeze(qi=self.quantize_fc_layers['qfc1'].qo)
# #self.quantize_fc_layers['qfc2'].freeze(qi=self.quantize_fc_layers['qrelu1'].qo)
# self.quantize_fc_layers['qrelu2'].freeze(self.quantize_fc_layers['qfc2'].qo)
# self.quantize_fc_layers['qfc3'].freeze(qi=self.quantize_fc_layers['qfc2'].qo)
# #self.quantize_fc_layers['qfc3'].freeze(qi=self.quantize_fc_layers['qrelu2'].qo)
# def quantize_inference(self, x):
# x = self.quantize_conv_layers['qconv1_1'].qi.quantize_tensor(x)
# for s, layer in self.quantize_conv_layers.items():
# x=layer.quantize_inference(x)
# output = x.view(x.size()[0], -1)
# for s, layer in self.quantize_fc_layers.items():
# # elif (s == 'qrelu1') == True or (s == 'qrelu2') == True:
# # output = nn.Dropout(0.5)
# # if (s == 'qdrop1')==True or (s=='qdrop2')==True:
# # output = F.dropout(output,0.45)
# # else:
# if ((s == 'qdrop1') == False ) and ((s == 'qdrop2') == False):
# output = layer.quantize_inference(output)
# else:
# output = output
# output = self.quantize_fc_layers['qfc3'].qo.dequantize_tensor(output)
# out = F.softmax(output, dim=1) # a QSoftmax would probably be better here; change it later
# return out
class LeNet(nn.Module):
# CONV FLOPs with bias:    (2 * C_in * K_h * K_w) * H_out * W_out * C_out
#           without bias:  (2 * C_in * K_h * K_w - 1) * H_out * W_out * C_out
# FCN FLOPs with bias:     (2 * I) * O
#           without bias:  (2 * I - 1) * O
def __init__(self, img_size=32, input_channel=3, num_class=10, n_exp=4, mode=1):
super().__init__()
self.conv_layers = nn.ModuleDict({
# block1
'conv1': nn.Conv2d(3,6,5), # (2*3*5*5) * 28*28*6 FLOPs (bias accounts for 28*28*6 = 4704 of them): 4704/705600
'reluc1': nn.ReLU(),
'pool1': nn.MaxPool2d(2,2),
# block2
'conv2': nn.Conv2d(6,16,5), # (2*6*5*5) * 10*10*16 FLOPs (bias accounts for 10*10*16 = 1600 of them): 1600/480000
'reluc2': nn.ReLU(),
'pool2': nn.MaxPool2d(2,2),
})
self.fc_layers = nn.ModuleDict({
# classifier
'fc1': nn.Linear(16*5*5,120), # (2*16*5*5)*120 FLOPs (bias accounts for 120 of them): 120/96000
'reluf1': nn.ReLU(),
'fc2': nn.Linear(120,84), # (2*120)*84 FLOPs (bias accounts for 84 of them): 84/20160
'reluf2': nn.ReLU(),
'fc3': nn.Linear(84, num_class)
})
self.mode = mode
self.n_exp = n_exp
def forward(self,x):
for _,layer in self.conv_layers.items():
x = layer(x)
output = x.view(-1,16*5*5)
for _,layer in self.fc_layers.items():
output = layer(output)
out = F.softmax(output,dim = 1) # the softmax here is optional; it has little effect
return out
def quantize(self, num_bits=8):
self.quantize_conv_layers=nn.ModuleDict({
# qi=True: the previous layer's output has not been quantized yet and must be quantized here. MaxPool and ReLU do not change the INT values or the min/max, so the qi of layers right after them is False.
# If the previous layer is a conv, the data min/max has changed, so qi=True is needed to quantize.
'qconv1': QConv2d(self.conv_layers['conv1'], qi=True, qo=True, num_bits=num_bits, n_exp=self.n_exp, mode=self.mode),
'qreluc1': QReLU(n_exp=self.n_exp, mode=self.mode),
'qpool1': QMaxPooling2d(kernel_size=2,stride=2,padding=0, n_exp=self.n_exp, mode=self.mode),
'qconv2': QConv2d(self.conv_layers['conv2'], qi=False, qo=True, num_bits=num_bits, n_exp=self.n_exp, mode=self.mode),
'qreluc2': QReLU(n_exp=self.n_exp, mode=self.mode),
'qpool2': QMaxPooling2d(kernel_size=2, stride=2, padding=0, n_exp=self.n_exp, mode=self.mode)
})
self.quantize_fc_layers = nn.ModuleDict({
'qfc1': QLinear(self.fc_layers['fc1'],qi=False,qo=True,num_bits=num_bits, n_exp=self.n_exp, mode=self.mode),
'qreluf1': QReLU(n_exp=self.n_exp, mode=self.mode),
'qfc2': QLinear(self.fc_layers['fc2'],qi=False,qo=True,num_bits=num_bits, n_exp=self.n_exp, mode=self.mode),
'qreluf2': QReLU(n_exp=self.n_exp, mode=self.mode),
'qfc3': QLinear(self.fc_layers['fc3'],qi=False,qo=True,num_bits=num_bits, n_exp=self.n_exp, mode=self.mode)
})
def quantize_forward(self, x):
for _, layer in self.quantize_conv_layers.items():
x = layer(x)
output = x.view(-1,16*5*5)
for s, layer in self.quantize_fc_layers.items():
output = layer(output)
out = F.softmax(output, dim=1) # the softmax here is optional, little effect; used when computing the loss
return out
def freeze(self):
self.quantize_conv_layers['qconv1'].freeze()
self.quantize_conv_layers['qreluc1'].freeze(self.quantize_conv_layers['qconv1'].qo)
self.quantize_conv_layers['qpool1'].freeze(self.quantize_conv_layers['qconv1'].qo)
self.quantize_conv_layers['qconv2'].freeze(self.quantize_conv_layers['qconv1'].qo)
self.quantize_conv_layers['qreluc2'].freeze(self.quantize_conv_layers['qconv2'].qo)
self.quantize_conv_layers['qpool2'].freeze(self.quantize_conv_layers['qconv2'].qo)
self.quantize_fc_layers['qfc1'].freeze(qi=self.quantize_conv_layers['qconv2'].qo)
self.quantize_fc_layers['qreluf1'].freeze(self.quantize_fc_layers['qfc1'].qo)
self.quantize_fc_layers['qfc2'].freeze(qi=self.quantize_fc_layers['qfc1'].qo)
self.quantize_fc_layers['qreluf2'].freeze(self.quantize_fc_layers['qfc2'].qo)
self.quantize_fc_layers['qfc3'].freeze(qi=self.quantize_fc_layers['qfc2'].qo)
def fakefreeze(self):
self.quantize_conv_layers['qconv1'].fakefreeze()
self.quantize_conv_layers['qreluc1'].fakefreeze(self.quantize_conv_layers['qconv1'].qo)
self.quantize_conv_layers['qpool1'].fakefreeze(self.quantize_conv_layers['qconv1'].qo)
self.quantize_conv_layers['qconv2'].fakefreeze(self.quantize_conv_layers['qconv1'].qo)
self.quantize_conv_layers['qreluc2'].fakefreeze(self.quantize_conv_layers['qconv2'].qo)
self.quantize_conv_layers['qpool2'].fakefreeze(self.quantize_conv_layers['qconv2'].qo)
self.quantize_fc_layers['qfc1'].fakefreeze(qi=self.quantize_conv_layers['qconv2'].qo)
self.quantize_fc_layers['qreluf1'].fakefreeze(self.quantize_fc_layers['qfc1'].qo)
self.quantize_fc_layers['qfc2'].fakefreeze(qi=self.quantize_fc_layers['qfc1'].qo)
self.quantize_fc_layers['qreluf2'].fakefreeze(self.quantize_fc_layers['qfc2'].qo)
self.quantize_fc_layers['qfc3'].fakefreeze(qi=self.quantize_fc_layers['qfc2'].qo)
def quantize_inference(self, x):
x = self.quantize_conv_layers['qconv1'].qi.quantize_tensor(x, self.mode)
for s, layer in self.quantize_conv_layers.items():
x = layer.quantize_inference(x)
output = x.view( -1,16*5*5)
for s, layer in self.quantize_fc_layers.items():
output = layer.quantize_inference(output)
# Only mode 1 needs the range mapping that brings the quantized data back to a range similar to the original data; PoT (power-of-two) quantization does not, since it restores the range by itself
if self.mode == 1:
output = self.quantize_fc_layers['qfc3'].qo.dequantize_tensor(output, self.mode)
out = F.softmax(output, dim=1) # a QSoftmax would probably be better here; change it later
return out
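# Typical end-to-end flow for the quantized LeNet above (a sketch that mirrors the training/PTQ
# scripts elsewhere in this commit, not additional functionality):
#   model = LeNet(mode=1, n_exp=4)
#   model.load_state_dict(torch.load('ckpt/cifar-10_lenet_bn.pt'))   # full-precision weights
#   model.quantize(num_bits=8)          # build the Q* layers
#   ... train with model.quantize_forward(x) (QAT) or calibrate with it (PTQ) ...
#   model.freeze()                      # fix scale / zero point for inference
#   out = model.quantize_inference(x)   # FP32 in -> quantized internally -> FP32 out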
class Net(nn.Module):
def __init__(self, num_channels=1):
super(Net, self).__init__()
# layers referenced by forward() and quantize() below; restored so the class is usable
self.conv1 = nn.Conv2d(num_channels, 40, 3, 1)
self.conv2 = nn.Conv2d(40, 40, 3, 1, groups=20)
self.fc = nn.Linear(5*5*40, 10)
def forward(self, x):
x = F.relu(self.conv1(x))
x = F.max_pool2d(x, 2, 2)
x = F.relu(self.conv2(x))
x = F.max_pool2d(x, 2, 2)
x = x.view(-1, 5*5*40) # reshape the tensor
x = self.fc(x)
return x
# Preliminary quantization of the model (the quantized model has not been trained yet). Determining the quantization parameters is a preparatory step; the later training of the quantized model is fine-tuning.
def quantize(self, num_bits=8):
# Only the first layer has qi=True: during forward, every layer except pool and relu adjusts x according to its qo at the end, completing the quantize-then-dequantize step, so x always carries the latest quantize/dequantize result; only the weights need to be quantized and dequantized again in every layer.
self.qconv1 = QConv2d(self.conv1, qi=True, qo=True, num_bits=num_bits)
self.qrelu1 = QReLU()
self.qmaxpool2d_1 = QMaxPooling2d(kernel_size=2, stride=2, padding=0)
self.qconv2 = QConv2d(self.conv2, qi=False, qo=True, num_bits=num_bits) # qi=False means this layer does not need its own input quantization parameters (scale, zero point)
self.qrelu2 = QReLU()
self.qmaxpool2d_2 = QMaxPooling2d(kernel_size=2, stride=2, padding=0)
self.qfc = QLinear(self.fc, qi=False, qo=True, num_bits=num_bits)
# forward function used while training the quantized model
def quantize_forward(self, x):
x = self.qconv1(x)
x = self.qrelu1(x)
x = self.qmaxpool2d_1(x)
x = self.qconv2(x)
x = self.qrelu2(x)
x = self.qmaxpool2d_2(x)
x = x.view(-1, 5*5*40)
x = self.qfc(x)
return x
# After the quantized model has been trained, freeze the parameters. This fixes which quantization parameters each layer uses at inference time; the quantization parameters determine the mapping and the dequantized result.
# On how qo is updated per layer: maxpool, relu and dropout do not update qo, conv does. So the quantization parameters only need to change after a conv; the other layers simply inherit them (the x passed from a layer's output to the next layer's input carries this inheritance by itself, while the quantization parameters are updated only when x might exceed the current min/max range).
def freeze(self):
self.qconv1.freeze()
self.qrelu1.freeze(self.qconv1.qo) # used as qi; layers after a conv need the new qo (qo keeps being updated during training because x and the model parameters change; it is updated in the forward pass of the Q* layers, and min/max is a running global statistic; since this is fine-tuning, the initial min/max is not too far off)
self.qmaxpool2d_1.freeze(self.qconv1.qo)
self.qconv2.freeze(qi=self.qconv1.qo)
self.qrelu2.freeze(self.qconv2.qo) # relu and maxpool cannot change the quantization parameters (the min/max statistics are global, and relu/maxpool do not affect min or max)
self.qmaxpool2d_2.freeze(self.qconv2.qo)
self.qfc.freeze(qi=self.qconv2.qo)
# inference after the quantization parameters are frozen: FP32 in, quantized internally, FP32 out
def quantize_inference(self, x):
qx = self.qconv1.qi.quantize_tensor(x)
qx = self.qconv1.quantize_inference(qx)
qx = self.qrelu1.quantize_inference(qx)
qx = self.qmaxpool2d_1.quantize_inference(qx)
qx = self.qconv2.quantize_inference(qx)
qx = self.qrelu2.quantize_inference(qx)
qx = self.qmaxpool2d_2.quantize_inference(qx)
qx = qx.view(-1, 5*5*40)
qx = self.qfc.quantize_inference(qx)
out = self.qfc.qo.dequantize_tensor(qx)
return out
class NetBN(nn.Module):
def __init__(self, num_channels=1):
super(NetBN, self).__init__()
self.conv1 = nn.Conv2d(num_channels, 40, 3, 1)
self.bn1 = nn.BatchNorm2d(40)
self.conv2 = nn.Conv2d(40, 40, 3, 1)
self.bn2 = nn.BatchNorm2d(40)
self.fc = nn.Linear(5 * 5 * 40, 10)
def forward(self, x):
x = self.conv1(x)
x = self.bn1(x)
x = F.relu(x)
x = F.max_pool2d(x, 2, 2)
x = self.conv2(x)
x = self.bn2(x)
x = F.relu(x)
x = F.max_pool2d(x, 2, 2)
x = x.view(-1, 5 * 5 * 40)
x = self.fc(x)
return x
def quantize(self, num_bits=8):
self.qconv1 = QConvBNReLU(self.conv1, self.bn1, qi=True, qo=True, num_bits=num_bits)
self.qmaxpool2d_1 = QMaxPooling2d(kernel_size=2, stride=2, padding=0)
self.qconv2 = QConvBNReLU(self.conv2, self.bn2, qi=False, qo=True, num_bits=num_bits)
self.qmaxpool2d_2 = QMaxPooling2d(kernel_size=2, stride=2, padding=0)
self.qfc = QLinear(self.fc, qi=False, qo=True, num_bits=num_bits)
def quantize_forward(self, x):
x = self.qconv1(x)
x = self.qmaxpool2d_1(x)
x = self.qconv2(x)
x = self.qmaxpool2d_2(x)
x = x.view(-1, 5*5*40)
x = self.qfc(x)
return x
def freeze(self):
self.qconv1.freeze()
self.qmaxpool2d_1.freeze(self.qconv1.qo)
self.qconv2.freeze(qi=self.qconv1.qo) # because maxpool does not change min/max
self.qmaxpool2d_2.freeze(self.qconv2.qo)
self.qfc.freeze(qi=self.qconv2.qo) # because maxpool does not change min/max
def quantize_inference(self, x):
qx = self.qconv1.qi.quantize_tensor(x)
qx = self.qconv1.quantize_inference(qx)
qx = self.qmaxpool2d_1.quantize_inference(qx)
qx = self.qconv2.quantize_inference(qx)
qx = self.qmaxpool2d_2.quantize_inference(qx)
qx = qx.view(-1, 5*5*40)
qx = self.qfc.quantize_inference(qx)
out = self.qfc.qo.dequantize_tensor(qx) # INT -> FP
return out
import torch
import torch.nn as nn
import torch.optim as optim
from torch.utils.data import DataLoader
from get_weight import *
from torch.utils.tensorboard import SummaryWriter
from torchvision import datasets, transforms
from torchvision.datasets import CIFAR10
from resnet import *
from torchvision.transforms import transforms
# import models
import time
import os
import argparse
# define the model
class Net(nn.Module):
def __init__(self):
super(Net, self).__init__()
self.conv1 = nn.Conv2d(3, 32, kernel_size=3)
self.pool = nn.MaxPool2d(kernel_size=2, stride=2)
self.conv2 = nn.Conv2d(32, 64, kernel_size=3)
self.fc1 = nn.Linear(64 * 6 * 6, 512)
self.fc2 = nn.Linear(512, 10)
self.relu = nn.ReLU()
def forward(self, x):
x = self.relu(self.conv1(x))
x = self.pool(x)
x = self.relu(self.conv2(x))
x = self.pool(x)
x = torch.flatten(x, start_dim=1)
x = self.relu(self.fc1(x))
x = self.fc2(x)
return x
def train(model, optimizer, criterion, train_loader, device):
model.train()
running_loss = 0.0
flag = 0
cnt = 0
for i, data in enumerate(train_loader):
inputs, labels = data
inputs, labels = inputs.to(device), labels.to(device)
optimizer.zero_grad()
outputs = model(inputs)
loss = criterion(outputs, labels)
loss.backward()
histo, grads = (get_model_histogram(model))
if flag == 0:
flag = 1
grads_sum = grads
else:
for k,v in grads_sum.items():
grads_sum[k] += grads[k]
optimizer.step()
running_loss += loss.item()
train_loss = running_loss / len(train_loader)
for k, v in grads_sum.items():
grads_sum[k] = v / len(train_loader)
return train_loss,grads_sum
def evaluate(model, criterion, test_loader, device):
model.eval()
correct, total = 0, 0
with torch.no_grad():
for data in test_loader:
images, labels = data
images, labels = images.to(device), labels.to(device)
outputs = model(images)
_, predicted = torch.max(outputs.data, 1)
total += labels.size(0)
correct += (predicted == labels).sum().item()
accuracy = 100 * correct / total
return accuracy
def get_children(model: torch.nn.Module):
# get children from the model!
# use an nn.ModuleList as the container so the parameters can still be updated later
children = nn.ModuleList(model.children())
# print(children)
# makes it convenient to update the contained modules later
flatt_children = nn.ModuleList()
# children = list(model.children())
# flatt_children = nn.ModuleList()
# flatt_children = []
if len(children) == 0:
# if model has no children; model is last child! :O
return model
else:
# look for children from children... to the last child!
for child in children:
try:
flatt_children.extend(get_children(child))
except TypeError:
flatt_children.append(get_children(child))
# print(flatt_children)
return flatt_children
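# Usage sketch (hypothetical): flatten a model into its leaf modules, e.g. to inspect or rebuild
# them one by one:
#   leaves = get_children(resnet18())
#   for m in leaves:
#       print(type(m).__name__)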
if __name__ == "__main__":
# torch.cuda.empty_cache()
parser = argparse.ArgumentParser(description='PyTorch FP32 Training')
parser.add_argument('-m', '--model', metavar='MODEL ARCH', default='resnet18')
parser.add_argument('-e','--epochs', default=100, type=int, metavar='EPOCHS', help='number of total epochs to run')
parser.add_argument('-b', '--batch_size', default=128, type=int, metavar='BATCH SIZE', help='mini-batch size (default: 128)')
parser.add_argument('-j','--workers', default=4, type=int, metavar='WORKERS',help='number of data loading workers (default: 4)')
parser.add_argument('-lr', '--learning-rate', default=0.001, type=float, metavar='LR', help='initial learning rate', dest='lr')
parser.add_argument('-t', '--test', dest='test', action='store_true', help='test model on test set')
# models = ['resnet18', 'resnet50', 'resnet152','resnet18']
# training hyperparameters
args = parser.parse_args()
num_epochs = args.epochs
print(num_epochs)
batch_size = args.batch_size
print(batch_size)
num_workers = args.workers
lr = args.lr
best_acc = float("-inf")
start_time = time.time()
# model, loss function and optimizer
device = torch.device("cuda" if torch.cuda.is_available() else "cpu") # device selection
print(device)
# model = Net().to(device) # move the model to the device
# model = resnet18().to(device)
# model = models.__dict__[args.model]().to(device)
# t = torch.cuda.get_device_properties(0).total_memory
# r = torch.cuda.memory_reserved(0)
# a = torch.cuda.memory_allocated(0)
# f = r-a # free memory
# print(f"Total memory: {t}")
# print(f"Reserved memory: {r}")
# print(f"Allocated memory: {a}")
# print(f"Free memory: {f}")
if args.model == 'resnet18' :
model = resnet18().to(device)
elif args.model == 'resnet50' :
model = resnet50().to(device)
elif args.model == 'resnet152' :
model = resnet152().to(device)
criterion = nn.CrossEntropyLoss()
optimizer = optim.Adam(model.parameters(), lr=lr)
print("ok!")
# data parallelism
if torch.cuda.device_count() > 1:
print(f"Using {torch.cuda.device_count()} GPUs")
model = nn.DataParallel(model)
# load the data
train_loader = torch.utils.data.DataLoader(
datasets.CIFAR10('./project/p/data', train=True, download=False,
transform=transforms.Compose([
transforms.ToTensor(),
transforms.Normalize(
(0.4914, 0.4822, 0.4465), (0.2023, 0.1994, 0.2010))
])),
batch_size=batch_size, shuffle=True, num_workers=num_workers, pin_memory=True
)
test_loader = torch.utils.data.DataLoader(
datasets.CIFAR10('./project/p/data', train=False, download=False, transform=transforms.Compose([
transforms.ToTensor(),
transforms.Normalize((0.4914, 0.4822, 0.4465),
(0.2023, 0.1994, 0.2010))
])),
batch_size=batch_size, shuffle=False, num_workers=num_workers, pin_memory=True
)
# train_loader = DataLoader(train_dataset, batch_size=batch_size, shuffle=True, num_workers=num_workers)
# test_loader = DataLoader(test_dataset, batch_size=batch_size, shuffle=False, num_workers=num_workers)
# learning-rate scheduler
lr_scheduler = optim.lr_scheduler.StepLR(optimizer, step_size=5, gamma=0.1)
# TensorBoard
# WARN
# writer = SummaryWriter(log_dir='./project/p/models_log/trail/full_log')
writer = SummaryWriter(log_dir='./project/p/models_log/' + args.model + '/full_log')
# early-stopping parameters
patience = 5
count = 0
# WARN
# save_dir = './project/p/ckpt/trail'
save_dir = './project/p/ckpt/' + args.model
if not os.path.isdir(save_dir):
os.makedirs(save_dir, mode=0o777)
os.chmod(save_dir, mode=0o777)
# checkpoint_dir = './project/p/checkpoint/cifar-10_trail_model'
checkpoint_dir = './project/p/checkpoint/cifar-10_' + args.model
if not os.path.isdir(checkpoint_dir):
os.makedirs(checkpoint_dir, mode=0o777)
os.chmod(checkpoint_dir, mode=0o777)
# training loop
if args.test == True:
model.load_state_dict(torch.load(save_dir+'/' + args.model + '.pt'))
acc = evaluate(model, criterion, test_loader, device=device)
print(f"test accuracy: {acc:.2f}%")
for name, module in model.named_modules():
print(f"{name}: {module}\n")
print('========================================================')
print('========================================================')
model.quantize()
for name , layer in model.quantize_layers.items():
print(f"Layer {name}: {layer} ") # 足够遍历了
else:
for epoch in range(num_epochs):
# train the model and log the loss
train_loss,grads_sum = train(model, optimizer, criterion,
train_loader, device=device)
writer.add_scalar("Training Loss", train_loss, epoch + 1)
# evaluate the model and log the accuracy
if (epoch + 1) % 5 == 0:
acc = evaluate(model, criterion, test_loader, device=device)
writer.add_scalar("Validation Accuracy", acc, epoch + 1)
checkpoint = {
'model': model.state_dict(),
'optimizer': optimizer.state_dict(),
'epoch': epoch,
'grads': grads_sum,
'accuracy':acc
}
# for name, param in model.named_parameters():
# writer.add_histogram(tag=name + '_grad', values=param.grad, global_step=epoch)
# writer.add_histogram(tag=name + '_data', values=param.data, global_step=epoch)
for name, param in grads_sum.items():
# the grads here may be accumulated values rather than averages
writer.add_histogram(tag=name + '_grad', values=param, global_step=epoch)
# take the weights as they are after the last batch of this epoch
for name, param in model.named_parameters():
writer.add_histogram(tag=name + '_data', values=param.data, global_step=epoch)
# WARN
# torch.save(checkpoint, checkpoint_dir + '/ckpt_cifar-10_trail_model%s.pt' % (str(epoch+1)))
torch.save(checkpoint, checkpoint_dir + '/ckpt_cifar-10_' + args.model + '_%s.pt' % (str(epoch+1)))
# save the best model
if acc > best_acc:
best_acc = acc
count = 0
# WARN
# torch.save(model.state_dict(), save_dir+'/model_trail.pt')
torch.save(model.state_dict(), save_dir+'/' + args.model + '.pt')
else:
count += 1
print(
f"Epoch {epoch + 1}/{num_epochs}, Train Loss: {train_loss:.5f}, Val Acc: {acc:.2f}%")
# check whether early stopping should trigger
if count == patience:
print(f"No improvement after {patience} epochs. Early stop!")
break
# update the learning rate
lr_scheduler.step()
# training time and best validation accuracy
print(f"Training took {(time.time() - start_time) / 60:.2f} minutes")
print(f"Best validation accuracy: {best_acc:.2f}%")
# load and test the best model
# model.load_state_dict(torch.load("best_model.pth"))
# model.to(device)
# test_acc = evaluate(model, criterion, test_loader, device="cuda")
# print(f"Test Accuracy: {test_acc:.2f}%")
# close the TensorBoard writer
writer.close()
# -*- coding: utf-8 -*-
from torch.serialization import load
from model import *
import argparse
import torch
import sys
import torch.nn as nn
import torch.optim as optim
from torchvision import datasets, transforms
import os
import os.path as osp
from torch.utils.tensorboard import SummaryWriter
# To obtain the fake-quantized version of the PTQ weights (quantize then dequantize, so the distribution stays comparable to the full-precision weights and can be compared with the Wasserstein distance)
def direct_quantize(model, test_loader, device):
for i, (data, target) in enumerate(test_loader, 1):
data, target = data.to(device), target.to(device)
output = model.quantize_forward(data) # this calls the forward of each layer in the model in turn, which updates the quantization parameters (qw)
if i % 5000 == 0:
break
print('direct quantization finish')
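# Why fakefreeze() is used further down (a sketch): after calibration, the weights are quantized and
# then dequantized in place, so they remain FP32 values that lie on the quantization grid. Their
# distribution can then be compared against the full-precision checkpoint, e.g. (hypothetical names):
#   scipy.stats.wasserstein_distance(w_full.flatten(), w_fake_quant.flatten())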
def full_inference(model, test_loader, device):
correct = 0
for i, (data, target) in enumerate(test_loader, 1):
data, target = data.to(device), target.to(device)
output = model(data)
pred = output.argmax(dim=1, keepdim=True)
correct += pred.eq(target.view_as(pred)).sum().item()
print('\nTest set: Full Model Accuracy: {:.4f}%\n'.format(100. * correct / len(test_loader.dataset)))
def quantize_inference(model, test_loader, device):
correct = 0
for i, (data, target) in enumerate(test_loader, 1):
data, target = data.to(device), target.to(device)
output = model.quantize_inference(data)
pred = output.argmax(dim=1, keepdim=True)
correct += pred.eq(target.view_as(pred)).sum().item()
acc = 100. * correct / len(test_loader.dataset)
print('\nTest set: Quant Model Accuracy: {:.4f}%\n'.format(acc))
return acc
if __name__ == "__main__":
d1 = sys.argv[1]
batch_size = 32
using_bn = True
load_quant_model_file = None
# load_model_file = None
net = 'LeNet' # 1:
acc = 0
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
print(device)
train_loader = torch.utils.data.DataLoader(
datasets.CIFAR10('data', train=True, download=True,
transform=transforms.Compose([
transforms.ToTensor(),
transforms.Normalize((0.4914, 0.4822, 0.4465), (0.2023, 0.1994, 0.2010))
])),
batch_size=batch_size, shuffle=True, num_workers=1, pin_memory=False
)
test_loader = torch.utils.data.DataLoader(
datasets.CIFAR10('data', train=False, transform=transforms.Compose([
transforms.ToTensor(),
transforms.Normalize((0.4914, 0.4822, 0.4465), (0.2023, 0.1994, 0.2010))
])),
batch_size=batch_size, shuffle=True, num_workers=1, pin_memory=False
)
if using_bn:
model = LeNet().to(device)
# when generating the gradient-distribution plots, training was done from scratch
model.load_state_dict(torch.load('ckpt/cifar-10_lenet_bn.pt', map_location='cpu'))
# else:
# model = Net()
# model.load_state_dict(torch.load('ckpt/mnist_cnn.pt', map_location='cpu'))
# save_file = "ckpt/mnist_cnn_ptq.pt"
# model.to(device)
model.eval()
full_inference(model, test_loader, device)
num_bits = int(d1)
model.quantize(num_bits=num_bits)
model.eval()
print('Quantization bit: %d' % num_bits)
dir_name = './ptq_fake_log/' + 'quant_bit_' + str(d1) + '_log'
if not os.path.isdir(dir_name):
os.makedirs(dir_name, mode=0o777)
os.chmod(dir_name, mode=0o777)
qwriter = SummaryWriter(log_dir=dir_name)
# for name, param in model.named_parameters():
# qwriter.add_histogram(tag=name + '_data', values=param.data)
if load_quant_model_file is not None:
model.load_state_dict(torch.load(load_quant_model_file))
print("Successfully load quantized model %s" % load_quant_model_file)
direct_quantize(model, train_loader, device)
model.fakefreeze() # fake-quantize the weights
for name, param in model.named_parameters():
qwriter.add_histogram(tag=name + '_data', values=param.data)
dir_name ='ckpt/ptq_fakefreeze'
if not os.path.isdir(dir_name):
os.makedirs(dir_name, mode=0o777)
os.chmod(dir_name, mode=0o777)
save_file = 'ckpt/ptq_fakefreeze/cifar-10_lenet_bn_ptq_' + str(d1) + '_.pt'
torch.save(model.state_dict(), save_file)
# -*- coding: utf-8 -*-
from model import *
# from easydict import EasyDict
# from cleverhans.torch.attacks.fast_gradient_method import fast_gradient_method
# from cleverhans.torch.attacks.projected_gradient_descent import (
# projected_gradient_descent,
# )
import argparse
import torch
import torch.nn as nn
import torch.optim as optim
from torchvision import datasets, transforms
from torchvision import datasets, transforms
import os
import os.path as osp
import sys
import time
# import matplotlib.pyplot as plt
# import matplotlib
# sys.path.append("./project/p")
from get_weight import *
from torch.utils.tensorboard import SummaryWriter
def quantize_aware_training(model, device, train_loader, optimizer, epoch):
lossLayer = torch.nn.CrossEntropyLoss()
flag = 0
cnt = 0
losses=[]
for batch_idx, (data, target) in enumerate(train_loader, 1):
data, target = data.to(device), target.to(device)
optimizer.zero_grad()
output = model.quantize_forward(data) # forward of each quantized layer
loss = lossLayer(output, target) # this ties the loss to the quantized layers
loss.backward()
# cnt = cnt + 1
losses.append(loss)
histo, grads = (get_model_histogram(model))
if flag == 0:
flag = 1
grads_sum = grads
# accumulate the gradients over every batch of the epoch
else:
for k, v in grads_sum.items():
grads_sum[k] += grads[k]
#print(k)
optimizer.step()
if batch_idx % 50 == 0:
print('Quantize Aware Training Epoch: {} [{}/{}]\tLoss: {:.6f}'.format(
epoch, batch_idx * len(data), len(train_loader.dataset), loss.item()
))
# print(grad_sum['conv_layers.conv1.weight'])
# sys.exit(0)
# print('batch_idx: ' +str(batch_idx))
# print('cnt: ' + str(cnt))
# average gradient over the epoch
for k, v in grads_sum.items():
grads_sum[k] = v/len(train_loader.dataset)
return grads_sum,losses
#
# print(grads_sum)
#
# histo = get_grad_histogram(grads_sum)
#
# for s,_ in grads_sum.items():
# data = histo[s]
# bins = data['bins']
# histogram = data['histogram']
# max_idx = np.argmax(histogram)
# min_idx = np.argmin(histogram)
# width = abs(bins[max_idx] - bins[min_idx])
#
# plt.figure(figsize=(9, 6))
# plt.bar(bins[:-1], histogram, width=width)
# #plt.show()
#
# plt.savefig('diff_fig/int'+ sys.argv[1] + '/' + s +'.jpg')
#
# np.save('diff_fig/int' + sys.argv[1] + '/grads_sum.npy', grads_sum)
# sys.exit(0)
def full_inference(model, test_loader):
correct = 0
# report = EasyDict(nb_test=0, correct=0, correct_fgm=0, correct_pgd=0)
for i, (data, target) in enumerate(test_loader, 1):
data, target = data.to(device), target.to(device)
with torch.no_grad():
output = model(data)
pred = output.argmax(dim=1, keepdim=True)
correct += pred.eq(target.view_as(pred)).sum().item()
#x_fgm = fast_gradient_method(model, data, 0.01, np.inf)
#x_pgd = projected_gradient_descent(model, data, 0.01, 0.01, 40, np.inf)
# model prediction on clean examples
# _, y_pred = model(data).max(1)
# model prediction on FGM adversarial examples
#_, y_pred_fgm = model(x_fgm).max(1)
# model prediction on PGD adversarial examples
#_, y_pred_pgd = model(x_pgd).max(1)
# report.nb_test += target.size(0)
# report.correct += y_pred.eq(target).sum().item()
#report.correct_fgm += y_pred_fgm.eq(target).sum().item()
#report.correct_pgd += y_pred_pgd.eq(target).sum().item()
print('\nTest set: Full Model Accuracy: {:.0f}%\n'.format(100. * correct / len(test_loader.dataset)))
# print('\nTest set: Full Model Accuracy:')
# print(
# "test acc on clean examples (%): {:.3f}".format(
# report.correct / report.nb_test * 100.0
# )
# )
# print(
# "test acc on FGM adversarial examples (%): {:.3f}".format(
# report.correct_fgm / report.nb_test * 100.0
# )
# )
# print(
# "test acc on PGD adversarial examples (%): {:.3f}".format(
# report.correct_pgd / report.nb_test * 100.0
# )
# )
print('============================================')
def quantize_inference(model, test_loader):
correct = 0
acc=0
# report = EasyDict(nb_test=0, correct=0, correct_fgm=0, correct_pgd=0)
for i, (data, target) in enumerate(test_loader, 1):
data, target = data.to(device), target.to(device)
output = model.quantize_inference(data)
pred = output.argmax(dim=1, keepdim=True)
correct += pred.eq(target.view_as(pred)).sum().item()
acc = 100. * correct / len(test_loader.dataset)
print('\nTest set: Quant Model Accuracy: {:.0f}%\n'.format(acc))
# data, target = data.to(device), target.to(device)
# x_fgm = fast_gradient_method(model, data, 0.01, np.inf)
# x_pgd = projected_gradient_descent(model, data, 0.01, 0.01, 40, np.inf)
# model prediction on clean examples
# _, y_pred = model.quantize_inference(data).max(1)
# model prediction on FGM adversarial examples
# _, y_pred_fgm = model.quantize_inference(x_fgm).max(1)
# model prediction on PGD adversarial examples
#_, y_pred_pgd = model.quantize_inference(x_pgd).max(1)
# report.nb_test += target.size(0)
# report.correct += y_pred.eq(target).sum().item()
# report.correct_fgm += y_pred_fgm.eq(target).sum().item()
# report.correct_pgd += y_pred_pgd.eq(target).sum().item()
# acc = report.correct / report.nb_test * 100.0
# print(
# "test acc on clean examples (%): {:.3f}".format(acc
# )
# )
# print(
# "test acc on FGM adversarial examples (%): {:.3f}".format(
# report.correct_fgm / report.nb_test * 100.0
# )
# )
# print(
# "test acc on PGD adversarial examples (%): {:.3f}".format(
# report.correct_pgd / report.nb_test * 100.0
# )
# )
return acc
if __name__ == "__main__":
# d1=20
# d2=5
d1 = sys.argv[1] # num_bits
d2 = sys.argv[2] # epochs
d3 = sys.argv[3] # mode
d4 = sys.argv[4] # n_exp
batch_size = 32
test_batch_size = 32 # it is probably more reasonable for the test batch size to match the training batch size, since batch norm is involved
seed = 1
epochs = int(d2)
lr = 0.001 # 1%*0.01
momentum = 0.5
net = 'LeNet' # 1:
acc=0
using_bn = True
load_quant_model_file = None
# load_quant_model_file = "ckpt/mnist_cnnbn_qat.pt"
torch.manual_seed(seed)
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
print(device)
# datasets.imagenet
train_loader = torch.utils.data.DataLoader(
datasets.CIFAR10('./project/p/data', train=True, download=False,
transform=transforms.Compose([
transforms.ToTensor(),
transforms.Normalize((0.4914, 0.4822, 0.4465), (0.2023, 0.1994, 0.2010))
])),
batch_size=batch_size, shuffle=True, num_workers=1, pin_memory=False
)
test_loader = torch.utils.data.DataLoader(
datasets.CIFAR10('./project/p/data', train=False, download=False,transform=transforms.Compose([
transforms.ToTensor(),
transforms.Normalize((0.4914, 0.4822, 0.4465), (0.2023, 0.1994, 0.2010))
])),
batch_size=test_batch_size, shuffle=True, num_workers=1, pin_memory=False
)
#if using_bn:
#model = NetBN()
# if (net=='VGG19') == True:
# model = VGG_19().to(device)
# model.load_state_dict(torch.load('ckpt/cifar-10_vgg19_bn.pt', map_location='cpu'))
# save_file = "ckpt/cifar-10_vgg19_bn_qat.pt"
# elif (net=='LeNet') == True:
model = LeNet(n_exp=int(d4), mode = int(d3)).to(device)
# when generating the gradient-distribution plots, training was done from scratch
# fine tune qat
#model.load_state_dict(torch.load('ckpt/cifar-10_lenet_bn.pt', map_location='cuda'))
# save_file = "ckpt/cifar-10_lenet_bn_qat.pt"
# else:
# model = Net().to(device)
# model.load_state_dict(torch.load('ckpt/cifar-10_vgg19.pt', map_location='cpu'))
# save_file = "ckpt/cifar-10_vgg19_qat.pt"
model.to(device)
optimizer = optim.SGD(model.parameters(), lr=lr, momentum=momentum)
# consider using Adam
# INT
# writer = SummaryWriter(log_dir='./scratchlog/quant_bit_' + str(d1) + '_log')
writer = SummaryWriter(log_dir='./project/p/scratchlog/mode' + str(d3) + '_' + str(d4) + '/quant_bit_' + str(d1) + '_log')
model.eval() # eval mode (no gradient updates, no dropout)
full_inference(model, test_loader)
num_bits = int(d1)
# first define the quantized layers on self
model.quantize(num_bits=num_bits)
print('Quantization bit: %d' % num_bits)
if load_quant_model_file is not None:
model.load_state_dict(torch.load(load_quant_model_file))
print("Successfully load quantized model %s" % load_quant_model_file)
# quantization-aware training
for epoch in range(1, epochs + 1):
model.train() # training mode
grads_sum, losses = quantize_aware_training(model, device, train_loader, optimizer, epoch)
print('epoch:', epoch)
checkpoint = {
'model': model.state_dict(),
'optimizer': optimizer.state_dict(),
'grads':grads_sum,
'epoch': epoch,
'losses': losses
}
for name, param in grads_sum.items():
# the grads here may be accumulated values rather than averages
writer.add_histogram(tag=name + '_grad', values=param, global_step=epoch)
for name, param in model.named_parameters():
writer.add_histogram(tag=name + '_data', values=param.data, global_step=epoch)
# if (net == 'VGG19') == True:
# torch.save(checkpoint,
# 'checkpoint/cifar-10_vgg_19_bn_quant/ckpt_cifar-10_vgg19_bn_quant_%s.pth' % (str(epoch)))
#
#
# elif (net == 'LeNet') == True:
# INT
# dir_name = 'checkpoint/cifar-10_lenet_bn_quant/scratch/' + str(d1)
dir_name = './project/p/checkpoint/cifar-10_lenet_bn_quant/scratch/mode' + str(d3) + '_' + str(d4) + '/' + str(d1)
if not os.path.isdir(dir_name):
os.makedirs(dir_name,mode=0o777)
os.chmod(dir_name,mode=0o777)
# INT
# torch.save(checkpoint,'checkpoint/cifar-10_lenet_bn_quant/scratch/' + str(d1) + '/ckpt_cifar-10_lenet_bn_quant_' + str(epoch) + '.pth')
torch.save(checkpoint,
'./project/p/checkpoint/cifar-10_lenet_bn_quant/scratch/mode' + str(d3) + '_' + str(d4) + '/' + str(d1)+ '/ckpt_cifar-10_lenet_bn_quant_' + str(
epoch) + '.pth')
# quan_dict = torch.load('checkpoint/cifar-10_lenet_bn_quant/' + str(d1) + '/ckpt_cifar-10_lenet_bn_quant_%s.pth' % (str(epoch)))
# print(quan_dict['grads']['conv_layers.conv1.weight'].reshape(1,-1).shape)
#
#
# print('Saved all parameters!\n')
model.eval()
#torch.save(model.state_dict(), save_file)
model.freeze()
acc = quantize_inference(model, test_loader)
f = open('./project/p/lenet_qat_scratch_acc' + '.txt', 'a')
f.write('bit ' + str(d1) + ': ' + str(acc) + '\n')
f.close()
# -*- coding: utf-8 -*-
import torch
import torch.nn as nn
import torch.nn.functional as F
from new_train import get_children
from global_var import GlobalVariables
from module import *
class LeNet(nn.Module):
# CONV FLOPs with bias:    (2 * C_in * K_h * K_w) * H_out * W_out * C_out
#           without bias:  (2 * C_in * K_h * K_w - 1) * H_out * W_out * C_out
# FCN FLOPs with bias:     (2 * I) * O
#           without bias:  (2 * I - 1) * O
def __init__(self, img_size=32, input_channel=3, num_class=10, n_exp=4, mode=1):
super().__init__()
self.conv_layers = nn.ModuleDict({
# block1
'conv1': nn.Conv2d(3,6,5), # (2*3*5*5) * 28*28*6 FLOPs (bias accounts for 28*28*6 = 4704 of them): 4704/705600
'reluc1': nn.ReLU(),
'pool1': nn.MaxPool2d(2,2),
# block2
'conv2': nn.Conv2d(6,16,5), # (2*6*5*5) * 10*10*16 FLOPs (bias accounts for 10*10*16 = 1600 of them): 1600/480000
'reluc2': nn.ReLU(),
'pool2': nn.MaxPool2d(2,2),
})
self.fc_layers = nn.ModuleDict({
# classifier
'fc1': nn.Linear(16*5*5,120), # (2*16*5*5)*120 FLOPs (bias accounts for 120 of them): 120/96000
'reluf1': nn.ReLU(),
'fc2': nn.Linear(120,84), # (2*120)*84 FLOPs (bias accounts for 84 of them): 84/20160
'reluf2': nn.ReLU(),
'fc3': nn.Linear(84, num_class)
})
self.mode = mode
self.n_exp = n_exp
def forward(self,x):
for _,layer in self.conv_layers.items():
x = layer(x)
output = x.view(-1,16*5*5)
for _,layer in self.fc_layers.items():
output = layer(output)
out = F.softmax(output, dim=1) # softmax is optional here; it has little effect
return out
def quantize(self, num_bits=8):
self.quantize_conv_layers=nn.ModuleDict({
# qi=True: the previous layer's output has not been quantized yet, so it needs quantizing. MaxPool and ReLU do not change the quantized INT values or the min/max, so layers following them use qi=False.
# If the previous layer is a conv, the data's min/max has changed, so qi=True is needed to quantize it.
'qconv1': QConv2d(self.conv_layers['conv1'], qi=True, qo=True, num_bits=num_bits, n_exp=self.n_exp, mode=self.mode),
'qreluc1': QReLU(n_exp=self.n_exp, mode=self.mode),
'qpool1': QMaxPooling2d(kernel_size=2,stride=2,padding=0, n_exp=self.n_exp, mode=self.mode),
'qconv2': QConv2d(self.conv_layers['conv2'], qi=False, qo=True, num_bits=num_bits, n_exp=self.n_exp, mode=self.mode),
'qreluc2': QReLU(n_exp=self.n_exp, mode=self.mode),
'qpool2': QMaxPooling2d(kernel_size=2, stride=2, padding=0, n_exp=self.n_exp, mode=self.mode)
})
self.quantize_fc_layers = nn.ModuleDict({
'qfc1': QLinear(self.fc_layers['fc1'],qi=False,qo=True,num_bits=num_bits, n_exp=self.n_exp, mode=self.mode),
'qreluf1': QReLU(n_exp=self.n_exp, mode=self.mode),
'qfc2': QLinear(self.fc_layers['fc2'],qi=False,qo=True,num_bits=num_bits, n_exp=self.n_exp, mode=self.mode),
'qreluf2': QReLU(n_exp=self.n_exp, mode=self.mode),
'qfc3': QLinear(self.fc_layers['fc3'],qi=False,qo=True,num_bits=num_bits, n_exp=self.n_exp, mode=self.mode)
})
def quantize_forward(self, x):
for _, layer in self.quantize_conv_layers.items():
x = layer(x)
output = x.view(-1,16*5*5)
for s, layer in self.quantize_fc_layers.items():
output = layer(output)
out = F.softmax(output, dim=1) # softmax is optional here; it has little effect, used for the loss
return out
def freeze(self):
self.quantize_conv_layers['qconv1'].freeze()
self.quantize_conv_layers['qreluc1'].freeze(self.quantize_conv_layers['qconv1'].qo)
self.quantize_conv_layers['qpool1'].freeze(self.quantize_conv_layers['qconv1'].qo)
self.quantize_conv_layers['qconv2'].freeze(self.quantize_conv_layers['qconv1'].qo)
self.quantize_conv_layers['qreluc2'].freeze(self.quantize_conv_layers['qconv2'].qo)
self.quantize_conv_layers['qpool2'].freeze(self.quantize_conv_layers['qconv2'].qo)
self.quantize_fc_layers['qfc1'].freeze(qi=self.quantize_conv_layers['qconv2'].qo)
self.quantize_fc_layers['qreluf1'].freeze(self.quantize_fc_layers['qfc1'].qo)
self.quantize_fc_layers['qfc2'].freeze(qi=self.quantize_fc_layers['qfc1'].qo)
self.quantize_fc_layers['qreluf2'].freeze(self.quantize_fc_layers['qfc2'].qo)
self.quantize_fc_layers['qfc3'].freeze(qi=self.quantize_fc_layers['qfc2'].qo)
def fakefreeze(self):
self.quantize_conv_layers['qconv1'].fakefreeze()
self.quantize_conv_layers['qreluc1'].fakefreeze(self.quantize_conv_layers['qconv1'].qo)
self.quantize_conv_layers['qpool1'].fakefreeze(self.quantize_conv_layers['qconv1'].qo)
self.quantize_conv_layers['qconv2'].fakefreeze(self.quantize_conv_layers['qconv1'].qo)
self.quantize_conv_layers['qreluc2'].fakefreeze(self.quantize_conv_layers['qconv2'].qo)
self.quantize_conv_layers['qpool2'].fakefreeze(self.quantize_conv_layers['qconv2'].qo)
self.quantize_fc_layers['qfc1'].fakefreeze(qi=self.quantize_conv_layers['qconv2'].qo)
self.quantize_fc_layers['qreluf1'].fakefreeze(self.quantize_fc_layers['qfc1'].qo)
self.quantize_fc_layers['qfc2'].fakefreeze(qi=self.quantize_fc_layers['qfc1'].qo)
self.quantize_fc_layers['qreluf2'].fakefreeze(self.quantize_fc_layers['qfc2'].qo)
self.quantize_fc_layers['qfc3'].fakefreeze(qi=self.quantize_fc_layers['qfc2'].qo)
def quantize_inference(self, x):
x = self.quantize_conv_layers['qconv1'].qi.quantize_tensor(x, self.mode)
for s, layer in self.quantize_conv_layers.items():
x = layer.quantize_inference(x)
output = x.view( -1,16*5*5)
for s, layer in self.quantize_fc_layers.items():
output = layer.quantize_inference(output)
# Only mode 1 needs this range mapping to bring the quantized data back to a range similar to the original; PoT (power-of-two) quantization does not, since it recovers the range inherently
if self.mode == 1:
output = self.quantize_fc_layers['qfc3'].qo.dequantize_tensor(output, self.mode)
out = F.softmax(output, dim=1) # a quantized softmax (QSoftmax) would probably be better here; to be revised later
return out
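# A minimal sketch (not part of module.py; the function names and the scale / zero_point
# attributes are illustrative assumptions) of the uniform mode-1 mapping that the
# quantize_tensor / dequantize_tensor calls above refer to.
def _uniform_quantize_sketch(x, scale, zero_point):
    # FP -> INT domain: q = round(x / scale) + zero_point
    return torch.round(x / scale) + zero_point
def _uniform_dequantize_sketch(q, scale, zero_point):
    # INT -> FP domain: x' = scale * (q - zero_point)
    return scale * (q - zero_point)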
class NetBN(nn.Module):
def __init__(self, num_channels=1):
super(NetBN, self).__init__()
self.conv1 = nn.Conv2d(num_channels, 40, 3, 1)
self.bn1 = nn.BatchNorm2d(40)
self.conv2 = nn.Conv2d(40, 40, 3, 1)
self.bn2 = nn.BatchNorm2d(40)
self.fc = nn.Linear(5 * 5 * 40, 10)
def forward(self, x):
x = self.conv1(x)
x = self.bn1(x)
x = F.relu(x)
x = F.max_pool2d(x, 2, 2)
x = self.conv2(x)
x = self.bn2(x)
x = F.relu(x)
x = F.max_pool2d(x, 2, 2)
x = x.view(-1, 5 * 5 * 40)
x = self.fc(x)
return x
def quantize(self, num_bits=8):
self.qconv1 = QConvBNReLU(self.conv1, self.bn1, qi=True, qo=True, num_bits=num_bits)
self.qmaxpool2d_1 = QMaxPooling2d(kernel_size=2, stride=2, padding=0)
self.qconv2 = QConvBNReLU(self.conv2, self.bn2, qi=False, qo=True, num_bits=num_bits)
self.qmaxpool2d_2 = QMaxPooling2d(kernel_size=2, stride=2, padding=0)
self.qfc = QLinear(self.fc, qi=False, qo=True, num_bits=num_bits)
def quantize_forward(self, x):
x = self.qconv1(x)
x = self.qmaxpool2d_1(x)
x = self.qconv2(x)
x = self.qmaxpool2d_2(x)
x = x.view(-1, 5*5*40)
x = self.qfc(x)
return x
def freeze(self):
self.qconv1.freeze()
self.qmaxpool2d_1.freeze(self.qconv1.qo)
self.qconv2.freeze(qi=self.qconv1.qo) # maxpool does not change min/max
self.qmaxpool2d_2.freeze(self.qconv2.qo)
self.qfc.freeze(qi=self.qconv2.qo) # maxpool does not change min/max
def quantize_inference(self, x):
qx = self.qconv1.qi.quantize_tensor(x)
qx = self.qconv1.quantize_inference(qx)
qx = self.qmaxpool2d_1.quantize_inference(qx)
qx = self.qconv2.quantize_inference(qx)
qx = self.qmaxpool2d_2.quantize_inference(qx)
qx = qx.view(-1, 5*5*40)
qx = self.qfc.quantize_inference(qx)
out = self.qfc.qo.dequantize_tensor(qx) # INT -> FP
return out
# ResNet model definition
# adapted for CIFAR-10
class ResNet(nn.Module):
def __init__(self, block, layers, num_classes=10, n_exp=4, mode=1): # the number of classes is set to 10 here
super(ResNet, self).__init__()
self.inplanes = 16 # CIFAR-10 images are small, so start with fewer channels
GlobalVariables.SELF_INPLANES = self.inplanes
print('resnet init:'+ str(GlobalVariables.SELF_INPLANES))
# input layer
self.conv1 = nn.Conv2d(3, 16, kernel_size=3, stride=1, padding=1,
bias=False)
self.bn1 = nn.BatchNorm2d(16)
self.relu = nn.ReLU()
# residual layers (4 stages, each stage containing 6n+2 conv layers)
# self.layer1 = self._make_layer(block, 16, layers[0])
# self.layer2 = self._make_layer(block, 32, layers[1], stride=2)
# self.layer3 = self._make_layer(block, 64, layers[2], stride=2)
# self.layer4 = self._make_layer(block, 128, layers[3], stride=2)
self.layer1 = MakeLayer(block, 16, layers[0])
self.layer2 = MakeLayer(block, 32, layers[1], stride=2)
self.layer3 = MakeLayer(block, 64, layers[2], stride=2)
self.layer4 = MakeLayer(block, 128, layers[3], stride=2)
# classification layer
self.avgpool = nn.AdaptiveAvgPool2d((1, 1))
self.fc = nn.Linear(128 * block.expansion, num_classes)
# self.layers_to_quantize = [self.conv1, self.bn1, self.relu, self.layer1, self.layer2, self.layer3, self.layer4, self.avgpool, self.fc]
# parameter initialization
for m in self.modules():
if isinstance(m, nn.Conv2d):
nn.init.kaiming_normal_(m.weight, mode='fan_out', nonlinearity='relu')
elif isinstance(m, nn.BatchNorm2d):
nn.init.constant_(m.weight, 1)
nn.init.constant_(m.bias, 0)
# note: for ResNet, self.inplanes keeps being updated and passed along
# def _make_layer(self, block, planes, blocks, stride=1):
# downsample = None
# # stride is the conv layer's stride, and self.inplanes is the number of input channels of the current residual block,
# # while planes * block.expansion is its number of output channels. So when stride != 1 or self.inplanes != planes * block.expansion, a downsample branch is needed.
# # Within a layer, every residual block except the first has equal input and output channel counts and the same stride (1 or 2); the spatial size of the feature maps shrinks gradually as blocks are stacked.
# if stride != 1 or self.inplanes != planes * block.expansion:
# downsample = nn.Sequential(
# nn.Conv2d(self.inplanes, planes * block.expansion,
# kernel_size=1, stride=stride, bias=False),
# nn.BatchNorm2d(planes * block.expansion),
# )
# layers = []
# layers.append(block(self.inplanes, planes, stride, downsample))
# self.inplanes = planes * block.expansion
# for _ in range(1, blocks): # number of blocks
# layers.append(block(self.inplanes, planes))
# return nn.Sequential(*layers)
def forward(self, x):
# 输入层
x = self.conv1(x)
x = self.bn1(x)
x = self.relu(x)
# Compared with the ImageNet version, one maxpool is omitted here: CIFAR-10 images are already small, and pooling again would make them too small
# residual layers
x = self.layer1(x)
x = self.layer2(x)
x = self.layer3(x)
x = self.layer4(x)
# classification layer
x = self.avgpool(x) # output shape is B, C, 1, 1
x = x.view(x.size(0), -1)
x = self.fc(x)
return x
def quantize(self, num_bits=8):
pass
def quantize_forward(self, x):
# for _, layer in self.quantize_layers.items():
# x = layer(x)
# out = F.softmax(x, dim=1)
# return out
pass
def freeze(self):
pass
def fakefreeze(self):
pass
def quantize_inference(self, x):
pass
# BasicBlock class
class BasicBlock(nn.Module):
expansion = 1
def __init__(self, inplanes, planes, stride=1, downsample=None):
super(BasicBlock, self).__init__()
# first conv layer
self.conv1 = nn.Conv2d(inplanes, planes, kernel_size=3, stride=stride,
padding=1, bias=False)
self.bn1 = nn.BatchNorm2d(planes)
# second conv layer
self.conv2 = nn.Conv2d(planes, planes, kernel_size=3, stride=1,
padding=1, bias=False)
self.bn2 = nn.BatchNorm2d(planes)
# shortcut
self.relu = nn.ReLU()
self.downsample = downsample
self.stride = stride
def forward(self, x):
identity = x
out = self.conv1(x)
out = self.bn1(out)
out = self.relu(out)
out = self.conv2(out)
out = self.bn2(out)
if self.downsample is not None:
identity = self.downsample(identity)
out += identity
out = self.relu(out)
return out
def quantize(self, num_bits=8):
self.qconvbnrelu1 = QConvBNReLU(self.conv1,self.bn1,qi=False,qo=True,num_bits=num_bits)
self.qconvbn1 = QConvBN(self.conv2,self.bn2,qi=False,qo=True,num_bits=num_bits)
if self.downsample is not None:
self.qconvbn2 = QConvBN(self.downsample[0],self.downsample[1],qi=False,qo=True,num_bits=num_bits)
self.qrelu1 = QReLU()
def quantize_forward(self, x):
identity = x
out = self.qconvbnrelu1(x)
out = self.qconvbn1(out)
if self.downsample is not None:
identity = self.qconvbn2(identity)
# residual add
out = identity + out # a proper quantized elementwise-add transform is still needed here; to be fixed later
out = self.qrelu1(out)
return out
def freeze(self):
# qconvbnrelu1 could actually reuse the previous layer's qo, but passing it in is awkward, so it is not done here
# still needs careful checking
self.qconvbnrelu1.freeze()
self.qconvbn1.freeze(qi = self.qconvbnrelu1.qo)
if self.downsample is not None:
self.qconvbn2.freeze(qi=self.qconvbn1.qo)
self.qrelu1.freeze(self.qconvbn2.qo)
else:
self.qrelu1.freeze(self.qconvbn1)
def quantize_inference(self, x):
# The initial quantize_tensor / final dequantize_tensor should not be needed here, since this is not the first/last layer; as long as every intermediate layer stays in the quantized domain, no such handling is required.
identity = x
out = self.qconvbnrelu1.quantize_inference(x)
out = self.qconvbn1.quantize_inference(out)
if self.downsample is not None:
identity = self.qconvbn2.quantize_inference(identity)
out = identity + out # a proper quantized elementwise-add transform is still needed here; to be fixed later
out = self.qrelu1.quantize_inference(out)
return out
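# A minimal sketch of the quantized elementwise add mentioned in the comments above.
# This is not the repository's API: the function name and the scale / zero_point
# attributes on the quantization parameters are assumptions for illustration only.
# Both residual branches are dequantized to floating point, added, and the sum is
# requantized with the output branch's parameters.
def quantized_elementwise_add_sketch(qa, qb, qp_a, qp_b, qp_out):
    ra = qp_a.scale * (qa.float() - qp_a.zero_point)   # dequantize branch a
    rb = qp_b.scale * (qb.float() - qp_b.zero_point)   # dequantize branch b
    return torch.round((ra + rb) / qp_out.scale) + qp_out.zero_point  # requantize the sum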
# Bottleneck class
class Bottleneck(nn.Module):
expansion = 4
def __init__(self, inplanes, planes, stride=1, downsample=None):
super(Bottleneck, self).__init__()
# 1x1 conv layer
self.conv1 = nn.Conv2d(inplanes, planes, kernel_size=1, bias=False)
self.bn1 = nn.BatchNorm2d(planes)
# 3x3 conv layer
self.conv2 = nn.Conv2d(planes, planes, kernel_size=3, stride=stride,
padding=1, bias=False)
self.bn2 = nn.BatchNorm2d(planes)
# 1x1 conv layer
self.conv3 = nn.Conv2d(planes, planes * self.expansion, kernel_size=1,
bias=False)
self.bn3 = nn.BatchNorm2d(planes * self.expansion)
# shortcut
self.relu = nn.ReLU()
self.downsample = downsample
self.stride = stride
def forward(self, x):
identity = x
out = self.conv1(x)
out = self.bn1(out)
out = self.relu(out)
out = self.conv2(out)
out = self.bn2(out)
out = self.relu(out)
out = self.conv3(out)
out = self.bn3(out)
if self.downsample is not None:
identity = self.downsample(x)
out += identity # the residual addition happens here
out = self.relu(out)
return out
class MakeLayer(nn.Module):
def __init__(self, block, planes, blocks, stride=1):
super(MakeLayer, self).__init__()
print('makelayer init:'+ str(GlobalVariables.SELF_INPLANES))
self.downsample = None
if stride != 1 or GlobalVariables.SELF_INPLANES != planes * block.expansion:
self.downsample = nn.Sequential(
nn.Conv2d(GlobalVariables.SELF_INPLANES, planes * block.expansion,kernel_size=1, stride=stride, bias=False),
nn.BatchNorm2d(planes * block.expansion)
)
self.blockdict = nn.ModuleDict()
self.blockdict['block1'] = block(GlobalVariables.SELF_INPLANES, planes, stride, self.downsample)
GlobalVariables.SELF_INPLANES = planes * block.expansion
for i in range(1, blocks): # number of blocks; a ModuleDict is needed here to hold them
self.blockdict['block' + str(i+1)] = block(GlobalVariables.SELF_INPLANES, planes) # the blocks are instantiated here
# def _make_layer(self, block, planes, blocks, stride=1):
# downsample = None
# # stride is the conv layer's stride, and SELF_INPLANES is the number of input channels of the current residual block,
# # while planes * block.expansion is its number of output channels. So when stride != 1 or SELF_INPLANES != planes * block.expansion, a downsample branch is needed.
# # Within a layer, every residual block except the first has equal input and output channel counts and the same stride (1 or 2); the spatial size of the feature maps shrinks gradually as blocks are stacked.
# if stride != 1 or SELF_INPLANES != planes * block.expansion:
# downsample = nn.Sequential(
# nn.Conv2d(SELF_INPLANES, planes * block.expansion,
# kernel_size=1, stride=stride, bias=False),
# nn.BatchNorm2d(planes * block.expansion),
# )
# layers = []
# layers.append(block(SELF_INPLANES, planes, stride, downsample))
# SELF_INPLANES = planes * block.expansion
# for _ in range(1, blocks): # number of blocks
# layers.append(block(SELF_INPLANES, planes))
# return nn.Sequential(*layers)
def forward(self,x):
for _, layer in self.blockdict.items():
x = layer(x)
return x
def quantize(self, num_bits=8):
# needs checking
for _, layer in self.blockdict.items():
layer.quantize(num_bits=num_bits) # each entry is a block, and each block defines its own quantize strategy
def quantize_forward(self, x):
for _, layer in self.blockdict.items():
x = layer.quantize_forward(x) # each block has its own quantize_forward
return x
def freeze(self):
# qconvbnrelu1 could actually reuse the previous layer's qo, but passing it in is awkward, so it is not done here
# still needs careful checking
for _, layer in self.blockdict.items():
layer.freeze() # each block has its own freeze
def quantize_inference(self, x):
# The initial quantize_tensor / final dequantize_tensor should not be needed here, since this is not the first/last layer; as long as every intermediate layer stays in the quantized domain, no such handling is required.
for _, layer in self.blockdict.items():
x = layer.quantize_inference(x) # each block has its own quantize_inference
return x
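# Note on the class above (my reading, for illustration only): MakeLayer relies on the
# module-level GlobalVariables.SELF_INPLANES to carry the running channel count between
# consecutive layers, mirroring what the commented-out _make_layer did with self.inplanes.
# With BasicBlock (expansion = 1), for example:
#   MakeLayer(BasicBlock, 16, 2)            # SELF_INPLANES stays 16
#   MakeLayer(BasicBlock, 32, 2, stride=2)  # SELF_INPLANES becomes 32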
# ResNet-18 constructor
def resnet18(**kwargs):
model = ResNet(BasicBlock, [2, 2, 2, 2], **kwargs)
return model
# ResNet-50 constructor
def resnet50(**kwargs):
model = ResNet(Bottleneck, [3, 4, 6, 3], **kwargs)
return model
# ResNet-152 constructor
def resnet152(**kwargs):
model = ResNet(Bottleneck, [3, 8, 36, 3], **kwargs)
return model
\ No newline at end of file
# -*- coding: utf-8 -*-
from model import *
from get_weight import *
import torch
import torch.nn as nn
import torchvision
import torch.optim as optim
from torchvision import datasets, transforms
import os
import os.path as osp
import sys
import time
# import matplotlib.pyplot as plt
# import matplotlib
from torchvision.datasets import ImageFolder
from torch.utils.tensorboard import SummaryWriter
from absl import app, flags
# from easydict import EasyDict
# from cleverhans.torch.attacks.fast_gradient_method import fast_gradient_method
# from cleverhans.torch.attacks.projected_gradient_descent import (
# projected_gradient_descent,
# )
def train(model, device, train_loader, optimizer, epoch):
model.train()
lossLayer = torch.nn.CrossEntropyLoss()
flag = 0
cnt = 0
for batch_idx, (data, target) in enumerate(train_loader):
cnt = cnt + 1
data, target = data.to(device), target.to(device)
optimizer.zero_grad()
output = model(data)
loss = lossLayer(output, target)
loss.backward()
histo, grads = get_model_histogram(model)
if flag == 0:
flag = 1
grads_sum = grads
else:
for k,v in grads_sum.items():
grads_sum[k] += grads[k]
optimizer.step()
if batch_idx % 50 == 0:
print('Train Epoch: {} [{}/{}]\tLoss: {:.6f}'.format(
epoch, batch_idx * len(data), len(train_loader.dataset), loss.item()
))
for k, v in grads_sum.items():
grads_sum[k] = v / len(train_loader.dataset)
return grads_sum
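# A small illustrative helper (an assumption, not in the original script) that makes the two
# possible normalisations of grads_sum explicit: train() above divides the accumulated
# per-batch gradients by the dataset size, while dividing by the number of batches would
# give a per-batch average, which the "accumulated, not averaged" comments hint at.
def normalize_grads(grads_sum, dataset_size, num_batches, per_batch=False):
    divisor = num_batches if per_batch else dataset_size
    return {k: v / divisor for k, v in grads_sum.items()}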
def test(model, device, test_loader):
model.eval()
test_loss = 0
correct = 0
acc=0
lossLayer = torch.nn.CrossEntropyLoss(reduction='sum')
# report = EasyDict(nb_test=0, correct=0, correct_fgm=0, correct_pgd=0)
with torch.no_grad():
for data, target in test_loader:
data, target = data.to(device), target.to(device)
output = model(data)
# x_fgm = fast_gradient_method(model, data, 0.01, np.inf)
# x_pgd = projected_gradient_descent(model, data, 0.01, 0.01, 40, np.inf)
# model prediction on clean examples
# _, y_pred = model(data).max(1)
# # model prediction on FGM adversarial examples
# _, y_pred_fgm = model(x_fgm).max(1)
#
# # model prediction on PGD adversarial examples
# _, y_pred_pgd = model(x_pgd).max(1)
# report.nb_test += target.size(0)
# report.correct += y_pred.eq(target).sum().item()
# report.correct_fgm += y_pred_fgm.eq(target).sum().item()
# report.correct_pgd += y_pred_pgd.eq(target).sum().item()
test_loss += lossLayer(output, target).item()
pred = output.argmax(dim=1, keepdim=True)
correct += pred.eq(target.view_as(pred)).sum().item()
test_loss /= len(test_loader.dataset)
acc=100. * correct / len(test_loader.dataset)
print('\nTest set: Average loss: {:.4f}, Accuracy: {:.0f}%\n'.format(
test_loss, acc
))
# print(
# "test acc on clean examples (%): {:.3f}".format(
# report.correct / report.nb_test * 100.0
# )
# )
# print(
# "test acc on FGM adversarial examples (%): {:.3f}".format(
# report.correct_fgm / report.nb_test * 100.0
# )
# )
# print(
# "test acc on PGD adversarial examples (%): {:.3f}".format(
# report.correct_pgd / report.nb_test * 100.0
# )
# )
return acc
batch_size = 32
test_batch_size = 32
seed = 1
# epochs = 15
d1 = sys.argv[1]
epochs = int(d1)
lr = 0.001
momentum = 0.5
save_model = False
using_bn = True
net = 'LeNet'
torch.manual_seed(seed)
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
print(device)
train_loader = torch.utils.data.DataLoader(
datasets.CIFAR10('data', train=True, download=True,
transform=transforms.Compose([
transforms.ToTensor(),
transforms.Normalize((0.4914, 0.4822, 0.4465), (0.2023, 0.1994, 0.2010))
])),
batch_size=batch_size, shuffle=True, num_workers=1, pin_memory=True
)
test_loader = torch.utils.data.DataLoader(
datasets.CIFAR10('data', train=False, transform=transforms.Compose([
transforms.ToTensor(),
transforms.Normalize((0.4914, 0.4822, 0.4465), (0.2023, 0.1994, 0.2010))
])),
batch_size=test_batch_size, shuffle=True, num_workers=1, pin_memory=True
)
#if using_bn:
if net == 'VGG19':
model = VGG_19().to(device)
elif net == 'LeNet':
model = LeNet().to(device)
# else:
# model = Net().to(device)
optimizer = optim.SGD(model.parameters(), lr=lr, momentum=momentum)
writer = SummaryWriter(log_dir='./fullprecision_log')
#optimizer = optim.Adam(model.parameters(), lr=lr, betas=(0.9,0.999),eps=1e-08,weight_decay=0,amsgrad=False)
for epoch in range(1, epochs + 1):
grads_sum = train(model, device, train_loader, optimizer, epoch)
acc = test(model, device, test_loader)
print('epoch:', epoch)
checkpoint = {
'model': model.state_dict(),
'optimizer': optimizer.state_dict(),
'epoch': epoch,
'grads': grads_sum,
'accuracy':acc
}
# for name, param in model.named_parameters():
# writer.add_histogram(tag=name + '_grad', values=param.grad, global_step=epoch)
# writer.add_histogram(tag=name + '_data', values=param.data, global_step=epoch)
for name, param in grads_sum.items():
# the grad here should be an accumulated value, not an average
writer.add_histogram(tag=name + '_grad', values=param, global_step=epoch)
# take the weights after the last batch of this epoch has been processed
for name, param in model.named_parameters():
writer.add_histogram(tag=name + '_data', values=param.data, global_step=epoch)
if net == 'LeNet':
torch.save(checkpoint, 'checkpoint/cifar-10_lenet_bn/full/ckpt_cifar-10_lenet_bn_%s.pth' % (str(epoch)))
# save parameters
# if (net == 'VGG19') == True:
# torch.save(checkpoint, 'checkpoint/cifar-10_vgg19_bn/ckpt_cifar-10_vgg19_bn_%s.pth' % (str(epoch)))
# elif (net == 'LeNet') == True:
# torch.save(checkpoint, 'checkpoint/cifar-10_lenet_bn/ckpt_cifar-10_lenet_bn_%s.pth' % (str(epoch)))
#print('Saved all parameters!\n')
if save_model:
if not osp.exists('ckpt'):
os.makedirs('ckpt')
#if using_bn:
if net == 'VGG19':
torch.save(model.state_dict(), 'ckpt/cifar-10_vgg19_bn.pt')
elif net == 'LeNet':
torch.save(model.state_dict(), 'ckpt/cifar-10_lenet_bn.pt')
# else:
# torch.save(model.state_dict(), 'ckpt/cifar-10_vgg19.pt')