Commit b78e8fd4 by Zhihong Ma

feat: old version for LeNet (ResNet, Module not finished)

parent 32783663
# -*- coding: utf-8 -*-
import numpy
import numpy as np
import torch
import sys
from mmd_loss import *
from collections import OrderedDict
d1 = sys.argv[1] # bit
d2 = sys.argv[2] # epoch
# d1=4
# d2=5
sum=0
flag=0
total_quan_list=list()
total_base_list=list()
# CNN FLOPs = Cout * Hout * Wout * (2 * Cin * K * K) when bias is counted, otherwise -1 inside the parentheses
# FCN (fully-connected) FLOPs = Cout * Cin when bias is counted, otherwise -1
# the related relu/pool ops are also taken into account
# MAdd
# weight0 =np.array( [ 705600.0+4704.0+ 3528.0 , 480000.0+ 1600.0 + 1200.0 , 95880.0 + 120.0,
# 20076.0 + 84.0 , 1670.0 ])
# weight1=np.array([705,600.0 , 480,000.0,+ 95,880.0 ,
# 20,076.0 , 1,670.0 ])
# flops
weight_f0= np.array([357504+4704+4704, 241600+1600+1600,48000+120,10080+84,840])
weight_f1=np.array([357504, 241600,48000,10080,840])
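# A quick worked check of the per-layer weights above (a sketch; assumes the LeNet used elsewhere in
# this commit: conv1 = Conv2d(3,6,5), conv2 = Conv2d(6,16,5) on 32x32 inputs, so the conv outputs are
# 28x28 and 10x10). Conv MACs incl. bias: (Cin*K*K + 1) * Hout * Wout * Cout; FC MACs: in_features * out_features.
assert weight_f1.tolist() == [(3*5*5+1)*28*28*6, (6*5*5+1)*10*10*16, 16*5*5*120, 120*84, 84*10]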
summary_quan_dict=OrderedDict()
summary_base_dict=OrderedDict()
losses=[]
# outer level: one dict per epoch; inner level: the grads of each network layer
for i in range(int(d2)):
total_quan_list.append(torch.load('./project/p/checkpoint/cifar-10_lenet_bn_quant/' + str(d1) + '/ckpt_cifar-10_lenet_bn_quant_'+str(i+1)+'.pth'))
#total_quan_list.append(torch.load('checkpoint/cifar-10_lenet_bn/full' + '/ckpt_cifar-10_lenet_bn_' + str(d2) + '.pth'))
total_base_list.append(torch.load('./project/p/checkpoint/cifar-10_lenet_bn/full' + '/ckpt_cifar-10_lenet_bn_' + str(i+1) + '.pth'))
for k, _ in total_base_list[i]['grads'].items():
if flag == 0:
summary_quan_dict[k] = total_quan_list[i]['grads'][k].reshape(1,-1)
summary_base_dict[k] = total_base_list[i]['grads'][k].reshape(1,-1)
else :
# entries in the dict cannot be modified in place; reassign them instead
a=summary_quan_dict[k]
b=total_quan_list[i]['grads'][k].reshape(1,-1)
c=np.vstack((a,b))
summary_quan_dict[k] = c
a = summary_base_dict[k]
b = total_base_list[i]['grads'][k].reshape(1,-1)
c = np.vstack((a, b))
summary_base_dict[k] = c
flag = 1
cnt = 0
flag = 0
for k, _ in summary_quan_dict.items():
if flag == 0:
sum += 0.99*weight_f1[cnt] * MK_MMD(source=summary_base_dict[k], target=summary_quan_dict[k]) # weight
else:
sum += 0.01*weight_f1[cnt] * MK_MMD(source=summary_base_dict[k], target=summary_quan_dict[k]) #bias
if flag == 1:
cnt = cnt + 1
flag = 0
else:
flag=1
sum=sum/(weight_f0.sum()*2)
print(sum)
f = open('./project/p/lenet_ptq_similarity.txt','a')
f.write('bit:' + str(d1) + ' epoch_num:' + str(d2) +': '+str(sum)+'\n')
f.close()
# for k,v in summary_base_dict.items():
# if k== 'conv_layers.conv1.weight':
# print(v)
# print('===========')
# print(summary_quan_dict[k])
# -*- coding: utf-8 -*-
import numpy
import numpy as np
import torch
import sys
from collections import OrderedDict
import scipy.stats
import pandas as pd
import os
# Overall idea: for a given bit width, this script computes the gradient-distribution similarity at
# different epoch checkpoints (5, 10, ...); since each checkpoint covers a range of epochs, the
# similarity is averaged over the epochs within that range.
# External usage: the script is invoked once per bit width.
# Each csv row records, for that quantization bit width, the average weighted gradient-distribution
# similarity at the different epoch checkpoints.
#
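# A minimal sketch of the per-layer metric used below (hypothetical variable names, just to show how
# scipy.stats.wasserstein_distance is applied to the flattened gradients of matching layers):
#   g_full  = full_ckpt['grads']['conv_layers.conv1.weight'].reshape(-1)
#   g_quant = quant_ckpt['grads']['conv_layers.conv1.weight'].reshape(-1)
#   d = scipy.stats.wasserstein_distance(g_full, g_quant)   # smaller distance = more similar distributions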
d1 = sys.argv[1] # bit
d2 = sys.argv[2] # mode
d3 = sys.argv[3] # n_exp
# d2 = sys.argv[2] # epoch
# d1=4
# d2=5
tag = 0
dirpath = './project/p/qat_analysis_data/mode' + str(d2)
if not os.path.isdir(dirpath):
os.makedirs(dirpath, mode=0o777)
os.chmod(dirpath, mode=0o777)
# if int(d2) == 1:
# csvpath = './project/p/qat_analysis_data/wasserstein_distance.csv'
# else:
if int(d2) != 3:
csvpath = './project/p/qat_analysis_data/mode' + str(d2) + '/wasserstein_distance.csv'
else:
csvpath = './project/p/qat_analysis_data/mode' + str(d2) + '/wasserstein_distance_' + str(d3) + '.csv'
# if os.path.exists("./qat_analysis_data/wasserstein_distance.csv"):
if os.path.exists(csvpath):
tag = 1
if tag == 0: # no csv yet
df = pd.DataFrame()
else: # the csv already exists
# df = pd.read_csv("./qat_analysis_data/wasserstein_distance.csv", index_col=0)
df = pd.read_csv(csvpath, index_col=0)
df2 = pd.DataFrame()
# CNN FLOPs = Cout * Hout * Wout * (2 * Cin * K * K) when bias is counted, otherwise -1 inside the parentheses
# FCN (fully-connected) FLOPs = Cout * Cin when bias is counted, otherwise -1
# the related relu/pool ops are also taken into account
# MAdd
# weight0 =np.array( [ 705600.0+4704.0+ 3528.0 , 480000.0+ 1600.0 + 1200.0 , 95880.0 + 120.0,
# 20076.0 + 84.0 , 1670.0 ])
# weight1=np.array([705,600.0 , 480,000.0,+ 95,880.0 ,
# 20,076.0 , 1,670.0 ])
# flops
weight_f0= np.array([357504+4704+4704, 241600+1600+1600,48000+120,10080+84,840])
weight_f1=np.array([357504, 241600,48000,10080,840])
# for each epoch checkpoint
for epoch in [5, 10, 15, 20, 25, 30]:
total_quan_list = []
total_base_list = []
summary_quan_dict = OrderedDict()
summary_base_dict = OrderedDict()
flag = 0
result = 0
# outer level: one dict per epoch; inner level: the grads of each network layer
# iterate over the epochs within this checkpoint and collect the gradient information
for i in range(epoch):
if int(d2) == 1:
total_quan_list.append(torch.load(
'./project/p/checkpoint/cifar-10_lenet_bn_quant/scratch/' + str(d1) + '/ckpt_cifar-10_lenet_bn_quant_' + str(
i + 1) + '.pth'))
elif int(d2) == 2:
total_quan_list.append(torch.load(
'./project/p/checkpoint/cifar-10_lenet_bn_quant/scratch/mode' + str(d2) + '/' + str(d1)+ '/ckpt_cifar-10_lenet_bn_quant_' + str(
epoch) + '.pth'))
else:
total_quan_list.append(torch.load(
'./project/p/checkpoint/cifar-10_lenet_bn_quant/scratch/mode' + str(d2) + '_' + str(d3) + '/' + str(d1)+ '/ckpt_cifar-10_lenet_bn_quant_' + str(
epoch) + '.pth'))
# total_quan_list.append(torch.load('checkpoint/cifar-10_lenet_bn/full' + '/ckpt_cifar-10_lenet_bn_' + str(d2) + '.pth'))
# the full-precision data does not cover enough epochs
total_base_list.append(
torch.load('./project/p/checkpoint/cifar-10_lenet_bn/full' + '/ckpt_cifar-10_lenet_bn_' + str(i + 1) + '.pth'))
for k, _ in total_base_list[i]['grads'].items(): # get each layer's gradients for epoch i
if flag == 0: # the first epoch i read creates the first row of the data matrix; later epochs i are stacked below it
summary_quan_dict[k] = total_quan_list[i]['grads'][k].reshape(1, -1)
summary_base_dict[k] = total_base_list[i]['grads'][k].reshape(1, -1)
else:
# entries in the dict cannot be modified in place; reassign them instead
a = summary_quan_dict[k]
b = total_quan_list[i]['grads'][k].reshape(1, -1)
c = np.vstack((a, b))
summary_quan_dict[k] = c
a = summary_base_dict[k]
b = total_base_list[i]['grads'][k].reshape(1, -1)
c = np.vstack((a, b))
summary_base_dict[k] = c
flag = 1
# loss = total_quan_list[i]['losses']
# print(loss)
# df = pd.read_csv('./data_analysis_folder/data.csv', index_col=0)
# # df = pd.DataFrame()
# df2 = pd.DataFrame()
# the code above only collects the data; the actual summation happens below
for j in range(epoch):
flag0 = 0 # alternates between each layer's weight and bias
cnt = 0 # walks through the layers in order
sum = 0 # sum records the weighted gradient-distribution similarity for a single epoch j only
for k, _ in summary_quan_dict.items():
w = summary_base_dict[k][j, :] # this is not ideal here and needs to be reworked
v = summary_quan_dict[k][j, :]
if flag0 == 0:
cur_weight = weight_f1[cnt] * scipy.stats.wasserstein_distance(w, v) # weight
# not convenient to store; it would need a third dimension (sheets)
# if tag == 1:
# df2[k] = [cur_weight]
# else:
# df[k] = [cur_weight]
sum += 0.99 * cur_weight
else:
cur_bias = weight_f1[cnt] * scipy.stats.wasserstein_distance(w, v) # bias
# if tag == 1:
# df2[k] = [cur_bias]
# else:
# df[k] = [cur_bias]
sum += 0.01 * cur_bias
if flag0 == 1:
cnt = cnt + 1
flag0 = 0
else:
flag0 = 1
sum = sum / (weight_f1.sum() * 2)
result += sum # accumulate the weighted gradient similarity over the epochs i
print(sum)
result /= epoch # average the gradient similarity over the epochs within this checkpoint range
if tag == 1:
df2[str(epoch)] = [result]
else :
df[str(epoch)] = [result]
result = 0
if tag == 1 :
df = df.append(df2)
# df.to_csv('./qat_analysis_data/wasserstein_distance.csv')
df.to_csv(csvpath)
else :
# df.to_csv('./qat_analysis_data/wasserstein_distance.csv')
df.to_csv(csvpath)
# f = open('lenet_ptq_wasserstein_similarity.txt','a')
# f.write('bit:' + str(d1) + ' epoch_num:' + str(d2) +': '+str(sum)+'\n')
# f.close()
# -*- coding: utf-8 -*-
import numpy
import numpy as np
import torch
import sys
from collections import OrderedDict
import scipy.stats
import pandas as pd
from model import *
# from audtorch.metrics.functional import pearsonr
import math
# This script reads out the weight and bias values of the full-precision and quantized models for inspection
if __name__ == "__main__":
d1 = sys.argv[1]
# d2 = sys.argv[2]
# d1=8
# df = pd.read_csv('./ptq_analysis_data/seperate_data.csv', index_col=0)
df = pd.DataFrame()
# df2 = pd.DataFrame()
base_data = torch.load('./project/p/ckpt/trail/model_trail.pt')
# checkpoint_data = torch.load('./project/p/ckpt/trail/model_trail.pt')
print('full_precision weight/bias loaded!')
checkpoint_dir = './project/p/checkpoint/cifar-10_trail_model'
# quan_data = torch.load('ckpt/cifar-10_lenet_bn_ptq_' + str(d1) + '_.pt')
# print('quantization bit ' + str(d1) + ' weight/bias loaded!')
sum=0
if int(d1) == 1:
print(base_data)
# for k, _ in base_data.items():
# base_data[k] = base_data[k].reshape(1, -1)
# # quan_data[k] = quan_data[k].reshape(1, -1)
# print(base_data[k])
else:
for i in [4,9,14,19]:
check_data = torch.load(checkpoint_dir + '/ckpt_cifar-10_trail_model%s.pt' % (str(i)))
print(check_data)
# if int(d2) == 1:
# print(base_data[k])
# else:
# print(quan_data[k])
# -*- coding: utf-8 -*-
from torch.autograd import Function
class FakeQuantize(Function):
@staticmethod
def forward(ctx, x, qparam): # qparam (i.e. self) already carries the mode, scale, zero point, n_exp, etc., so no extra arguments are needed
x = qparam.quantize_tensor(x, qparam.mode) # INT
x = qparam.dequantize_tensor(x, qparam.mode) # FP(int)
return x
@staticmethod
def backward(ctx, grad_output): # straight-through estimator (STE): roughly approximate the quantizer as linear/identity
return grad_output, None
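# Minimal usage sketch (hypothetical; assumes a QParam-like object that exposes quantize_tensor /
# dequantize_tensor and a mode attribute, as used above):
#   x_fq = FakeQuantize.apply(x, qparam)  # forward: quantize then dequantize ("fake" quantization)
#   x_fq.sum().backward()                 # backward: gradients pass through unchanged (STE)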
import os
import torch
import torch.nn as nn
import torch.nn.functional as F
import numpy as np
from collections import OrderedDict
def get_model_histogram(model):
"""
Description:
- collect each parameter's gradient histogram (and the raw gradient values) from the model, stored in OrderedDicts
Args:
- model: (torch.nn.Module), torch model
Returns:
- (gradshisto, grads): two OrderedDicts, histograms and raw gradients
"""
gradshisto = OrderedDict()
grads = OrderedDict()
for name, params in model.named_parameters():
grad = params.grad
if grad is not None:
tmp = {}
params_np = grad.cpu().numpy()
histogram, bins = np.histogram(params_np.flatten(),bins=20)
tmp['histogram'] = list(histogram)
tmp['bins'] = list(bins)
gradshisto[name] = tmp
grads[name] = params_np
return gradshisto,grads
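# Usage sketch (hypothetical layer name, shown only to illustrate the returned structure): after
# loss.backward(),
#   histo, grads = get_model_histogram(model)
#   histo['conv1.weight']['histogram']  # 20 bin counts from np.histogram
#   histo['conv1.weight']['bins']       # 21 bin edges
#   grads['conv1.weight']               # the raw gradient array itself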
def get_model_norm_gradient(model):
"""
Description:
- get the gradient norm of each parameter from the model, stored in an OrderedDict
Args:
- model: (torch.nn.Module), torch model
Returns:
- grads: OrderedDict mapping parameter name to gradient norm
"""
grads = OrderedDict()
for name, params in model.named_parameters():
grad = params.grad
if grad is not None:
grads[name] = grad.norm().item()
return grads
def get_grad_histogram(grads_sum):
gradshisto = OrderedDict()
# grads = OrderedDict()
for name, params in grads_sum.items():
grad = params
if grad is not None:
tmp = {}
#params_np = grad.cpu().numpy()
params_np = grad
histogram, bins = np.histogram(params_np.flatten(),bins=20)
tmp['histogram'] = list(histogram)
tmp['bins'] = list(bins)
gradshisto[name] = tmp # one histogram per layer (tmp holds the information describing the histogram)
# grads[name] = params_np
return gradshisto
# -*- coding: utf-8 -*-
import numpy
import numpy as np
import torch
import sys
from collections import OrderedDict
import scipy.stats
import pandas as pd
import os
import os.path
#
d1 = sys.argv[1] # bit
d2 = sys.argv[2] # mode
d3 = sys.argv[3] # n_exp
# d1=2
# d2 = sys.argv[2] # epoch
# d1=2
# d2=3
sum=0
flag=0
# CNN FLOPs = Cout * Hout * Wout * (2 * Cin * K * K) when bias is counted, otherwise -1 inside the parentheses
# FCN (fully-connected) FLOPs = Cout * Cin when bias is counted, otherwise -1
# the related relu/pool ops are also taken into account
# MAdd
# weight0 =np.array( [ 705600.0+4704.0+ 3528.0 , 480000.0+ 1600.0 + 1200.0 , 95880.0 + 120.0,
# 20076.0 + 84.0 , 1670.0 ])
# weight1=np.array([705,600.0 , 480,000.0,+ 95,880.0 ,
# 20,076.0 , 1,670.0 ])
# flops
weight_f0= np.array([357504+4704+4704, 241600+1600+1600,48000+120,10080+84,840])
weight_f1=np.array([357504, 241600,48000,10080,840])
summary_quan_dict=OrderedDict()
summary_base_dict=OrderedDict()
# outer level: one dict per epoch; inner level: the grads of each network layer
flag = 0
dirpath = './project/p/qat_analysis_data/mode' + str(d2)
if not os.path.isdir(dirpath):
os.makedirs(dirpath, mode=0o777)
os.chmod(dirpath, mode=0o777)
if int(d2) == 1 or int(d2) == 2:
csvpath = dirpath + '/scratch_loss.csv'
else:
csvpath = dirpath + '/scratch_loss_' + str(d3) + '.csv'
if os.path.exists(csvpath):
flag = 1
if flag == 0: # no csv yet
df = pd.DataFrame()
else: # the csv already exists
df = pd.read_csv(csvpath, index_col=0)
df2 = pd.DataFrame()
for epoch in ([5, 10, 15, 20, 25, 30]):
sums = []
total_quan_list = []
total_base_list = []
for i in range(int(epoch)):
if int(d2) == 1:
total_quan_list.append(torch.load(
'./project/p/checkpoint/cifar-10_lenet_bn_quant/scratch/' + str(d1) + '/ckpt_cifar-10_lenet_bn_quant_' + str(
i + 1) + '.pth'))
elif int(d2) == 2:
total_quan_list.append(torch.load(
'./project/p/checkpoint/cifar-10_lenet_bn_quant/scratch/mode' + str(d2) + '/' + str(
d1) + '/ckpt_cifar-10_lenet_bn_quant_' + str(
i + 1) + '.pth'))
else:
total_quan_list.append(torch.load(
'./project/p/checkpoint/cifar-10_lenet_bn_quant/scratch/mode' + str(d2) + '_' + str(d3) + '/' + str(
d1) + '/ckpt_cifar-10_lenet_bn_quant_' + str(
i + 1) + '.pth'))
sum_loss = 0
loss = total_quan_list[i]['losses']
# print(len(loss))
# per-batch losses within this epoch
for j in range(len(loss)):
sum_loss += loss[j].cpu()
# print(sum_loss)
sum_loss /= len(loss) # average loss over the batches of this epoch (dividing by j would be off by one)
sums.append(sum_loss)
# print(sums)
#print(sums[0] - sums[int(d1) - 1])
if flag == 0:
df[str(epoch)] = [(sums[0] - sums[int(epoch) - 1]).detach().numpy()]
else:
df2[str(epoch)] = [(sums[0] - sums[int(epoch) - 1]).detach().numpy()]
if flag == 0:
# df.to_csv('./qat_analysis_data/scratch_loss.csv')
df.to_csv(csvpath)
else:
df = df.append(df2)
# df.to_csv('./qat_analysis_data/scratch_loss.csv')
df.to_csv(csvpath)
# -*- coding: utf-8 -*-
import torch
import torch.nn as nn
import torch.nn.functional as F
from module import *
# class VGG_19(nn.Module):
# def __init__(self, img_size=32, input_channel=3, num_class=10):
# super().__init__()
# self.conv_param_layer_name = (
# 'conv1_1', 'relu1_1', 'conv1_2', 'bn1_1', 'relu1_2', 'pool1',
# 'conv2_1', 'bn2_1', 'relu2_1', 'conv2_2', 'bn2_2', 'relu2_2', 'pool2',
# 'conv3_1', 'bn3_1', 'relu3_1', 'conv3_2', 'bn3_2', 'relu3_2', 'conv3_3', 'bn3_3', 'relu3_3', 'conv3_4',
# 'bn3_4', 'relu3_4', 'pool3',
# 'conv4_1', 'bn4_1', 'relu4_1', 'conv4_2', 'bn4_2', 'relu4_2', 'conv4_3', 'bn4_3', 'relu4_3', 'conv4_4',
# 'bn4_4', 'relu4_4', 'pool4',
# 'conv5_1', 'bn5_1', 'relu5_1', 'conv5_2', 'bn5_2', 'relu5_2', 'conv5_3', 'bn5_3', 'relu5_3', 'conv5_4',
# 'bn5_4', 'relu5_4', 'pool5'
# )
# self.fc_param_layer_name = (
# 'fc1','relu1','drop1','fc2','relu2','drop2','fc3'
# )
# self.conv_layers = nn.ModuleDict({
# # block1
# 'conv1_1': nn.Conv2d(in_channels=3, out_channels=64, kernel_size=(3, 3), stride=(1, 1), padding=1),
# 'relu1_1': nn.ReLU(),
# 'conv1_2': nn.Conv2d(in_channels=64, out_channels=64, kernel_size=(3, 3), stride=(1, 1), padding=1),
# 'bn1_1': nn.BatchNorm2d(num_features=64),
# 'relu1_2': nn.ReLU(),
# 'pool1': nn.MaxPool2d(kernel_size=(2, 2), stride=(2, 2)),
# # block2
# 'conv2_1': nn.Conv2d(in_channels=64, out_channels=128, kernel_size=(3, 3), stride=(1, 1), padding=1),
# 'bn2_1': nn.BatchNorm2d(num_features=128),
# 'relu2_1': nn.ReLU(),
# 'conv2_2': nn.Conv2d(in_channels=128, out_channels=128, kernel_size=(3, 3), stride=(1, 1), padding=1),
# 'bn2_2': nn.BatchNorm2d(num_features=128),
# 'relu2_2': nn.ReLU(),
# 'pool2': nn.MaxPool2d(kernel_size=(2, 2), stride=(2, 2)),
# # block3
# 'conv3_1': nn.Conv2d(in_channels=128, out_channels=256, kernel_size=(3, 3), stride=(1, 1), padding=1),
# 'bn3_1': nn.BatchNorm2d(num_features=256),
# 'relu3_1': nn.ReLU(),
# 'conv3_2': nn.Conv2d(in_channels=256, out_channels=256, kernel_size=(3, 3), stride=(1, 1), padding=1),
# 'bn3_2':nn.BatchNorm2d(num_features=256),
# 'relu3_2': nn.ReLU(),
# 'conv3_3': nn.Conv2d(in_channels=256, out_channels=256, kernel_size=(3, 3), stride=(1, 1), padding=1),
# 'bn3_3': nn.BatchNorm2d(num_features=256),
# 'relu3_3': nn.ReLU(),
# 'conv3_4': nn.Conv2d(in_channels=256, out_channels=256, kernel_size=(3, 3), stride=(1, 1), padding=1),
# 'bn3_4': nn.BatchNorm2d(num_features=256),
# 'relu3_4': nn.ReLU(),
# 'pool3': nn.MaxPool2d(kernel_size=(2, 2), stride=(2, 2)),
# # block4
# 'conv4_1': nn.Conv2d(in_channels=256, out_channels=512, kernel_size=(3, 3), stride=(1, 1), padding=1),
# 'bn4_1': nn.BatchNorm2d(num_features=512),
# 'relu4_1': nn.ReLU(),
# 'conv4_2': nn.Conv2d(in_channels=512, out_channels=512, kernel_size=(3, 3), stride=(1, 1), padding=1),
# 'bn4_2': nn.BatchNorm2d(num_features=512),
# 'relu4_2': nn.ReLU(),
# 'conv4_3': nn.Conv2d(in_channels=512, out_channels=512, kernel_size=(3, 3), stride=(1, 1), padding=1),
# 'bn4_3': nn.BatchNorm2d(num_features=512),
# 'relu4_3': nn.ReLU(),
# 'conv4_4': nn.Conv2d(in_channels=512, out_channels=512, kernel_size=(3, 3), stride=(1, 1), padding=1),
# 'bn4_4': nn.BatchNorm2d(num_features=512),
# 'relu4_4': nn.ReLU(),
# 'pool4': nn.MaxPool2d(kernel_size=(2, 2), stride=(2, 2)),
# # block5
# 'conv5_1': nn.Conv2d(in_channels=512, out_channels=512, kernel_size=(3, 3), stride=(1, 1), padding=1),
# 'bn5_1': nn.BatchNorm2d(num_features=512),
# 'relu5_1': nn.ReLU(),
# 'conv5_2': nn.Conv2d(in_channels=512, out_channels=512, kernel_size=(3, 3), stride=(1, 1), padding=1),
# 'bn5_2': nn.BatchNorm2d(num_features=512),
# 'relu5_2': nn.ReLU(),
# 'conv5_3': nn.Conv2d(in_channels=512, out_channels=512, kernel_size=(3, 3), stride=(1, 1), padding=1),
# 'bn5_3': nn.BatchNorm2d(num_features=512),
# 'relu5_3': nn.ReLU(),
# 'conv5_4': nn.Conv2d(in_channels=512, out_channels=512, kernel_size=(3, 3), stride=(1, 1), padding=1),
# 'bn5_4': nn.BatchNorm2d(num_features=512),
# 'relu5_4': nn.ReLU(),
# 'pool5': nn.MaxPool2d(kernel_size=(2, 2), stride=(2, 2))
# })
# self.fc_layers = nn.ModuleDict({
# # classifier
# 'fc1': nn.Linear(512 * (int)(img_size * img_size / 32 / 32), 4096),
# 'relu1': nn.ReLU(),
# 'drop1': nn.Dropout(0.5),
# 'fc2': nn.Linear(4096, 4096),
# 'relu2': nn.ReLU(),
# 'drop2': nn.Dropout(0.5),
# 'fc3': nn.Linear(4096, num_class)
# })
# def forward(self,x):
# for _,layer in self.conv_layers.items():
# x = layer(x)
# output = x.view(x.size()[0], -1)
# for _,layer in self.fc_layers.items():
# output = layer(output)
# out = F.softmax(output,dim = 1) # the softmax here is optional; it has little effect
# return out
# def quantize(self, num_bits=8):
# self.quantize_conv_layers=nn.ModuleDict({
# qi=True: the previous layer's output has not been quantized yet and must be quantized here. MaxPool and ReLU do not change the INT values or the min/max, so the qi of layers right after them is False.
# If the previous layer is a conv, the data min/max has changed, so qi=True is needed to quantize.
# 'qconv1_1': QConv2d(self.conv_layers['conv1_1'], qi=True, qo=True, num_bits=num_bits),
# 'qrelu1_1': QReLU(),
# 'qconvbnrelu1_1': QConvBNReLU(self.conv_layers['conv1_2'],self.conv_layers['bn1_1'],qi=False,qo=True,num_bits=num_bits),
# 'qpool1': QMaxPooling2d(kernel_size=2,stride=2,padding=0),
# # block2
# 'qconvbnrelu2_1': QConvBNReLU(self.conv_layers['conv2_1'], self.conv_layers['bn2_1'], qi=False, qo=True, num_bits=num_bits),
# 'qconvbnrelu2_2': QConvBNReLU(self.conv_layers['conv2_2'], self.conv_layers['bn2_2'], qi=False, qo=True, num_bits=num_bits),
# 'qpool2': QMaxPooling2d(kernel_size=2,stride=2,padding=0),
# # block3
# 'qconvbnrelu3_1': QConvBNReLU(self.conv_layers['conv3_1'], self.conv_layers['bn3_1'], qi=False, qo=True,
# num_bits=num_bits),
# 'qconvbnrelu3_2': QConvBNReLU(self.conv_layers['conv3_2'], self.conv_layers['bn3_2'], qi=False, qo=True,
# num_bits=num_bits),
# 'qconvbnrelu3_3': QConvBNReLU(self.conv_layers['conv3_3'], self.conv_layers['bn3_3'], qi=False, qo=True,
# num_bits=num_bits),
# 'qconvbnrelu3_4': QConvBNReLU(self.conv_layers['conv3_4'], self.conv_layers['bn3_4'], qi=False, qo=True,
# num_bits=num_bits),
# 'qpool3': QMaxPooling2d(kernel_size=2,stride=2,padding=0),
# # block4
# 'qconvbnrelu4_1': QConvBNReLU(self.conv_layers['conv4_1'], self.conv_layers['bn4_1'], qi=False, qo=True,
# num_bits=num_bits),
# 'qconvbnrelu4_2': QConvBNReLU(self.conv_layers['conv4_2'], self.conv_layers['bn4_2'], qi=False, qo=True,
# num_bits=num_bits),
# 'qconvbnrelu4_3': QConvBNReLU(self.conv_layers['conv4_3'], self.conv_layers['bn4_3'], qi=False, qo=True,
# num_bits=num_bits),
# 'qconvbnrelu4_4': QConvBNReLU(self.conv_layers['conv4_4'], self.conv_layers['bn4_4'], qi=False, qo=True,
# num_bits=num_bits),
# 'qpool4': QMaxPooling2d(kernel_size=2,stride=2,padding=0),
# # block5
# 'qconvbnrelu5_1': QConvBNReLU(self.conv_layers['conv5_1'], self.conv_layers['bn5_1'], qi=False, qo=True,
# num_bits=num_bits),
# 'qconvbnrelu5_2': QConvBNReLU(self.conv_layers['conv5_2'], self.conv_layers['bn5_2'], qi=False, qo=True,
# num_bits=num_bits),
# 'qconvbnrelu5_3': QConvBNReLU(self.conv_layers['conv5_3'], self.conv_layers['bn5_3'], qi=False, qo=True,
# num_bits=num_bits),
# 'qconvbnrelu5_4': QConvBNReLU(self.conv_layers['conv5_4'], self.conv_layers['bn5_4'], qi=False, qo=True,
# num_bits=num_bits),
# 'qpool5': QMaxPooling2d(kernel_size=2,stride=2,padding=0)
# })
# self.quantize_fc_layers = nn.ModuleDict({
# 'qfc1': QLinear(self.fc_layers['fc1'],qi=False,qo=True,num_bits=num_bits),
# 'qrelu1': QReLU(),
# 'qdrop1': nn.Dropout(0.5),
# 'qfc2': QLinear(self.fc_layers['fc2'],qi=False,qo=True,num_bits=num_bits),
# 'qrelu2': QReLU(),
# 'qdrop2': nn.Dropout(0.5),
# 'qfc3': QLinear(self.fc_layers['fc3'],qi=False,qo=True,num_bits=num_bits)
# })
# def quantize_forward(self, x):
# for _, layer in self.quantize_conv_layers.items():
# x = layer(x)
# output = x.view(x.size()[0],-1)
# for s, layer in self.quantize_fc_layers.items():
# # if (s=='qrelu1') == True or (s=='qrelu2')==True:
# # output = nn.Dropout(0.5)
# # else:
# output = layer(output)
# out = F.softmax(output, dim=1) # the softmax here is optional, little effect; used when computing the loss
# return out
# def freeze(self):
# self.quantize_conv_layers['qconv1_1'].freeze()
# self.quantize_conv_layers['qrelu1_1'].freeze(self.quantize_conv_layers['qconv1_1'].qo)
# self.quantize_conv_layers['qconvbnrelu1_1'].freeze(qi=self.quantize_conv_layers['qconv1_1'].qo)
# #self.quantize_conv_layers['qconvbnrelu1_1'].freeze(qi=self.quantize_conv_layers['qrelu1_1'].qo)
# self.quantize_conv_layers['qpool1'].freeze(self.quantize_conv_layers['qconvbnrelu1_1'].qo)
# self.quantize_conv_layers['qconvbnrelu2_1'].freeze(qi=self.quantize_conv_layers['qconvbnrelu1_1'].qo)
# self.quantize_conv_layers['qconvbnrelu2_2'].freeze(qi=self.quantize_conv_layers['qconvbnrelu2_1'].qo)
# self.quantize_conv_layers['qpool2'].freeze(self.quantize_conv_layers['qconvbnrelu2_2'].qo)
# self.quantize_conv_layers['qconvbnrelu3_1'].freeze(qi=self.quantize_conv_layers['qconvbnrelu2_2'].qo)
# #self.quantize_conv_layers['qconvbnrelu3_1'].freeze(qi=self.quantize_conv_layers['qpool2'].qo)
# self.quantize_conv_layers['qconvbnrelu3_2'].freeze(qi=self.quantize_conv_layers['qconvbnrelu3_1'].qo)
# self.quantize_conv_layers['qconvbnrelu3_3'].freeze(qi=self.quantize_conv_layers['qconvbnrelu3_2'].qo)
# self.quantize_conv_layers['qconvbnrelu3_4'].freeze(qi=self.quantize_conv_layers['qconvbnrelu3_3'].qo)
# self.quantize_conv_layers['qpool3'].freeze(self.quantize_conv_layers['qconvbnrelu3_4'].qo)
# self.quantize_conv_layers['qconvbnrelu4_1'].freeze(qi=self.quantize_conv_layers['qconvbnrelu3_4'].qo)
# #self.quantize_conv_layers['qconvbnrelu4_1'].freeze(qi=self.quantize_conv_layers['qpool3'].qo)
# self.quantize_conv_layers['qconvbnrelu4_2'].freeze(qi=self.quantize_conv_layers['qconvbnrelu4_1'].qo)
# self.quantize_conv_layers['qconvbnrelu4_3'].freeze(qi=self.quantize_conv_layers['qconvbnrelu4_2'].qo)
# self.quantize_conv_layers['qconvbnrelu4_4'].freeze(qi=self.quantize_conv_layers['qconvbnrelu4_3'].qo)
# self.quantize_conv_layers['qpool4'].freeze(self.quantize_conv_layers['qconvbnrelu4_4'].qo)
# self.quantize_conv_layers['qconvbnrelu5_1'].freeze(qi=self.quantize_conv_layers['qconvbnrelu4_4'].qo)
# #self.quantize_conv_layers['qconvbnrelu5_1'].freeze(qi=self.quantize_conv_layers['qpool4'].qo)
# self.quantize_conv_layers['qconvbnrelu5_2'].freeze(qi=self.quantize_conv_layers['qconvbnrelu5_1'].qo)
# self.quantize_conv_layers['qconvbnrelu5_3'].freeze(qi=self.quantize_conv_layers['qconvbnrelu5_2'].qo)
# self.quantize_conv_layers['qconvbnrelu5_4'].freeze(qi=self.quantize_conv_layers['qconvbnrelu5_3'].qo)
# self.quantize_conv_layers['qpool5'].freeze(self.quantize_conv_layers['qconvbnrelu5_4'].qo)
# self.quantize_fc_layers['qfc1'].freeze(qi=self.quantize_conv_layers['qconvbnrelu5_4'].qo)
# #self.quantize_fc_layers['qfc1'].freeze(qi=self.quantize_conv_layers['qpool5'].qo)
# self.quantize_fc_layers['qrelu1'].freeze(self.quantize_fc_layers['qfc1'].qo)
# self.quantize_fc_layers['qfc2'].freeze(qi=self.quantize_fc_layers['qfc1'].qo)
# #self.quantize_fc_layers['qfc2'].freeze(qi=self.quantize_fc_layers['qrelu1'].qo)
# self.quantize_fc_layers['qrelu2'].freeze(self.quantize_fc_layers['qfc2'].qo)
# self.quantize_fc_layers['qfc3'].freeze(qi=self.quantize_fc_layers['qfc2'].qo)
# #self.quantize_fc_layers['qfc3'].freeze(qi=self.quantize_fc_layers['qrelu2'].qo)
# def quantize_inference(self, x):
# x = self.quantize_conv_layers['qconv1_1'].qi.quantize_tensor(x)
# for s, layer in self.quantize_conv_layers.items():
# x=layer.quantize_inference(x)
# output = x.view(x.size()[0], -1)
# for s, layer in self.quantize_fc_layers.items():
# # elif (s == 'qrelu1') == True or (s == 'qrelu2') == True:
# # output = nn.Dropout(0.5)
# # if (s == 'qdrop1')==True or (s=='qdrop2')==True:
# # output = F.dropout(output,0.45)
# # else:
# if ((s == 'qdrop1') == False ) and ((s == 'qdrop2') == False):
# output = layer.quantize_inference(output)
# else:
# output = output
# output = self.quantize_fc_layers['qfc3'].qo.dequantize_tensor(output)
# out = F.softmax(output, dim=1) # a QSoftmax would probably be better here; change it later
# return out
class LeNet(nn.Module):
# CONV FLOPs with bias:    (2 * C_in * K_h * K_w) * H_out * W_out * C_out
#           without bias:  (2 * C_in * K_h * K_w - 1) * H_out * W_out * C_out
# FCN FLOPs with bias:     (2 * I) * O
#           without bias:  (2 * I - 1) * O
def __init__(self, img_size=32, input_channel=3, num_class=10, n_exp=4, mode=1):
super().__init__()
self.conv_layers = nn.ModuleDict({
# block1
'conv1': nn.Conv2d(3,6,5), # (2*3*5*5) * 28*28*6 FLOPs (bias accounts for 28*28*6 = 4704 of them): 4704/705600
'reluc1': nn.ReLU(),
'pool1': nn.MaxPool2d(2,2),
# block2
'conv2': nn.Conv2d(6,16,5), # (2*6*5*5) * 10*10*16 FLOPs (bias accounts for 10*10*16 = 1600 of them): 1600/480000
'reluc2': nn.ReLU(),
'pool2': nn.MaxPool2d(2,2),
})
self.fc_layers = nn.ModuleDict({
# classifier
'fc1': nn.Linear(16*5*5,120), # (2*16*5*5)*120 FLOPs (bias accounts for 120 of them): 120/96000
'reluf1': nn.ReLU(),
'fc2': nn.Linear(120,84), # (2*120)*84 FLOPs (bias accounts for 84 of them): 84/20160
'reluf2': nn.ReLU(),
'fc3': nn.Linear(84, num_class)
})
self.mode = mode
self.n_exp = n_exp
def forward(self,x):
for _,layer in self.conv_layers.items():
x = layer(x)
output = x.view(-1,16*5*5)
for _,layer in self.fc_layers.items():
output = layer(output)
out = F.softmax(output,dim = 1) # the softmax here is optional; it has little effect
return out
def quantize(self, num_bits=8):
self.quantize_conv_layers=nn.ModuleDict({
# qi=True: the previous layer's output has not been quantized yet and must be quantized here. MaxPool and ReLU do not change the INT values or the min/max, so the qi of layers right after them is False.
# If the previous layer is a conv, the data min/max has changed, so qi=True is needed to quantize.
'qconv1': QConv2d(self.conv_layers['conv1'], qi=True, qo=True, num_bits=num_bits, n_exp=self.n_exp, mode=self.mode),
'qreluc1': QReLU(n_exp=self.n_exp, mode=self.mode),
'qpool1': QMaxPooling2d(kernel_size=2,stride=2,padding=0, n_exp=self.n_exp, mode=self.mode),
'qconv2': QConv2d(self.conv_layers['conv2'], qi=False, qo=True, num_bits=num_bits, n_exp=self.n_exp, mode=self.mode),
'qreluc2': QReLU(n_exp=self.n_exp, mode=self.mode),
'qpool2': QMaxPooling2d(kernel_size=2, stride=2, padding=0, n_exp=self.n_exp, mode=self.mode)
})
self.quantize_fc_layers = nn.ModuleDict({
'qfc1': QLinear(self.fc_layers['fc1'],qi=False,qo=True,num_bits=num_bits, n_exp=self.n_exp, mode=self.mode),
'qreluf1': QReLU(n_exp=self.n_exp, mode=self.mode),
'qfc2': QLinear(self.fc_layers['fc2'],qi=False,qo=True,num_bits=num_bits, n_exp=self.n_exp, mode=self.mode),
'qreluf2': QReLU(n_exp=self.n_exp, mode=self.mode),
'qfc3': QLinear(self.fc_layers['fc3'],qi=False,qo=True,num_bits=num_bits, n_exp=self.n_exp, mode=self.mode)
})
def quantize_forward(self, x):
for _, layer in self.quantize_conv_layers.items():
x = layer(x)
output = x.view(-1,16*5*5)
for s, layer in self.quantize_fc_layers.items():
output = layer(output)
out = F.softmax(output, dim=1) # the softmax here is optional, little effect; used when computing the loss
return out
def freeze(self):
self.quantize_conv_layers['qconv1'].freeze()
self.quantize_conv_layers['qreluc1'].freeze(self.quantize_conv_layers['qconv1'].qo)
self.quantize_conv_layers['qpool1'].freeze(self.quantize_conv_layers['qconv1'].qo)
self.quantize_conv_layers['qconv2'].freeze(self.quantize_conv_layers['qconv1'].qo)
self.quantize_conv_layers['qreluc2'].freeze(self.quantize_conv_layers['qconv2'].qo)
self.quantize_conv_layers['qpool2'].freeze(self.quantize_conv_layers['qconv2'].qo)
self.quantize_fc_layers['qfc1'].freeze(qi=self.quantize_conv_layers['qconv2'].qo)
self.quantize_fc_layers['qreluf1'].freeze(self.quantize_fc_layers['qfc1'].qo)
self.quantize_fc_layers['qfc2'].freeze(qi=self.quantize_fc_layers['qfc1'].qo)
self.quantize_fc_layers['qreluf2'].freeze(self.quantize_fc_layers['qfc2'].qo)
self.quantize_fc_layers['qfc3'].freeze(qi=self.quantize_fc_layers['qfc2'].qo)
def fakefreeze(self):
self.quantize_conv_layers['qconv1'].fakefreeze()
self.quantize_conv_layers['qreluc1'].fakefreeze(self.quantize_conv_layers['qconv1'].qo)
self.quantize_conv_layers['qpool1'].fakefreeze(self.quantize_conv_layers['qconv1'].qo)
self.quantize_conv_layers['qconv2'].fakefreeze(self.quantize_conv_layers['qconv1'].qo)
self.quantize_conv_layers['qreluc2'].fakefreeze(self.quantize_conv_layers['qconv2'].qo)
self.quantize_conv_layers['qpool2'].fakefreeze(self.quantize_conv_layers['qconv2'].qo)
self.quantize_fc_layers['qfc1'].fakefreeze(qi=self.quantize_conv_layers['qconv2'].qo)
self.quantize_fc_layers['qreluf1'].fakefreeze(self.quantize_fc_layers['qfc1'].qo)
self.quantize_fc_layers['qfc2'].fakefreeze(qi=self.quantize_fc_layers['qfc1'].qo)
self.quantize_fc_layers['qreluf2'].fakefreeze(self.quantize_fc_layers['qfc2'].qo)
self.quantize_fc_layers['qfc3'].fakefreeze(qi=self.quantize_fc_layers['qfc2'].qo)
def quantize_inference(self, x):
x = self.quantize_conv_layers['qconv1'].qi.quantize_tensor(x, self.mode)
for s, layer in self.quantize_conv_layers.items():
x = layer.quantize_inference(x)
output = x.view( -1,16*5*5)
for s, layer in self.quantize_fc_layers.items():
output = layer.quantize_inference(output)
# Only mode 1 needs the range mapping that brings the quantized data back to a range similar to the original data; PoT (power-of-two) quantization does not, since it restores the range by itself
if self.mode == 1:
output = self.quantize_fc_layers['qfc3'].qo.dequantize_tensor(output, self.mode)
out = F.softmax(output, dim=1) # a QSoftmax would probably be better here; change it later
return out
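# Typical end-to-end flow for the quantized LeNet above (a sketch that mirrors the training/PTQ
# scripts elsewhere in this commit, not additional functionality):
#   model = LeNet(mode=1, n_exp=4)
#   model.load_state_dict(torch.load('ckpt/cifar-10_lenet_bn.pt'))   # full-precision weights
#   model.quantize(num_bits=8)          # build the Q* layers
#   ... train with model.quantize_forward(x) (QAT) or calibrate with it (PTQ) ...
#   model.freeze()                      # fix scale / zero point for inference
#   out = model.quantize_inference(x)   # FP32 in -> quantized internally -> FP32 out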
class Net(nn.Module):
def __init__(self, num_channels=1):
super(Net, self).__init__()
# layers referenced by forward() and quantize() below; restored so the class is usable
self.conv1 = nn.Conv2d(num_channels, 40, 3, 1)
self.conv2 = nn.Conv2d(40, 40, 3, 1, groups=20)
self.fc = nn.Linear(5*5*40, 10)
def forward(self, x):
x = F.relu(self.conv1(x))
x = F.max_pool2d(x, 2, 2)
x = F.relu(self.conv2(x))
x = F.max_pool2d(x, 2, 2)
x = x.view(-1, 5*5*40) # reshape the tensor
x = self.fc(x)
return x
# Preliminary quantization of the model (the quantized model has not been trained yet). Determining the quantization parameters is a preparatory step; the later training of the quantized model is fine-tuning.
def quantize(self, num_bits=8):
# Only the first layer has qi=True: during forward, every layer except pool and relu adjusts x according to its qo at the end, completing the quantize-then-dequantize step, so x always carries the latest quantize/dequantize result; only the weights need to be quantized and dequantized again in every layer.
self.qconv1 = QConv2d(self.conv1, qi=True, qo=True, num_bits=num_bits)
self.qrelu1 = QReLU()
self.qmaxpool2d_1 = QMaxPooling2d(kernel_size=2, stride=2, padding=0)
self.qconv2 = QConv2d(self.conv2, qi=False, qo=True, num_bits=num_bits) # qi=False means this layer does not need its own input quantization parameters (scale, zero point)
self.qrelu2 = QReLU()
self.qmaxpool2d_2 = QMaxPooling2d(kernel_size=2, stride=2, padding=0)
self.qfc = QLinear(self.fc, qi=False, qo=True, num_bits=num_bits)
# forward function used while training the quantized model
def quantize_forward(self, x):
x = self.qconv1(x)
x = self.qrelu1(x)
x = self.qmaxpool2d_1(x)
x = self.qconv2(x)
x = self.qrelu2(x)
x = self.qmaxpool2d_2(x)
x = x.view(-1, 5*5*40)
x = self.qfc(x)
return x
# After the quantized model has been trained, freeze the parameters. This fixes which quantization parameters each layer uses at inference time; the quantization parameters determine the mapping and the dequantized result.
# On how qo is updated per layer: maxpool, relu and dropout do not update qo, conv does. So the quantization parameters only need to change after a conv; the other layers simply inherit them (the x passed from a layer's output to the next layer's input carries this inheritance by itself, while the quantization parameters are updated only when x might exceed the current min/max range).
def freeze(self):
self.qconv1.freeze()
self.qrelu1.freeze(self.qconv1.qo) # used as qi; layers after a conv need the new qo (qo keeps being updated during training because x and the model parameters change; it is updated in the forward pass of the Q* layers, and min/max is a running global statistic; since this is fine-tuning, the initial min/max is not too far off)
self.qmaxpool2d_1.freeze(self.qconv1.qo)
self.qconv2.freeze(qi=self.qconv1.qo)
self.qrelu2.freeze(self.qconv2.qo) # relu and maxpool cannot change the quantization parameters (the min/max statistics are global, and relu/maxpool do not affect min or max)
self.qmaxpool2d_2.freeze(self.qconv2.qo)
self.qfc.freeze(qi=self.qconv2.qo)
# inference after the quantization parameters are frozen: FP32 in, quantized internally, FP32 out
def quantize_inference(self, x):
qx = self.qconv1.qi.quantize_tensor(x)
qx = self.qconv1.quantize_inference(qx)
qx = self.qrelu1.quantize_inference(qx)
qx = self.qmaxpool2d_1.quantize_inference(qx)
qx = self.qconv2.quantize_inference(qx)
qx = self.qrelu2.quantize_inference(qx)
qx = self.qmaxpool2d_2.quantize_inference(qx)
qx = qx.view(-1, 5*5*40)
qx = self.qfc.quantize_inference(qx)
out = self.qfc.qo.dequantize_tensor(qx)
return out
class NetBN(nn.Module):
def __init__(self, num_channels=1):
super(NetBN, self).__init__()
self.conv1 = nn.Conv2d(num_channels, 40, 3, 1)
self.bn1 = nn.BatchNorm2d(40)
self.conv2 = nn.Conv2d(40, 40, 3, 1)
self.bn2 = nn.BatchNorm2d(40)
self.fc = nn.Linear(5 * 5 * 40, 10)
def forward(self, x):
x = self.conv1(x)
x = self.bn1(x)
x = F.relu(x)
x = F.max_pool2d(x, 2, 2)
x = self.conv2(x)
x = self.bn2(x)
x = F.relu(x)
x = F.max_pool2d(x, 2, 2)
x = x.view(-1, 5 * 5 * 40)
x = self.fc(x)
return x
def quantize(self, num_bits=8):
self.qconv1 = QConvBNReLU(self.conv1, self.bn1, qi=True, qo=True, num_bits=num_bits)
self.qmaxpool2d_1 = QMaxPooling2d(kernel_size=2, stride=2, padding=0)
self.qconv2 = QConvBNReLU(self.conv2, self.bn2, qi=False, qo=True, num_bits=num_bits)
self.qmaxpool2d_2 = QMaxPooling2d(kernel_size=2, stride=2, padding=0)
self.qfc = QLinear(self.fc, qi=False, qo=True, num_bits=num_bits)
def quantize_forward(self, x):
x = self.qconv1(x)
x = self.qmaxpool2d_1(x)
x = self.qconv2(x)
x = self.qmaxpool2d_2(x)
x = x.view(-1, 5*5*40)
x = self.qfc(x)
return x
def freeze(self):
self.qconv1.freeze()
self.qmaxpool2d_1.freeze(self.qconv1.qo)
self.qconv2.freeze(qi=self.qconv1.qo) # because maxpool does not change min/max
self.qmaxpool2d_2.freeze(self.qconv2.qo)
self.qfc.freeze(qi=self.qconv2.qo) # because maxpool does not change min/max
def quantize_inference(self, x):
qx = self.qconv1.qi.quantize_tensor(x)
qx = self.qconv1.quantize_inference(qx)
qx = self.qmaxpool2d_1.quantize_inference(qx)
qx = self.qconv2.quantize_inference(qx)
qx = self.qmaxpool2d_2.quantize_inference(qx)
qx = qx.view(-1, 5*5*40)
qx = self.qfc.quantize_inference(qx)
out = self.qfc.qo.dequantize_tensor(qx) # INT -> FP
return out
import torch
import torch.nn as nn
import torch.optim as optim
from torch.utils.data import DataLoader
from get_weight import *
from torch.utils.tensorboard import SummaryWriter
from torchvision import datasets, transforms
from torchvision.datasets import CIFAR10
from resnet import *
from torchvision.transforms import transforms
# import models
import time
import os
import argparse
# define the model
class Net(nn.Module):
def __init__(self):
super(Net, self).__init__()
self.conv1 = nn.Conv2d(3, 32, kernel_size=3)
self.pool = nn.MaxPool2d(kernel_size=2, stride=2)
self.conv2 = nn.Conv2d(32, 64, kernel_size=3)
self.fc1 = nn.Linear(64 * 6 * 6, 512)
self.fc2 = nn.Linear(512, 10)
self.relu = nn.ReLU()
def forward(self, x):
x = self.relu(self.conv1(x))
x = self.pool(x)
x = self.relu(self.conv2(x))
x = self.pool(x)
x = torch.flatten(x, start_dim=1)
x = self.relu(self.fc1(x))
x = self.fc2(x)
return x
def train(model, optimizer, criterion, train_loader, device):
model.train()
running_loss = 0.0
flag = 0
cnt = 0
for i, data in enumerate(train_loader):
inputs, labels = data
inputs, labels = inputs.to(device), labels.to(device)
optimizer.zero_grad()
outputs = model(inputs)
loss = criterion(outputs, labels)
loss.backward()
histo, grads = (get_model_histogram(model))
if flag == 0:
flag = 1
grads_sum = grads
else:
for k,v in grads_sum.items():
grads_sum[k] += grads[k]
optimizer.step()
running_loss += loss.item()
train_loss = running_loss / len(train_loader)
for k, v in grads_sum.items():
grads_sum[k] = v / len(train_loader)
return train_loss,grads_sum
def evaluate(model, criterion, test_loader, device):
model.eval()
correct, total = 0, 0
with torch.no_grad():
for data in test_loader:
images, labels = data
images, labels = images.to(device), labels.to(device)
outputs = model(images)
_, predicted = torch.max(outputs.data, 1)
total += labels.size(0)
correct += (predicted == labels).sum().item()
accuracy = 100 * correct / total
return accuracy
def get_children(model: torch.nn.Module):
# get children from the model!
# use an nn.ModuleList as the container so the parameters can still be updated later
children = nn.ModuleList(model.children())
# print(children)
# makes it convenient to update the contained modules later
flatt_children = nn.ModuleList()
# children = list(model.children())
# flatt_children = nn.ModuleList()
# flatt_children = []
if len(children) == 0:
# if model has no children; model is last child! :O
return model
else:
# look for children from children... to the last child!
for child in children:
try:
flatt_children.extend(get_children(child))
except TypeError:
flatt_children.append(get_children(child))
# print(flatt_children)
return flatt_children
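# Usage sketch (hypothetical): flatten a model into its leaf modules, e.g. to inspect or rebuild
# them one by one:
#   leaves = get_children(resnet18())
#   for m in leaves:
#       print(type(m).__name__)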
if __name__ == "__main__":
# torch.cuda.empty_cache()
parser = argparse.ArgumentParser(description='PyTorch FP32 Training')
parser.add_argument('-m', '--model', metavar='MODEL ARCH', default='resnet18')
parser.add_argument('-e','--epochs', default=100, type=int, metavar='EPOCHS', help='number of total epochs to run')
parser.add_argument('-b', '--batch_size', default=128, type=int, metavar='BATCH SIZE', help='mini-batch size (default: 128)')
parser.add_argument('-j','--workers', default=4, type=int, metavar='WORKERS',help='number of data loading workers (default: 4)')
parser.add_argument('-lr', '--learning-rate', default=0.001, type=float, metavar='LR', help='initial learning rate', dest='lr')
parser.add_argument('-t', '--test', dest='test', action='store_true', help='test model on test set')
# models = ['resnet18', 'resnet50', 'resnet152','resnet18']
# training hyperparameters
args = parser.parse_args()
num_epochs = args.epochs
print(num_epochs)
batch_size = args.batch_size
print(batch_size)
num_workers = args.workers
lr = args.lr
best_acc = float("-inf")
start_time = time.time()
# model, loss function and optimizer
device = torch.device("cuda" if torch.cuda.is_available() else "cpu") # device selection
print(device)
# model = Net().to(device) # move the model to the device
# model = resnet18().to(device)
# model = models.__dict__[args.model]().to(device)
# t = torch.cuda.get_device_properties(0).total_memory
# r = torch.cuda.memory_reserved(0)
# a = torch.cuda.memory_allocated(0)
# f = r-a # free memory
# print(f"Total memory: {t}")
# print(f"Reserved memory: {r}")
# print(f"Allocated memory: {a}")
# print(f"Free memory: {f}")
if args.model == 'resnet18' :
model = resnet18().to(device)
elif args.model == 'resnet50' :
model = resnet50().to(device)
elif args.model == 'resnet152' :
model = resnet152().to(device)
criterion = nn.CrossEntropyLoss()
optimizer = optim.Adam(model.parameters(), lr=lr)
print("ok!")
# data parallelism
if torch.cuda.device_count() > 1:
print(f"Using {torch.cuda.device_count()} GPUs")
model = nn.DataParallel(model)
# load the data
train_loader = torch.utils.data.DataLoader(
datasets.CIFAR10('./project/p/data', train=True, download=False,
transform=transforms.Compose([
transforms.ToTensor(),
transforms.Normalize(
(0.4914, 0.4822, 0.4465), (0.2023, 0.1994, 0.2010))
])),
batch_size=batch_size, shuffle=True, num_workers=num_workers, pin_memory=True
)
test_loader = torch.utils.data.DataLoader(
datasets.CIFAR10('./project/p/data', train=False, download=False, transform=transforms.Compose([
transforms.ToTensor(),
transforms.Normalize((0.4914, 0.4822, 0.4465),
(0.2023, 0.1994, 0.2010))
])),
batch_size=batch_size, shuffle=False, num_workers=num_workers, pin_memory=True
)
# train_loader = DataLoader(train_dataset, batch_size=batch_size, shuffle=True, num_workers=num_workers)
# test_loader = DataLoader(test_dataset, batch_size=batch_size, shuffle=False, num_workers=num_workers)
# learning-rate scheduler
lr_scheduler = optim.lr_scheduler.StepLR(optimizer, step_size=5, gamma=0.1)
# TensorBoard
# WARN
# writer = SummaryWriter(log_dir='./project/p/models_log/trail/full_log')
writer = SummaryWriter(log_dir='./project/p/models_log/' + args.model + '/full_log')
# early-stopping parameters
patience = 5
count = 0
# WARN
# save_dir = './project/p/ckpt/trail'
save_dir = './project/p/ckpt/' + args.model
if not os.path.isdir(save_dir):
os.makedirs(save_dir, mode=0o777)
os.chmod(save_dir, mode=0o777)
# checkpoint_dir = './project/p/checkpoint/cifar-10_trail_model'
checkpoint_dir = './project/p/checkpoint/cifar-10_' + args.model
if not os.path.isdir(checkpoint_dir):
os.makedirs(checkpoint_dir, mode=0o777)
os.chmod(checkpoint_dir, mode=0o777)
# training loop
if args.test == True:
model.load_state_dict(torch.load(save_dir+'/' + args.model + '.pt'))
acc = evaluate(model, criterion, test_loader, device=device)
print(f"test accuracy: {acc:.2f}%")
for name, module in model.named_modules():
print(f"{name}: {module}\n")
print('========================================================')
print('========================================================')
model.quantize()
for name , layer in model.quantize_layers.items():
print(f"Layer {name}: {layer} ") # 足够遍历了
else:
for epoch in range(num_epochs):
# train the model and log the loss
train_loss,grads_sum = train(model, optimizer, criterion,
train_loader, device=device)
writer.add_scalar("Training Loss", train_loss, epoch + 1)
# evaluate the model and log the accuracy
if (epoch + 1) % 5 == 0:
acc = evaluate(model, criterion, test_loader, device=device)
writer.add_scalar("Validation Accuracy", acc, epoch + 1)
checkpoint = {
'model': model.state_dict(),
'optimizer': optimizer.state_dict(),
'epoch': epoch,
'grads': grads_sum,
'accuracy':acc
}
# for name, param in model.named_parameters():
# writer.add_histogram(tag=name + '_grad', values=param.grad, global_step=epoch)
# writer.add_histogram(tag=name + '_data', values=param.data, global_step=epoch)
for name, param in grads_sum.items():
# the grads here may be accumulated values rather than averages
writer.add_histogram(tag=name + '_grad', values=param, global_step=epoch)
# take the weights as they are after the last batch of this epoch
for name, param in model.named_parameters():
writer.add_histogram(tag=name + '_data', values=param.data, global_step=epoch)
# WARN
# torch.save(checkpoint, checkpoint_dir + '/ckpt_cifar-10_trail_model%s.pt' % (str(epoch+1)))
torch.save(checkpoint, checkpoint_dir + '/ckpt_cifar-10_' + args.model + '_%s.pt' % (str(epoch+1)))
# save the best model
if acc > best_acc:
best_acc = acc
count = 0
# WARN
# torch.save(model.state_dict(), save_dir+'/model_trail.pt')
torch.save(model.state_dict(), save_dir+'/' + args.model + '.pt')
else:
count += 1
print(
f"Epoch {epoch + 1}/{num_epochs}, Train Loss: {train_loss:.5f}, Val Acc: {acc:.2f}%")
# check whether early stopping should trigger
if count == patience:
print(f"No improvement after {patience} epochs. Early stop!")
break
# update the learning rate
lr_scheduler.step()
# training time and best validation accuracy
print(f"Training took {(time.time() - start_time) / 60:.2f} minutes")
print(f"Best validation accuracy: {best_acc:.2f}%")
# load and test the best model
# model.load_state_dict(torch.load("best_model.pth"))
# model.to(device)
# test_acc = evaluate(model, criterion, test_loader, device="cuda")
# print(f"Test Accuracy: {test_acc:.2f}%")
# close the TensorBoard writer
writer.close()
# -*- coding: utf-8 -*-
from torch.serialization import load
from model import *
import argparse
import torch
import sys
import torch.nn as nn
import torch.optim as optim
from torchvision import datasets, transforms
import os
import os.path as osp
from torch.utils.tensorboard import SummaryWriter
# To obtain the fake-quantized version of the PTQ weights (quantize then dequantize, so the distribution stays comparable to the full-precision weights and can be compared with the Wasserstein distance)
def direct_quantize(model, test_loader, device):
for i, (data, target) in enumerate(test_loader, 1):
data, target = data.to(device), target.to(device)
output = model.quantize_forward(data) # this calls the forward of each layer in the model in turn, which updates the quantization parameters (qw)
if i % 5000 == 0:
break
print('direct quantization finish')
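# Why fakefreeze() is used further down (a sketch): after calibration, the weights are quantized and
# then dequantized in place, so they remain FP32 values that lie on the quantization grid. Their
# distribution can then be compared against the full-precision checkpoint, e.g. (hypothetical names):
#   scipy.stats.wasserstein_distance(w_full.flatten(), w_fake_quant.flatten())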
def full_inference(model, test_loader, device):
correct = 0
for i, (data, target) in enumerate(test_loader, 1):
data, target = data.to(device), target.to(device)
output = model(data)
pred = output.argmax(dim=1, keepdim=True)
correct += pred.eq(target.view_as(pred)).sum().item()
print('\nTest set: Full Model Accuracy: {:.4f}%\n'.format(100. * correct / len(test_loader.dataset)))
def quantize_inference(model, test_loader, device):
correct = 0
for i, (data, target) in enumerate(test_loader, 1):
data, target = data.to(device), target.to(device)
output = model.quantize_inference(data)
pred = output.argmax(dim=1, keepdim=True)
correct += pred.eq(target.view_as(pred)).sum().item()
acc = 100. * correct / len(test_loader.dataset)
print('\nTest set: Quant Model Accuracy: {:.4f}%\n'.format(acc))
return acc
if __name__ == "__main__":
d1 = sys.argv[1]
batch_size = 32
using_bn = True
load_quant_model_file = None
# load_model_file = None
net = 'LeNet' # 1:
acc = 0
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
print(device)
train_loader = torch.utils.data.DataLoader(
datasets.CIFAR10('data', train=True, download=True,
transform=transforms.Compose([
transforms.ToTensor(),
transforms.Normalize((0.4914, 0.4822, 0.4465), (0.2023, 0.1994, 0.2010))
])),
batch_size=batch_size, shuffle=True, num_workers=1, pin_memory=False
)
test_loader = torch.utils.data.DataLoader(
datasets.CIFAR10('data', train=False, transform=transforms.Compose([
transforms.ToTensor(),
transforms.Normalize((0.4914, 0.4822, 0.4465), (0.2023, 0.1994, 0.2010))
])),
batch_size=batch_size, shuffle=True, num_workers=1, pin_memory=False
)
if using_bn:
model = LeNet().to(device)
# when generating the gradient-distribution plots, training was done from scratch
model.load_state_dict(torch.load('ckpt/cifar-10_lenet_bn.pt', map_location='cpu'))
# else:
# model = Net()
# model.load_state_dict(torch.load('ckpt/mnist_cnn.pt', map_location='cpu'))
# save_file = "ckpt/mnist_cnn_ptq.pt"
# model.to(device)
model.eval()
full_inference(model, test_loader, device)
num_bits = int(d1)
model.quantize(num_bits=num_bits)
model.eval()
print('Quantization bit: %d' % num_bits)
dir_name = './ptq_fake_log/' + 'quant_bit_' + str(d1) + '_log'
if not os.path.isdir(dir_name):
os.makedirs(dir_name, mode=0o777)
os.chmod(dir_name, mode=0o777)
qwriter = SummaryWriter(log_dir=dir_name)
# for name, param in model.named_parameters():
# qwriter.add_histogram(tag=name + '_data', values=param.data)
if load_quant_model_file is not None:
model.load_state_dict(torch.load(load_quant_model_file))
print("Successfully load quantized model %s" % load_quant_model_file)
direct_quantize(model, train_loader, device)
model.fakefreeze() # fake-quantize the weights
for name, param in model.named_parameters():
qwriter.add_histogram(tag=name + '_data', values=param.data)
dir_name ='ckpt/ptq_fakefreeze'
if not os.path.isdir(dir_name):
os.makedirs(dir_name, mode=0o777)
os.chmod(dir_name, mode=0o777)
save_file = 'ckpt/ptq_fakefreeze/cifar-10_lenet_bn_ptq_' + str(d1) + '_.pt'
torch.save(model.state_dict(), save_file)
# -*- coding: utf-8 -*-
from model import *
# from easydict import EasyDict
# from cleverhans.torch.attacks.fast_gradient_method import fast_gradient_method
# from cleverhans.torch.attacks.projected_gradient_descent import (
# projected_gradient_descent,
# )
import argparse
import torch
import torch.nn as nn
import torch.optim as optim
from torchvision import datasets, transforms
from torchvision import datasets, transforms
import os
import os.path as osp
import sys
import time
# import matplotlib.pyplot as plt
# import matplotlib
# sys.path.append("./project/p")
from get_weight import *
from torch.utils.tensorboard import SummaryWriter
def quantize_aware_training(model, device, train_loader, optimizer, epoch):
lossLayer = torch.nn.CrossEntropyLoss()
flag = 0
cnt = 0
losses=[]
for batch_idx, (data, target) in enumerate(train_loader, 1):
data, target = data.to(device), target.to(device)
optimizer.zero_grad()
output = model.quantize_forward(data) # forward of each quantized layer
loss = lossLayer(output, target) # this ties the loss to the quantized layers
loss.backward()
# cnt = cnt + 1
losses.append(loss)
histo, grads = (get_model_histogram(model))
if flag == 0:
flag = 1
grads_sum = grads
# accumulate the gradients over every batch of the epoch
else:
for k, v in grads_sum.items():
grads_sum[k] += grads[k]
#print(k)
optimizer.step()
if batch_idx % 50 == 0:
print('Quantize Aware Training Epoch: {} [{}/{}]\tLoss: {:.6f}'.format(
epoch, batch_idx * len(data), len(train_loader.dataset), loss.item()
))
# print(grad_sum['conv_layers.conv1.weight'])
# sys.exit(0)
# print('batch_idx: ' +str(batch_idx))
# print('cnt: ' + str(cnt))
# average gradient over the epoch
for k, v in grads_sum.items():
grads_sum[k] = v/len(train_loader.dataset)
return grads_sum,losses
#
# print(grads_sum)
#
# histo = get_grad_histogram(grads_sum)
#
# for s,_ in grads_sum.items():
# data = histo[s]
# bins = data['bins']
# histogram = data['histogram']
# max_idx = np.argmax(histogram)
# min_idx = np.argmin(histogram)
# width = abs(bins[max_idx] - bins[min_idx])
#
# plt.figure(figsize=(9, 6))
# plt.bar(bins[:-1], histogram, width=width)
# #plt.show()
#
# plt.savefig('diff_fig/int'+ sys.argv[1] + '/' + s +'.jpg')
#
# np.save('diff_fig/int' + sys.argv[1] + '/grads_sum.npy', grads_sum)
# sys.exit(0)
def full_inference(model, test_loader):
correct = 0
# report = EasyDict(nb_test=0, correct=0, correct_fgm=0, correct_pgd=0)
for i, (data, target) in enumerate(test_loader, 1):
data, target = data.to(device), target.to(device)
with torch.no_grad():
output = model(data)
pred = output.argmax(dim=1, keepdim=True)
correct += pred.eq(target.view_as(pred)).sum().item()
#x_fgm = fast_gradient_method(model, data, 0.01, np.inf)
#x_pgd = projected_gradient_descent(model, data, 0.01, 0.01, 40, np.inf)
# model prediction on clean examples
# _, y_pred = model(data).max(1)
# model prediction on FGM adversarial examples
#_, y_pred_fgm = model(x_fgm).max(1)
# model prediction on PGD adversarial examples
#_, y_pred_pgd = model(x_pgd).max(1)
# report.nb_test += target.size(0)
# report.correct += y_pred.eq(target).sum().item()
#report.correct_fgm += y_pred_fgm.eq(target).sum().item()
#report.correct_pgd += y_pred_pgd.eq(target).sum().item()
print('\nTest set: Full Model Accuracy: {:.0f}%\n'.format(100. * correct / len(test_loader.dataset)))
# print('\nTest set: Full Model Accuracy:')
# print(
# "test acc on clean examples (%): {:.3f}".format(
# report.correct / report.nb_test * 100.0
# )
# )
# print(
# "test acc on FGM adversarial examples (%): {:.3f}".format(
# report.correct_fgm / report.nb_test * 100.0
# )
# )
# print(
# "test acc on PGD adversarial examples (%): {:.3f}".format(
# report.correct_pgd / report.nb_test * 100.0
# )
# )
print('============================================')
def quantize_inference(model, test_loader):
correct = 0
acc=0
# report = EasyDict(nb_test=0, correct=0, correct_fgm=0, correct_pgd=0)
for i, (data, target) in enumerate(test_loader, 1):
data, target = data.to(device), target.to(device)
output = model.quantize_inference(data)
pred = output.argmax(dim=1, keepdim=True)
correct += pred.eq(target.view_as(pred)).sum().item()
acc = 100. * correct / len(test_loader.dataset)
print('\nTest set: Quant Model Accuracy: {:.0f}%\n'.format(acc))
# data, target = data.to(device), target.to(device)
# x_fgm = fast_gradient_method(model, data, 0.01, np.inf)
# x_pgd = projected_gradient_descent(model, data, 0.01, 0.01, 40, np.inf)
# model prediction on clean examples
# _, y_pred = model.quantize_inference(data).max(1)
# model prediction on FGM adversarial examples
# _, y_pred_fgm = model.quantize_inference(x_fgm).max(1)
# model prediction on PGD adversarial examples
#_, y_pred_pgd = model.quantize_inference(x_pgd).max(1)
# report.nb_test += target.size(0)
# report.correct += y_pred.eq(target).sum().item()
# report.correct_fgm += y_pred_fgm.eq(target).sum().item()
# report.correct_pgd += y_pred_pgd.eq(target).sum().item()
# acc = report.correct / report.nb_test * 100.0
# print(
# "test acc on clean examples (%): {:.3f}".format(acc
# )
# )
# print(
# "test acc on FGM adversarial examples (%): {:.3f}".format(
# report.correct_fgm / report.nb_test * 100.0
# )
# )
# print(
# "test acc on PGD adversarial examples (%): {:.3f}".format(
# report.correct_pgd / report.nb_test * 100.0
# )
# )
return acc
if __name__ == "__main__":
# d1=20
# d2=5
d1 = sys.argv[1] # num_bits
d2 = sys.argv[2] # epochs
d3 = sys.argv[3] # mode
d4 = sys.argv[4] # n_exp
batch_size = 32
test_batch_size = 32 # it is probably more reasonable for the test batch size to match the training batch size, since batch norm is involved
seed = 1
epochs = int(d2)
lr = 0.001 # 1%*0.01
momentum = 0.5
net = 'LeNet' # 1:
acc=0
using_bn = True
load_quant_model_file = None
# load_quant_model_file = "ckpt/mnist_cnnbn_qat.pt"
torch.manual_seed(seed)
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
print(device)
# datasets.imagenet
train_loader = torch.utils.data.DataLoader(
datasets.CIFAR10('./project/p/data', train=True, download=False,
transform=transforms.Compose([
transforms.ToTensor(),
transforms.Normalize((0.4914, 0.4822, 0.4465), (0.2023, 0.1994, 0.2010))
])),
batch_size=batch_size, shuffle=True, num_workers=1, pin_memory=False
)
test_loader = torch.utils.data.DataLoader(
datasets.CIFAR10('./project/p/data', train=False, download=False,transform=transforms.Compose([
transforms.ToTensor(),
transforms.Normalize((0.4914, 0.4822, 0.4465), (0.2023, 0.1994, 0.2010))
])),
batch_size=test_batch_size, shuffle=True, num_workers=1, pin_memory=False
)
#if using_bn:
#model = NetBN()
# if (net=='VGG19') == True:
# model = VGG_19().to(device)
# model.load_state_dict(torch.load('ckpt/cifar-10_vgg19_bn.pt', map_location='cpu'))
# save_file = "ckpt/cifar-10_vgg19_bn_qat.pt"
# elif (net=='LeNet') == True:
model = LeNet(n_exp=int(d4), mode = int(d3)).to(device)
# when generating the gradient-distribution plots, training was done from scratch
# fine tune qat
#model.load_state_dict(torch.load('ckpt/cifar-10_lenet_bn.pt', map_location='cuda'))
# save_file = "ckpt/cifar-10_lenet_bn_qat.pt"
# else:
# model = Net().to(device)
# model.load_state_dict(torch.load('ckpt/cifar-10_vgg19.pt', map_location='cpu'))
# save_file = "ckpt/cifar-10_vgg19_qat.pt"
model.to(device)
optimizer = optim.SGD(model.parameters(), lr=lr, momentum=momentum)
# consider using Adam
# INT
# writer = SummaryWriter(log_dir='./scratchlog/quant_bit_' + str(d1) + '_log')
writer = SummaryWriter(log_dir='./project/p/scratchlog/mode' + str(d3) + '_' + str(d4) + '/quant_bit_' + str(d1) + '_log')
model.eval() # eval mode (no gradient updates, no dropout)
full_inference(model, test_loader)
num_bits = int(d1)
# first define the quantized layers on self
model.quantize(num_bits=num_bits)
print('Quantization bit: %d' % num_bits)
if load_quant_model_file is not None:
model.load_state_dict(torch.load(load_quant_model_file))
print("Successfully load quantized model %s" % load_quant_model_file)
# quantization-aware training
for epoch in range(1, epochs + 1):
model.train() # training mode
grads_sum, losses = quantize_aware_training(model, device, train_loader, optimizer, epoch)
print('epoch:', epoch)
checkpoint = {
'model': model.state_dict(),
'optimizer': optimizer.state_dict(),
'grads':grads_sum,
'epoch': epoch,
'losses': losses
}
for name, param in grads_sum.items():
# the grads here may be accumulated values rather than averages
writer.add_histogram(tag=name + '_grad', values=param, global_step=epoch)
for name, param in model.named_parameters():
writer.add_histogram(tag=name + '_data', values=param.data, global_step=epoch)
# if (net == 'VGG19') == True:
# torch.save(checkpoint,
# 'checkpoint/cifar-10_vgg_19_bn_quant/ckpt_cifar-10_vgg19_bn_quant_%s.pth' % (str(epoch)))
#
#
# elif (net == 'LeNet') == True:
# INT
# dir_name = 'checkpoint/cifar-10_lenet_bn_quant/scratch/' + str(d1)
dir_name = './project/p/checkpoint/cifar-10_lenet_bn_quant/scratch/mode' + str(d3) + '_' + str(d4) + '/' + str(d1)
if not os.path.isdir(dir_name):
os.makedirs(dir_name,mode=0o777)
os.chmod(dir_name,mode=0o777)
# INT
# torch.save(checkpoint,'checkpoint/cifar-10_lenet_bn_quant/scratch/' + str(d1) + '/ckpt_cifar-10_lenet_bn_quant_' + str(epoch) + '.pth')
torch.save(checkpoint,
'./project/p/checkpoint/cifar-10_lenet_bn_quant/scratch/mode' + str(d3) + '_' + str(d4) + '/' + str(d1)+ '/ckpt_cifar-10_lenet_bn_quant_' + str(
epoch) + '.pth')
# quan_dict = torch.load('checkpoint/cifar-10_lenet_bn_quant/' + str(d1) + '/ckpt_cifar-10_lenet_bn_quant_%s.pth' % (str(epoch)))
# print(quan_dict['grads']['conv_layers.conv1.weight'].reshape(1,-1).shape)
#
#
# print('Saved all parameters!\n')
model.eval()
#torch.save(model.state_dict(), save_file)
model.freeze()
acc = quantize_inference(model, test_loader)
f = open('./project/p/lenet_qat_scratch_acc' + '.txt', 'a')
f.write('bit ' + str(d1) + ': ' + str(acc) + '\n')
f.close()
# -*- coding: utf-8 -*-
import torch
import torch.nn as nn
import torch.nn.functional as F
from new_train import get_children
from global_var import GlobalVariables
from module import *
class LeNet(nn.Module):
# CONV FLOPs with bias:    (2 * C_in * K_h * K_w) * H_out * W_out * C_out
#           without bias:  (2 * C_in * K_h * K_w - 1) * H_out * W_out * C_out
# FCN FLOPs with bias:     (2 * I) * O
#           without bias:  (2 * I - 1) * O
def __init__(self, img_size=32, input_channel=3, num_class=10, n_exp=4, mode=1):
super().__init__()
self.conv_layers = nn.ModuleDict({
# block1
'conv1': nn.Conv2d(3,6,5), # (2*3*5*5) * 28*28*6 FLOPs (bias accounts for 28*28*6 = 4704 of them): 4704/705600
'reluc1': nn.ReLU(),
'pool1': nn.MaxPool2d(2,2),
# block2
'conv2': nn.Conv2d(6,16,5), # (2*6*5*5) * 10*10*16 FLOPs (bias accounts for 10*10*16 = 1600 of them): 1600/480000
'reluc2': nn.ReLU(),
'pool2': nn.MaxPool2d(2,2),
})
self.fc_layers = nn.ModuleDict({
# classifier
'fc1': nn.Linear(16*5*5,120), # (2*16*5*5)*120 FLOPs (bias accounts for 120 of them): 120/96000
'reluf1': nn.ReLU(),
'fc2': nn.Linear(120,84), # (2*120)*84 FLOPs (bias accounts for 84 of them): 84/20160
'reluf2': nn.ReLU(),
'fc3': nn.Linear(84, num_class)
})
self.mode = mode
self.n_exp = n_exp
def forward(self,x):
for _,layer in self.conv_layers.items():
x = layer(x)
output = x.view(-1,16*5*5)
for _,layer in self.fc_layers.items():
output = layer(output)
out = F.softmax(output, dim=1) # softmax is optional here; it has little effect
return out
def quantize(self, num_bits=8):
self.quantize_conv_layers=nn.ModuleDict({
# qi=True: the previous layer's output has not been quantized yet, so it needs quantizing. MaxPool and ReLU do not change the quantized INT values or the min/max, so layers following them use qi=False.
# If the previous layer is a conv, the data's min/max has changed, so qi=True is needed to quantize it.
'qconv1': QConv2d(self.conv_layers['conv1'], qi=True, qo=True, num_bits=num_bits, n_exp=self.n_exp, mode=self.mode),
'qreluc1': QReLU(n_exp=self.n_exp, mode=self.mode),
'qpool1': QMaxPooling2d(kernel_size=2,stride=2,padding=0, n_exp=self.n_exp, mode=self.mode),
'qconv2': QConv2d(self.conv_layers['conv2'], qi=False, qo=True, num_bits=num_bits, n_exp=self.n_exp, mode=self.mode),
'qreluc2': QReLU(n_exp=self.n_exp, mode=self.mode),
'qpool2': QMaxPooling2d(kernel_size=2, stride=2, padding=0, n_exp=self.n_exp, mode=self.mode)
})
self.quantize_fc_layers = nn.ModuleDict({
'qfc1': QLinear(self.fc_layers['fc1'],qi=False,qo=True,num_bits=num_bits, n_exp=self.n_exp, mode=self.mode),
'qreluf1': QReLU(n_exp=self.n_exp, mode=self.mode),
'qfc2': QLinear(self.fc_layers['fc2'],qi=False,qo=True,num_bits=num_bits, n_exp=self.n_exp, mode=self.mode),
'qreluf2': QReLU(n_exp=self.n_exp, mode=self.mode),
'qfc3': QLinear(self.fc_layers['fc3'],qi=False,qo=True,num_bits=num_bits, n_exp=self.n_exp, mode=self.mode)
})
def quantize_forward(self, x):
for _, layer in self.quantize_conv_layers.items():
x = layer(x)
output = x.view(-1,16*5*5)
for s, layer in self.quantize_fc_layers.items():
output = layer(output)
out = F.softmax(output, dim=1) # softmax is optional here; it has little effect, used for the loss
return out
def freeze(self):
self.quantize_conv_layers['qconv1'].freeze()
self.quantize_conv_layers['qreluc1'].freeze(self.quantize_conv_layers['qconv1'].qo)
self.quantize_conv_layers['qpool1'].freeze(self.quantize_conv_layers['qconv1'].qo)
self.quantize_conv_layers['qconv2'].freeze(self.quantize_conv_layers['qconv1'].qo)
self.quantize_conv_layers['qreluc2'].freeze(self.quantize_conv_layers['qconv2'].qo)
self.quantize_conv_layers['qpool2'].freeze(self.quantize_conv_layers['qconv2'].qo)
self.quantize_fc_layers['qfc1'].freeze(qi=self.quantize_conv_layers['qconv2'].qo)
self.quantize_fc_layers['qreluf1'].freeze(self.quantize_fc_layers['qfc1'].qo)
self.quantize_fc_layers['qfc2'].freeze(qi=self.quantize_fc_layers['qfc1'].qo)
self.quantize_fc_layers['qreluf2'].freeze(self.quantize_fc_layers['qfc2'].qo)
self.quantize_fc_layers['qfc3'].freeze(qi=self.quantize_fc_layers['qfc2'].qo)
def fakefreeze(self):
self.quantize_conv_layers['qconv1'].fakefreeze()
self.quantize_conv_layers['qreluc1'].fakefreeze(self.quantize_conv_layers['qconv1'].qo)
self.quantize_conv_layers['qpool1'].fakefreeze(self.quantize_conv_layers['qconv1'].qo)
self.quantize_conv_layers['qconv2'].fakefreeze(self.quantize_conv_layers['qconv1'].qo)
self.quantize_conv_layers['qreluc2'].fakefreeze(self.quantize_conv_layers['qconv2'].qo)
self.quantize_conv_layers['qpool2'].fakefreeze(self.quantize_conv_layers['qconv2'].qo)
self.quantize_fc_layers['qfc1'].fakefreeze(qi=self.quantize_conv_layers['qconv2'].qo)
self.quantize_fc_layers['qreluf1'].fakefreeze(self.quantize_fc_layers['qfc1'].qo)
self.quantize_fc_layers['qfc2'].fakefreeze(qi=self.quantize_fc_layers['qfc1'].qo)
self.quantize_fc_layers['qreluf2'].fakefreeze(self.quantize_fc_layers['qfc2'].qo)
self.quantize_fc_layers['qfc3'].fakefreeze(qi=self.quantize_fc_layers['qfc2'].qo)
def quantize_inference(self, x):
x = self.quantize_conv_layers['qconv1'].qi.quantize_tensor(x, self.mode)
for s, layer in self.quantize_conv_layers.items():
x = layer.quantize_inference(x)
output = x.view( -1,16*5*5)
for s, layer in self.quantize_fc_layers.items():
output = layer.quantize_inference(output)
# Only mode 1 needs this range mapping to bring the quantized data back to a range similar to the original; PoT (power-of-two) quantization does not, since it recovers the range inherently
if self.mode == 1:
output = self.quantize_fc_layers['qfc3'].qo.dequantize_tensor(output, self.mode)
out = F.softmax(output, dim=1) # a quantized softmax (QSoftmax) would probably be better here; to be revised later
return out
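# A minimal sketch (not part of module.py; the function names and the scale / zero_point
# attributes are illustrative assumptions) of the uniform mode-1 mapping that the
# quantize_tensor / dequantize_tensor calls above refer to.
def _uniform_quantize_sketch(x, scale, zero_point):
    # FP -> INT domain: q = round(x / scale) + zero_point
    return torch.round(x / scale) + zero_point
def _uniform_dequantize_sketch(q, scale, zero_point):
    # INT -> FP domain: x' = scale * (q - zero_point)
    return scale * (q - zero_point)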
class NetBN(nn.Module):
def __init__(self, num_channels=1):
super(NetBN, self).__init__()
self.conv1 = nn.Conv2d(num_channels, 40, 3, 1)
self.bn1 = nn.BatchNorm2d(40)
self.conv2 = nn.Conv2d(40, 40, 3, 1)
self.bn2 = nn.BatchNorm2d(40)
self.fc = nn.Linear(5 * 5 * 40, 10)
def forward(self, x):
x = self.conv1(x)
x = self.bn1(x)
x = F.relu(x)
x = F.max_pool2d(x, 2, 2)
x = self.conv2(x)
x = self.bn2(x)
x = F.relu(x)
x = F.max_pool2d(x, 2, 2)
x = x.view(-1, 5 * 5 * 40)
x = self.fc(x)
return x
def quantize(self, num_bits=8):
self.qconv1 = QConvBNReLU(self.conv1, self.bn1, qi=True, qo=True, num_bits=num_bits)
self.qmaxpool2d_1 = QMaxPooling2d(kernel_size=2, stride=2, padding=0)
self.qconv2 = QConvBNReLU(self.conv2, self.bn2, qi=False, qo=True, num_bits=num_bits)
self.qmaxpool2d_2 = QMaxPooling2d(kernel_size=2, stride=2, padding=0)
self.qfc = QLinear(self.fc, qi=False, qo=True, num_bits=num_bits)
def quantize_forward(self, x):
x = self.qconv1(x)
x = self.qmaxpool2d_1(x)
x = self.qconv2(x)
x = self.qmaxpool2d_2(x)
x = x.view(-1, 5*5*40)
x = self.qfc(x)
return x
def freeze(self):
self.qconv1.freeze()
self.qmaxpool2d_1.freeze(self.qconv1.qo)
self.qconv2.freeze(qi=self.qconv1.qo) # maxpool does not change min/max
self.qmaxpool2d_2.freeze(self.qconv2.qo)
self.qfc.freeze(qi=self.qconv2.qo) # maxpool does not change min/max
def quantize_inference(self, x):
qx = self.qconv1.qi.quantize_tensor(x)
qx = self.qconv1.quantize_inference(qx)
qx = self.qmaxpool2d_1.quantize_inference(qx)
qx = self.qconv2.quantize_inference(qx)
qx = self.qmaxpool2d_2.quantize_inference(qx)
qx = qx.view(-1, 5*5*40)
qx = self.qfc.quantize_inference(qx)
out = self.qfc.qo.dequantize_tensor(qx) # INT -> FP
return out
# ResNet model definition
# adapted for CIFAR-10
class ResNet(nn.Module):
def __init__(self, block, layers, num_classes=10, n_exp=4, mode=1): # the number of classes is set to 10 here
super(ResNet, self).__init__()
self.inplanes = 16 # CIFAR-10 images are small, so start with fewer channels
GlobalVariables.SELF_INPLANES = self.inplanes
print('resnet init:'+ str(GlobalVariables.SELF_INPLANES))
# input layer
self.conv1 = nn.Conv2d(3, 16, kernel_size=3, stride=1, padding=1,
bias=False)
self.bn1 = nn.BatchNorm2d(16)
self.relu = nn.ReLU()
# residual layers (4 stages, each stage containing 6n+2 conv layers)
# self.layer1 = self._make_layer(block, 16, layers[0])
# self.layer2 = self._make_layer(block, 32, layers[1], stride=2)
# self.layer3 = self._make_layer(block, 64, layers[2], stride=2)
# self.layer4 = self._make_layer(block, 128, layers[3], stride=2)
self.layer1 = MakeLayer(block, 16, layers[0])
self.layer2 = MakeLayer(block, 32, layers[1], stride=2)
self.layer3 = MakeLayer(block, 64, layers[2], stride=2)
self.layer4 = MakeLayer(block, 128, layers[3], stride=2)
# classification layer
self.avgpool = nn.AdaptiveAvgPool2d((1, 1))
self.fc = nn.Linear(128 * block.expansion, num_classes)
# self.layers_to_quantize = [self.conv1, self.bn1, self.relu, self.layer1, self.layer2, self.layer3, self.layer4, self.avgpool, self.fc]
# parameter initialization
for m in self.modules():
if isinstance(m, nn.Conv2d):
nn.init.kaiming_normal_(m.weight, mode='fan_out', nonlinearity='relu')
elif isinstance(m, nn.BatchNorm2d):
nn.init.constant_(m.weight, 1)
nn.init.constant_(m.bias, 0)
# note: for ResNet, self.inplanes keeps being updated and passed along
# def _make_layer(self, block, planes, blocks, stride=1):
# downsample = None
# # stride is the conv layer's stride, and self.inplanes is the number of input channels of the current residual block,
# # while planes * block.expansion is its number of output channels. So when stride != 1 or self.inplanes != planes * block.expansion, a downsample branch is needed.
# # Within a layer, every residual block except the first has equal input and output channel counts and the same stride (1 or 2); the spatial size of the feature maps shrinks gradually as blocks are stacked.
# if stride != 1 or self.inplanes != planes * block.expansion:
# downsample = nn.Sequential(
# nn.Conv2d(self.inplanes, planes * block.expansion,
# kernel_size=1, stride=stride, bias=False),
# nn.BatchNorm2d(planes * block.expansion),
# )
# layers = []
# layers.append(block(self.inplanes, planes, stride, downsample))
# self.inplanes = planes * block.expansion
# for _ in range(1, blocks): # number of blocks
# layers.append(block(self.inplanes, planes))
# return nn.Sequential(*layers)
def forward(self, x):
# 输入层
x = self.conv1(x)
x = self.bn1(x)
x = self.relu(x)
# Compared with the ImageNet version, one maxpool is omitted here: CIFAR-10 images are already small, and pooling again would make them too small
# residual layers
x = self.layer1(x)
x = self.layer2(x)
x = self.layer3(x)
x = self.layer4(x)
# classification layer
x = self.avgpool(x) # output shape is B, C, 1, 1
x = x.view(x.size(0), -1)
x = self.fc(x)
return x
def quantize(self, num_bits=8):
pass
def quantize_forward(self, x):
# for _, layer in self.quantize_layers.items():
# x = layer(x)
# out = F.softmax(x, dim=1)
# return out
pass
def freeze(self):
pass
def fakefreeze(self):
pass
def quantize_inference(self, x):
pass
# BasicBlock class
class BasicBlock(nn.Module):
expansion = 1
def __init__(self, inplanes, planes, stride=1, downsample=None):
super(BasicBlock, self).__init__()
# first conv layer
self.conv1 = nn.Conv2d(inplanes, planes, kernel_size=3, stride=stride,
padding=1, bias=False)
self.bn1 = nn.BatchNorm2d(planes)
# second conv layer
self.conv2 = nn.Conv2d(planes, planes, kernel_size=3, stride=1,
padding=1, bias=False)
self.bn2 = nn.BatchNorm2d(planes)
# shortcut
self.relu = nn.ReLU()
self.downsample = downsample
self.stride = stride
def forward(self, x):
identity = x
out = self.conv1(x)
out = self.bn1(out)
out = self.relu(out)
out = self.conv2(out)
out = self.bn2(out)
if self.downsample is not None:
identity = self.downsample(identity)
out += identity
out = self.relu(out)
return out
def quantize(self, num_bits=8):
self.qconvbnrelu1 = QConvBNReLU(self.conv1,self.bn1,qi=False,qo=True,num_bits=num_bits)
self.qconvbn1 = QConvBN(self.conv2,self.bn2,qi=False,qo=True,num_bits=num_bits)
if self.downsample is not None:
self.qconvbn2 = QConvBN(self.downsample[0],self.downsample[1],qi=False,qo=True,num_bits=num_bits)
self.qrelu1 = QReLU()
def quantize_forward(self, x):
identity = x
out = self.qconvbnrelu1(x)
out = self.qconvbn1(out)
if self.downsample is not None:
identity = self.qconvbn2(identity)
# residual add
out = identity + out # a proper quantized elementwise-add transform is still needed here; to be fixed later
out = self.qrelu1(out)
return out
def freeze(self):
# qconvbnrelu1 could actually reuse the previous layer's qo, but passing it in is awkward, so it is not done here
# still needs careful checking
self.qconvbnrelu1.freeze()
self.qconvbn1.freeze(qi = self.qconvbnrelu1.qo)
if self.downsample is not None:
self.qconvbn2.freeze(qi=self.qconvbn1.qo)
self.qrelu1.freeze(self.qconvbn2.qo)
else:
self.qrelu1.freeze(self.qconvbn1)
def quantize_inference(self, x):
# The initial quantize_tensor / final dequantize_tensor should not be needed here, since this is not the first/last layer; as long as every intermediate layer stays in the quantized domain, no such handling is required.
identity = x
out = self.qconvbnrelu1.quantize_inference(x)
out = self.qconvbn1.quantize_inference(out)
if self.downsample is not None:
identity = self.qconvbn2.quantize_inference(identity)
out = identity + out # a proper quantized elementwise-add transform is still needed here; to be fixed later
out = self.qrelu1.quantize_inference(out)
return out
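# A minimal sketch of the quantized elementwise add mentioned in the comments above.
# This is not the repository's API: the function name and the scale / zero_point
# attributes on the quantization parameters are assumptions for illustration only.
# Both residual branches are dequantized to floating point, added, and the sum is
# requantized with the output branch's parameters.
def quantized_elementwise_add_sketch(qa, qb, qp_a, qp_b, qp_out):
    ra = qp_a.scale * (qa.float() - qp_a.zero_point)   # dequantize branch a
    rb = qp_b.scale * (qb.float() - qp_b.zero_point)   # dequantize branch b
    return torch.round((ra + rb) / qp_out.scale) + qp_out.zero_point  # requantize the sum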
# Bottleneck class
class Bottleneck(nn.Module):
expansion = 4
def __init__(self, inplanes, planes, stride=1, downsample=None):
super(Bottleneck, self).__init__()
# 1x1 conv layer
self.conv1 = nn.Conv2d(inplanes, planes, kernel_size=1, bias=False)
self.bn1 = nn.BatchNorm2d(planes)
# 3x3 conv layer
self.conv2 = nn.Conv2d(planes, planes, kernel_size=3, stride=stride,
padding=1, bias=False)
self.bn2 = nn.BatchNorm2d(planes)
# 1x1 conv layer
self.conv3 = nn.Conv2d(planes, planes * self.expansion, kernel_size=1,
bias=False)
self.bn3 = nn.BatchNorm2d(planes * self.expansion)
# shortcut
self.relu = nn.ReLU()
self.downsample = downsample
self.stride = stride
def forward(self, x):
identity = x
out = self.conv1(x)
out = self.bn1(out)
out = self.relu(out)
out = self.conv2(out)
out = self.bn2(out)
out = self.relu(out)
out = self.conv3(out)
out = self.bn3(out)
if self.downsample is not None:
identity = self.downsample(x)
out += identity # the residual addition happens here
out = self.relu(out)
return out
class MakeLayer(nn.Module):
def __init__(self, block, planes, blocks, stride=1):
super(MakeLayer, self).__init__()
print('makelayer init:'+ str(GlobalVariables.SELF_INPLANES))
self.downsample = None
if stride != 1 or GlobalVariables.SELF_INPLANES != planes * block.expansion:
self.downsample = nn.Sequential(
nn.Conv2d(GlobalVariables.SELF_INPLANES, planes * block.expansion,kernel_size=1, stride=stride, bias=False),
nn.BatchNorm2d(planes * block.expansion)
)
self.blockdict = nn.ModuleDict()
self.blockdict['block1'] = block(GlobalVariables.SELF_INPLANES, planes, stride, self.downsample)
GlobalVariables.SELF_INPLANES = planes * block.expansion
for i in range(1, blocks): # number of blocks; a ModuleDict is needed here to hold them
self.blockdict['block' + str(i+1)] = block(GlobalVariables.SELF_INPLANES, planes) # the blocks are instantiated here
# def _make_layer(self, block, planes, blocks, stride=1):
# downsample = None
# # stride is the conv layer's stride, and SELF_INPLANES is the number of input channels of the current residual block,
# # while planes * block.expansion is its number of output channels. So when stride != 1 or SELF_INPLANES != planes * block.expansion, a downsample branch is needed.
# # Within a layer, every residual block except the first has equal input and output channel counts and the same stride (1 or 2); the spatial size of the feature maps shrinks gradually as blocks are stacked.
# if stride != 1 or SELF_INPLANES != planes * block.expansion:
# downsample = nn.Sequential(
# nn.Conv2d(SELF_INPLANES, planes * block.expansion,
# kernel_size=1, stride=stride, bias=False),
# nn.BatchNorm2d(planes * block.expansion),
# )
# layers = []
# layers.append(block(SELF_INPLANES, planes, stride, downsample))
# SELF_INPLANES = planes * block.expansion
# for _ in range(1, blocks): # number of blocks
# layers.append(block(SELF_INPLANES, planes))
# return nn.Sequential(*layers)
def forward(self,x):
for _, layer in self.blockdict.items():
x = layer(x)
return x
def quantize(self, num_bits=8):
# needs checking
for _, layer in self.blockdict.items():
layer.quantize(num_bits=num_bits) # each entry is a block, and each block defines its own quantize strategy
def quantize_forward(self, x):
for _, layer in self.blockdict.items():
x = layer.quantize_forward(x) # each block has its own quantize_forward
return x
def freeze(self):
# qconvbnrelu1 could actually reuse the previous layer's qo, but passing it in is awkward, so it is not done here
# still needs careful checking
for _, layer in self.blockdict.items():
layer.freeze() # each block has its own freeze
def quantize_inference(self, x):
# The initial quantize_tensor / final dequantize_tensor should not be needed here, since this is not the first/last layer; as long as every intermediate layer stays in the quantized domain, no such handling is required.
for _, layer in self.blockdict.items():
x = layer.quantize_inference(x) # each block has its own quantize_inference
return x
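# Note on the class above (my reading, for illustration only): MakeLayer relies on the
# module-level GlobalVariables.SELF_INPLANES to carry the running channel count between
# consecutive layers, mirroring what the commented-out _make_layer did with self.inplanes.
# With BasicBlock (expansion = 1), for example:
#   MakeLayer(BasicBlock, 16, 2)            # SELF_INPLANES stays 16
#   MakeLayer(BasicBlock, 32, 2, stride=2)  # SELF_INPLANES becomes 32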
# ResNet-18 constructor
def resnet18(**kwargs):
model = ResNet(BasicBlock, [2, 2, 2, 2], **kwargs)
return model
# ResNet-50 constructor
def resnet50(**kwargs):
model = ResNet(Bottleneck, [3, 4, 6, 3], **kwargs)
return model
# ResNet-152 constructor
def resnet152(**kwargs):
model = ResNet(Bottleneck, [3, 8, 36, 3], **kwargs)
return model
\ No newline at end of file
# -*- coding: utf-8 -*-
from model import *
from get_weight import *
import torch
import torch.nn as nn
import torchvision
import torch.optim as optim
from torchvision import datasets, transforms
import os
import os.path as osp
import sys
import time
# import matplotlib.pyplot as plt
# import matplotlib
from torchvision.datasets import ImageFolder
from torch.utils.tensorboard import SummaryWriter
from absl import app, flags
# from easydict import EasyDict
# from cleverhans.torch.attacks.fast_gradient_method import fast_gradient_method
# from cleverhans.torch.attacks.projected_gradient_descent import (
# projected_gradient_descent,
# )
def train(model, device, train_loader, optimizer, epoch):
model.train()
lossLayer = torch.nn.CrossEntropyLoss()
flag = 0
cnt = 0
for batch_idx, (data, target) in enumerate(train_loader):
cnt = cnt + 1
data, target = data.to(device), target.to(device)
optimizer.zero_grad()
output = model(data)
loss = lossLayer(output, target)
loss.backward()
histo, grads = get_model_histogram(model)
if flag == 0:
flag = 1
grads_sum = grads
else:
for k,v in grads_sum.items():
grads_sum[k] += grads[k]
optimizer.step()
if batch_idx % 50 == 0:
print('Train Epoch: {} [{}/{}]\tLoss: {:.6f}'.format(
epoch, batch_idx * len(data), len(train_loader.dataset), loss.item()
))
for k, v in grads_sum.items():
grads_sum[k] = v / len(train_loader.dataset)
return grads_sum
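# A small illustrative helper (an assumption, not in the original script) that makes the two
# possible normalisations of grads_sum explicit: train() above divides the accumulated
# per-batch gradients by the dataset size, while dividing by the number of batches would
# give a per-batch average, which the "accumulated, not averaged" comments hint at.
def normalize_grads(grads_sum, dataset_size, num_batches, per_batch=False):
    divisor = num_batches if per_batch else dataset_size
    return {k: v / divisor for k, v in grads_sum.items()}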
def test(model, device, test_loader):
model.eval()
test_loss = 0
correct = 0
acc=0
lossLayer = torch.nn.CrossEntropyLoss(reduction='sum')
# report = EasyDict(nb_test=0, correct=0, correct_fgm=0, correct_pgd=0)
with torch.no_grad():
for data, target in test_loader:
data, target = data.to(device), target.to(device)
output = model(data)
# x_fgm = fast_gradient_method(model, data, 0.01, np.inf)
# x_pgd = projected_gradient_descent(model, data, 0.01, 0.01, 40, np.inf)
# model prediction on clean examples
# _, y_pred = model(data).max(1)
# # model prediction on FGM adversarial examples
# _, y_pred_fgm = model(x_fgm).max(1)
#
# # model prediction on PGD adversarial examples
# _, y_pred_pgd = model(x_pgd).max(1)
# report.nb_test += target.size(0)
# report.correct += y_pred.eq(target).sum().item()
# report.correct_fgm += y_pred_fgm.eq(target).sum().item()
# report.correct_pgd += y_pred_pgd.eq(target).sum().item()
test_loss += lossLayer(output, target).item()
pred = output.argmax(dim=1, keepdim=True)
correct += pred.eq(target.view_as(pred)).sum().item()
test_loss /= len(test_loader.dataset)
acc=100. * correct / len(test_loader.dataset)
print('\nTest set: Average loss: {:.4f}, Accuracy: {:.0f}%\n'.format(
test_loss, acc
))
# print(
# "test acc on clean examples (%): {:.3f}".format(
# report.correct / report.nb_test * 100.0
# )
# )
# print(
# "test acc on FGM adversarial examples (%): {:.3f}".format(
# report.correct_fgm / report.nb_test * 100.0
# )
# )
# print(
# "test acc on PGD adversarial examples (%): {:.3f}".format(
# report.correct_pgd / report.nb_test * 100.0
# )
# )
return acc
batch_size = 32
test_batch_size = 32
seed = 1
# epochs = 15
d1 = sys.argv[1]
epochs = int(d1)
lr = 0.001
momentum = 0.5
save_model = False
using_bn = True
net = 'LeNet'
torch.manual_seed(seed)
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
print(device)
train_loader = torch.utils.data.DataLoader(
datasets.CIFAR10('data', train=True, download=True,
transform=transforms.Compose([
transforms.ToTensor(),
transforms.Normalize((0.4914, 0.4822, 0.4465), (0.2023, 0.1994, 0.2010))
])),
batch_size=batch_size, shuffle=True, num_workers=1, pin_memory=True
)
test_loader = torch.utils.data.DataLoader(
datasets.CIFAR10('data', train=False, transform=transforms.Compose([
transforms.ToTensor(),
transforms.Normalize((0.4914, 0.4822, 0.4465), (0.2023, 0.1994, 0.2010))
])),
batch_size=test_batch_size, shuffle=True, num_workers=1, pin_memory=True
)
#if using_bn:
if net == 'VGG19':
model = VGG_19().to(device)
elif net == 'LeNet':
model = LeNet().to(device)
# else:
# model = Net().to(device)
optimizer = optim.SGD(model.parameters(), lr=lr, momentum=momentum)
writer = SummaryWriter(log_dir='./fullprecision_log')
#optimizer = optim.Adam(model.parameters(), lr=lr, betas=(0.9,0.999),eps=1e-08,weight_decay=0,amsgrad=False)
for epoch in range(1, epochs + 1):
grads_sum = train(model, device, train_loader, optimizer, epoch)
acc = test(model, device, test_loader)
print('epoch:', epoch)
checkpoint = {
'model': model.state_dict(),
'optimizer': optimizer.state_dict(),
'epoch': epoch,
'grads': grads_sum,
'accuracy':acc
}
# for name, param in model.named_parameters():
# writer.add_histogram(tag=name + '_grad', values=param.grad, global_step=epoch)
# writer.add_histogram(tag=name + '_data', values=param.data, global_step=epoch)
for name, param in grads_sum.items():
# the grad here should be an accumulated value, not an average
writer.add_histogram(tag=name + '_grad', values=param, global_step=epoch)
# take the weights after the last batch of this epoch has been processed
for name, param in model.named_parameters():
writer.add_histogram(tag=name + '_data', values=param.data, global_step=epoch)
if net == 'LeNet':
torch.save(checkpoint, 'checkpoint/cifar-10_lenet_bn/full/ckpt_cifar-10_lenet_bn_%s.pth' % (str(epoch)))
# save parameters
# if (net == 'VGG19') == True:
# torch.save(checkpoint, 'checkpoint/cifar-10_vgg19_bn/ckpt_cifar-10_vgg19_bn_%s.pth' % (str(epoch)))
# elif (net == 'LeNet') == True:
# torch.save(checkpoint, 'checkpoint/cifar-10_lenet_bn/ckpt_cifar-10_lenet_bn_%s.pth' % (str(epoch)))
#print('Saved all parameters!\n')
if save_model:
if not osp.exists('ckpt'):
os.makedirs('ckpt')
#if using_bn:
if net == 'VGG19':
torch.save(model.state_dict(), 'ckpt/cifar-10_vgg19_bn.pt')
elif net == 'LeNet':
torch.save(model.state_dict(), 'ckpt/cifar-10_lenet_bn.pt')
# else:
# torch.save(model.state_dict(), 'ckpt/cifar-10_vgg19.pt')