Commit 62550290 by Zhihong Ma

feat: new version combined. ResNet18 trial

parent 84050d7f
import sys
import os
# Extract per-layer parameter and FLOPs ratios from param_flops.txt, the redirected per-layer output of get_param.py
def extract_ratio():
fr = open('param_flops.txt','r')
lines = fr.readlines()
layer = []
par_ratio = []
flop_ratio = []
for line in lines:
if '(' in line and ')' in line:
layer.append(line.split(')')[0].split('(')[1])
r1 = line.split('%')[0].split(',')[-1]
r1 = float(r1)
par_ratio.append(r1)
r2 = line.split('%')[-2].split(',')[-1]
r2 = float(r2)
flop_ratio.append(r2)
return layer, par_ratio, flop_ratio
if __name__ == "__main__":
layer, par_ratio, flop_ratio = extract_ratio()
print(layer)
print(par_ratio)
print(flop_ratio)
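# Illustrative note on the assumed input format (not copied from a real param_flops.txt):
# ptflops' per-layer output looks roughly like
#   (conv1): Conv2d(1.79 k, 0.056% Params, 1.84 MMac, 0.323% MACs, 3, 16, kernel_size=(3, 3), ...)
# so split(')')[0].split('(')[1] recovers the layer name 'conv1',
# split('%')[0].split(',')[-1] the parameter percentage (0.056), and
# split('%')[-2].split(',')[-1] the MACs percentage (0.323).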
from torch.autograd import Function
class FakeQuantize(Function):
@staticmethod
def forward(ctx, x, qparam):
x = qparam.quantize_tensor(x)
x = qparam.dequantize_tensor(x)
return x
@staticmethod
def backward(ctx, grad_output):
return grad_output, None
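# Minimal usage sketch (names illustrative): FakeQuantize is a straight-through estimator.
# The forward pass snaps x onto the grid described by qparam (any object exposing
# quantize_tensor/dequantize_tensor, e.g. QParam from module.py); the backward pass
# lets gradients through unchanged:
#   x_fq = FakeQuantize.apply(x, qparam)  # quantize-then-dequantize of x
#   x_fq.sum().backward()                 # x.grad is the same as if x_fq were x itself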
from model import *
import torch
from ptflops import get_model_complexity_info
if __name__ == "__main__":
model = resnet18()
full_file = 'ckpt/cifar10_ResNet18.pt'
model.load_state_dict(torch.load(full_file))
flops, params = get_model_complexity_info(model, (3, 32, 32), as_strings=True, print_per_layer_stat=True)
import os
import torch
import torch.nn as nn
import torch.nn.functional as F
import numpy as np
from collections import OrderedDict
def get_model_histogram(model):
"""
Description:
- collect per-parameter gradient histograms from the model and store them in an OrderedDict
Args:
- model: (torch.nn.Module), torch model
Returns:
- gradshisto: OrderedDict mapping parameter name to {'histogram', 'bins'}
- grads: OrderedDict mapping parameter name to the raw gradient array
"""
gradshisto = OrderedDict()
grads = OrderedDict()
for name, params in model.named_parameters():
grad = params.grad
if grad is not None:
tmp = {}
params_np = grad.cpu().numpy()
histogram, bins = np.histogram(params_np.flatten(),bins=20)
tmp['histogram'] = list(histogram)
tmp['bins'] = list(bins)
gradshisto[name] = tmp
grads[name] = params_np
return gradshisto,grads
def get_model_norm_gradient(model):
"""
Description:
- get the norm of each parameter's gradient from the model and store it in an OrderedDict
Args:
- model: (torch.nn.Module), torch model
Returns:
- grads in an OrderedDict
"""
grads = OrderedDict()
for name, params in model.named_parameters():
grad = params.grad
if grad is not None:
grads[name] = grad.norm().item()
return grads
def get_grad_histogram(grads_sum):
gradshisto = OrderedDict()
# grads = OrderedDict()
for name, params in grads_sum.items():
grad = params
if grad is not None:
tmp = {}
#params_np = grad.cpu().numpy()
params_np = grad
histogram, bins = np.histogram(params_np.flatten(),bins=20)
tmp['histogram'] = list(histogram)
tmp['bins'] = list(bins)
gradshisto[name] = tmp # one histogram per layer (tmp holds the bin counts and bin edges)
# grads[name] = params_np
return gradshisto
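# Usage sketch (mirrors train.py): call get_model_histogram right after loss.backward()
# and before optimizer.step(); then
#   histo, grads = get_model_histogram(model)
# gives, per parameter name, a 20-bin histogram (histo[name]['histogram'] / ['bins'])
# and the raw gradient as a numpy array (grads[name]).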
class GlobalVariables:
SELF_INPLANES = 0
# -*- coding: utf-8 -*-
# Shares global variables across multiple modules
def _init(): # initialize the global dict
global _global_dict
_global_dict = {}
def set_value(value,is_bias=False):
# store a global value; bias values get a slot of their own
if is_bias:
_global_dict[0] = value
else:
_global_dict[1] = value
def get_value(is_bias=False): # bias uses a precision independent of the other values
if is_bias:
return _global_dict[0]
else:
return _global_dict[1]
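# Usage sketch (mirrors how ptq.py drives this module): the dict holds the quantization
# value tables, with slot 0 reserved for the bias table and slot 1 for everything else.
#   gol._init()
#   gol.set_value(bias_list, is_bias=True)  # bias quantization table
#   gol.set_value(plist)                    # weight/activation quantization table
#   plist = gol.get_value()                 # read back, e.g. inside get_nearest_val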
import torch
import torch.nn as nn
import torch.nn.functional as F
from module import *
import module
from global_var import GlobalVariables
class AlexNet_BN(nn.Module):
def __init__(self, num_channels=3, num_classes=10):
super(AlexNet_BN, self).__init__()
# original size 32x32
self.conv1 = nn.Conv2d(num_channels, 32, kernel_size=3, padding=1, bias=True)
self.bn1 = nn.BatchNorm2d(32)
self.relu1 = nn.ReLU(inplace=True)
self.pool1 = nn.MaxPool2d(kernel_size=2, stride=2) # output[32, 16, 16]
self.conv2 = nn.Conv2d(32, 64, kernel_size=3, padding=1, bias=True) # output[64, 16, 16]
self.bn2 = nn.BatchNorm2d(64)
self.relu2 = nn.ReLU(inplace=True)
self.pool2 = nn.MaxPool2d(kernel_size=2, stride=2) # output[64, 8, 8]
self.conv3 = nn.Conv2d(64, 128, kernel_size=3, padding=1, bias=True) # output[128, 8, 8]
self.bn3 = nn.BatchNorm2d(128)
self.relu3 = nn.ReLU(inplace=True)
self.conv4 = nn.Conv2d(128, 256, kernel_size=3, padding=1, bias=True) # output[256, 8, 8]
self.bn4 = nn.BatchNorm2d(256)
self.relu4 = nn.ReLU(inplace=True)
self.conv5 = nn.Conv2d(256, 256, kernel_size=3, padding=1, bias=True) # output[256, 8, 8]
self.bn5 = nn.BatchNorm2d(256)
self.relu5 = nn.ReLU(inplace=True)
self.pool5 = nn.MaxPool2d(kernel_size=3, stride=2)
self.drop1 = nn.Dropout(p=0.5)
self.fc1 = nn.Linear(256 * 3 * 3, 1024, bias=True)
self.relu6 = nn.ReLU(inplace=True)
self.drop2 = nn.Dropout(p=0.5)
self.fc2 = nn.Linear(1024, 512, bias=True)
self.relu7 = nn.ReLU(inplace=True)
self.fc3 = nn.Linear(512, num_classes, bias=True)
def forward(self, x):
x = self.conv1(x)
x = self.bn1(x)
x = self.relu1(x)
x = self.pool1(x)
x = self.conv2(x)
x = self.bn2(x)
x = self.relu2(x)
x = self.pool2(x)
x = self.conv3(x)
x = self.bn3(x)
x = self.relu3(x)
x = self.conv4(x)
x = self.bn4(x)
x = self.relu4(x)
x = self.conv5(x)
x = self.bn5(x)
x = self.relu5(x)
x = self.pool5(x)
x = torch.flatten(x, start_dim=1)
x = self.drop1(x)
x = self.fc1(x)
x = self.relu6(x)
x = self.drop2(x)
x = self.fc2(x)
x = self.relu7(x)
x = self.fc3(x)
return x
def quantize(self, quant_type, num_bits=8, e_bits=3):
# e_bits is only used for FLOAT quantization
self.qconv1 = QConvBNReLU(quant_type, self.conv1, self.bn1, qi=True, qo=True, num_bits=num_bits, e_bits=e_bits)
self.qpool1 = QMaxPooling2d(quant_type, kernel_size=2, stride=2, padding=0, num_bits=num_bits, e_bits=e_bits)
self.qconv2 = QConvBNReLU(quant_type, self.conv2, self.bn2, qi=False, qo=True, num_bits=num_bits, e_bits=e_bits)
self.qpool2 = QMaxPooling2d(quant_type, kernel_size=2, stride=2, padding=0, num_bits=num_bits, e_bits=e_bits)
self.qconv3 = QConvBNReLU(quant_type, self.conv3, self.bn3, qi=False, qo=True, num_bits=num_bits, e_bits=e_bits)
self.qconv4 = QConvBNReLU(quant_type, self.conv4, self.bn4, qi=False, qo=True, num_bits=num_bits, e_bits=e_bits)
self.qconv5 = QConvBNReLU(quant_type, self.conv5, self.bn5, qi=False, qo=True, num_bits=num_bits, e_bits=e_bits)
self.qpool5 = QMaxPooling2d(quant_type, kernel_size=3, stride=2, padding=0, num_bits=num_bits, e_bits=e_bits)
self.qfc1 = QLinear(quant_type, self.fc1, qi=False, qo=True, num_bits=num_bits, e_bits=e_bits)
self.qrelu6 = QReLU(quant_type, num_bits=num_bits, e_bits=e_bits)
self.qfc2 = QLinear(quant_type, self.fc2, qi=False, qo=True, num_bits=num_bits, e_bits=e_bits)
self.qrelu7 = QReLU(quant_type, num_bits=num_bits, e_bits=e_bits)
self.qfc3 = QLinear(quant_type, self.fc3, qi=False, qo=True, num_bits=num_bits, e_bits=e_bits)
def quantize_forward(self, x):
x = self.qconv1(x)
x = self.qpool1(x)
x = self.qconv2(x)
x = self.qpool2(x)
x = self.qconv3(x)
x = self.qconv4(x)
x = self.qconv5(x)
x = self.qpool5(x)
x = torch.flatten(x, start_dim=1)
x = self.drop1(x)
x = self.qfc1(x)
x = self.qrelu6(x)
x = self.drop2(x)
x = self.qfc2(x)
x = self.qrelu7(x)
x = self.qfc3(x)
return x
def freeze(self):
self.qconv1.freeze()
self.qpool1.freeze(self.qconv1.qo)
self.qconv2.freeze(self.qconv1.qo)
self.qpool2.freeze(self.qconv2.qo)
self.qconv3.freeze(self.qconv2.qo)
self.qconv4.freeze(self.qconv3.qo)
self.qconv5.freeze(self.qconv4.qo)
self.qpool5.freeze(self.qconv5.qo)
self.qfc1.freeze(self.qconv5.qo)
self.qrelu6.freeze(self.qfc1.qo)
self.qfc2.freeze(self.qfc1.qo)
self.qrelu7.freeze(self.qfc2.qo)
self.qfc3.freeze(self.qfc2.qo)
def quantize_inference(self, x):
x = self.qconv1.qi.quantize_tensor(x)
x = self.qconv1.quantize_inference(x)
x = self.qpool1.quantize_inference(x)
x = self.qconv2.quantize_inference(x)
x = self.qpool2.quantize_inference(x)
x = self.qconv3.quantize_inference(x)
x = self.qconv4.quantize_inference(x)
x = self.qconv5.quantize_inference(x)
x = self.qpool5.quantize_inference(x)
x = torch.flatten(x, start_dim=1)
x = self.qfc1.quantize_inference(x)
x = self.qrelu6.quantize_inference(x)
x = self.qfc2.quantize_inference(x)
x = self.qrelu7.quantize_inference(x)
x = self.qfc3.quantize_inference(x)
x = self.qfc3.qo.dequantize_tensor(x)
return x
# ResNet model definition
# adapted for CIFAR-10
class ResNet(nn.Module):
def __init__(self, block, layers, num_classes=10): # 10 classes for CIFAR-10
super(ResNet, self).__init__()
self.inplanes = 16 # CIFAR-10 images are small, so start with fewer channels
GlobalVariables.SELF_INPLANES = self.inplanes
# print('resnet init:'+ str(GlobalVariables.SELF_INPLANES))
# input stem
self.conv1 = nn.Conv2d(3, 16, kernel_size=3, stride=1, padding=1,
bias=True)
self.bn1 = nn.BatchNorm2d(16)
self.relu = nn.ReLU()
# residual stages (4 stages of stacked residual blocks)
self.layer1 = MakeLayer(block, 16, layers[0])
self.layer2 = MakeLayer(block, 32, layers[1], stride=2)
self.layer3 = MakeLayer(block, 64, layers[2], stride=2)
self.layer4 = MakeLayer(block, 128, layers[3], stride=2)
# classification head
self.avgpool = nn.AdaptiveAvgPool2d((1, 1))
self.fc = nn.Linear(128 * block.expansion, num_classes)
# parameter initialization
for m in self.modules():
if isinstance(m, nn.Conv2d):
nn.init.kaiming_normal_(m.weight, mode='fan_out', nonlinearity='relu')
elif isinstance(m, nn.BatchNorm2d):
nn.init.constant_(m.weight, 1)
nn.init.constant_(m.bias, 0)
def forward(self, x):
# input stem
x = self.conv1(x)
x = self.bn1(x)
x = self.relu(x)
# Compared with the ImageNet version there is no max pool here: CIFAR-10 images are already small, and pooling again would shrink them too much
# residual stages
x = self.layer1(x)
x = self.layer2(x)
x = self.layer3(x)
x = self.layer4(x)
# classification head
x = self.avgpool(x) # output shape is (B, C, 1, 1)
x = x.view(x.size(0), -1)
x = self.fc(x)
out = F.softmax(x,dim = 1) # the softmax here is optional; it makes little difference
return out
def quantize(self, quant_type, num_bits=8, e_bits=3):
self.qconvbnrelu1 = QConvBNReLU(quant_type,self.conv1,self.bn1,qi=True,qo=True,num_bits=num_bits,e_bits=e_bits)
# to check: num_bits propagation to the quantized sub-layers
self.layer1.quantize(quant_type=quant_type,num_bits=num_bits, e_bits=e_bits)
self.layer2.quantize(quant_type=quant_type,num_bits=num_bits, e_bits=e_bits)
self.layer3.quantize(quant_type=quant_type,num_bits=num_bits, e_bits=e_bits)
self.layer4.quantize(quant_type=quant_type,num_bits=num_bits, e_bits=e_bits)
self.qavgpool1 = QAdaptiveAvgPool2d(quant_type,qi=False,qo=True,num_bits=num_bits,e_bits=e_bits)
self.qfc1 = QLinear(quant_type, self.fc,qi=False,qo=True,num_bits=num_bits,e_bits=e_bits)
def quantize_forward(self, x):
# for _, layer in self.quantize_layers.items():
# x = layer(x)
# out = F.softmax(x, dim=1)
# return out
x = self.qconvbnrelu1(x)
x = self.layer1.quantize_forward(x)
x = self.layer2.quantize_forward(x)
x = self.layer3.quantize_forward(x)
x = self.layer4.quantize_forward(x)
x = self.qavgpool1(x)
x = x.view(x.size(0), -1)
x = self.qfc1(x)
out = F.softmax(x,dim = 1) # the softmax here is optional; it makes little difference
return out
def freeze(self):
self.qconvbnrelu1.freeze() # as the first layer it owns a qi, so freeze() needs no external qi
qo = self.layer1.freeze(qinput = self.qconvbnrelu1.qo)
qo = self.layer2.freeze(qinput = qo)
qo = self.layer3.freeze(qinput = qo)
qo = self.layer4.freeze(qinput = qo)
self.qavgpool1.freeze(qi=qo)
self.qfc1.freeze(qi=self.qavgpool1.qo)
def fakefreeze(self):
pass
def quantize_inference(self, x):
qx = self.qconvbnrelu1.qi.quantize_tensor(x)
qx = self.qconvbnrelu1.quantize_inference(qx)
qx = self.layer1.quantize_inference(qx)
qx = self.layer2.quantize_inference(qx)
qx = self.layer3.quantize_inference(qx)
qx = self.layer4.quantize_inference(qx)
qx = self.qavgpool1.quantize_inference(qx)
qx = qx.view(qx.size(0), -1)
qx = self.qfc1.quantize_inference(qx)
qx = self.qfc1.qo.dequantize_tensor(qx)
out = F.softmax(qx,dim = 1) # the softmax here is optional; it makes little difference
return out
# BasicBlock
class BasicBlock(nn.Module):
expansion = 1
def __init__(self, inplanes, planes, stride=1, downsample=None):
super(BasicBlock, self).__init__()
# first conv layer
self.conv1 = nn.Conv2d(inplanes, planes, kernel_size=3, stride=stride,
padding=1, bias=True)
self.bn1 = nn.BatchNorm2d(planes)
# second conv layer
self.conv2 = nn.Conv2d(planes, planes, kernel_size=3, stride=1,
padding=1, bias=True)
self.bn2 = nn.BatchNorm2d(planes)
# shortcut
self.relu = nn.ReLU()
self.downsample = downsample
self.stride = stride
def forward(self, x):
identity = x
out = self.conv1(x)
out = self.bn1(out)
out = self.relu(out)
out = self.conv2(out)
out = self.bn2(out)
if self.downsample is not None:
identity = self.downsample(identity)
out += identity
out = self.relu(out)
return out
def quantize(self, quant_type ,num_bits=8, e_bits=3):
self.qconvbnrelu1 = QConvBNReLU(quant_type,self.conv1,self.bn1,qi=False,qo=True,num_bits=num_bits,e_bits=e_bits)
self.qconvbn1 = QConvBN(quant_type,self.conv2,self.bn2,qi=False,qo=True,num_bits=num_bits,e_bits=e_bits)
if self.downsample is not None:
self.qconvbn2 = QConvBN(quant_type,self.downsample[0],self.downsample[1],qi=False,qo=True,num_bits=num_bits,e_bits=e_bits)
self.qelementadd = QElementwiseAdd(quant_type,qi0=False, qi1=False, qo=True,num_bits=num_bits,e_bits=e_bits)
self.qrelu1 = QReLU(quant_type,qi= False,num_bits=num_bits,e_bits=e_bits) # a qi is needed (supplied at freeze time)
def quantize_forward(self, x):
identity = x
out = self.qconvbnrelu1(x)
out = self.qconvbn1(out)
if self.downsample is not None:
identity = self.qconvbn2(identity)
# residual add
# out = identity + out # handled by a dedicated elementwise-add module instead
out = self.qelementadd(out,identity)
out = self.qrelu1(out)
return out
def freeze(self, qinput):
# qconvbnrelu1 could in principle reuse the previous layer's qo, but passing it through felt awkward, so it is not done here
# still needs careful checking
self.qconvbnrelu1.freeze(qi= qinput) # takes the last qo of the preceding module
self.qconvbn1.freeze(qi = self.qconvbnrelu1.qo)
if self.downsample is not None:
self.qconvbn2.freeze(qi = qinput) # the downsample branch
self.qelementadd.freeze(qi0 = self.qconvbn1.qo, qi1 = self.qconvbn2.qo)
else:
self.qelementadd.freeze(qi0 = self.qconvbn1.qo, qi1 = qinput)
# an extra layer might be needed here to handle the elementwise add
self.qrelu1.freeze(qi = self.qelementadd.qo) # needs its own qi statistics
return self.qrelu1.qi # the qi gathered by the ReLU serves as the qo after the ReLU
def quantize_inference(self, x):
# No initial quantize_tensor/dequantize_tensor is needed here: this is not the first or last layer, so as long as every intermediate layer stays in the quantized domain no extra conversion is required.
identity = x
out = self.qconvbnrelu1.quantize_inference(x)
out = self.qconvbn1.quantize_inference(out)
if self.downsample is not None:
identity = self.qconvbn2.quantize_inference(identity)
# out = identity + out # may need a dedicated elementwise-add transform; to be revised later
out = self.qelementadd.quantize_inference(out,identity)
out = self.qrelu1.quantize_inference(out)
return out
# Bottleneck
class Bottleneck(nn.Module):
expansion = 4
def __init__(self, inplanes, planes, stride=1, downsample=None):
super(Bottleneck, self).__init__()
# 1x1 conv layer
self.conv1 = nn.Conv2d(inplanes, planes, kernel_size=1, bias=True)
self.bn1 = nn.BatchNorm2d(planes)
# 3x3 conv layer
self.conv2 = nn.Conv2d(planes, planes, kernel_size=3, stride=stride,
padding=1, bias=True)
self.bn2 = nn.BatchNorm2d(planes)
# 1x1 conv layer
self.conv3 = nn.Conv2d(planes, planes * self.expansion, kernel_size=1,
bias=True)
self.bn3 = nn.BatchNorm2d(planes * self.expansion)
# shortcut
self.relu = nn.ReLU()
self.downsample = downsample
self.stride = stride
def forward(self, x):
identity = x
out = self.conv1(x)
out = self.bn1(out)
out = self.relu(out)
out = self.conv2(out)
out = self.bn2(out)
out = self.relu(out)
out = self.conv3(out)
out = self.bn3(out)
if self.downsample is not None:
identity = self.downsample(x)
out += identity # the residual addition happens here
out = self.relu(out)
return out
class MakeLayer(nn.Module):
def __init__(self, block, planes, blocks, stride=1):
super(MakeLayer, self).__init__()
# print('makelayer init:'+ str(GlobalVariables.SELF_INPLANES))
self.downsample = None
if stride != 1 or GlobalVariables.SELF_INPLANES != planes * block.expansion:
self.downsample = nn.Sequential(
nn.Conv2d(GlobalVariables.SELF_INPLANES, planes * block.expansion,kernel_size=1, stride=stride, bias=True),
nn.BatchNorm2d(planes * block.expansion)
)
self.blockdict = nn.ModuleDict()
self.blockdict['block1'] = block(inplanes=GlobalVariables.SELF_INPLANES, planes=planes, stride=stride, downsample=self.downsample)
GlobalVariables.SELF_INPLANES = planes * block.expansion
for i in range(1, blocks): # number of blocks; a ModuleDict is used so the blocks can be addressed by name
self.blockdict['block' + str(i+1)] = block(inplanes=GlobalVariables.SELF_INPLANES, planes=planes) # the blocks are instantiated here
# def _make_layer(self, block, planes, blocks, stride=1):
# downsample = None
# # stride is the conv stride, self.inplanes is the number of input channels of the current residual block,
# # and planes * block.expansion is its number of output channels; downsampling is needed whenever stride != 1 or self.inplanes != planes * block.expansion
# # within a stage, every residual block except the first has matching input/output channel counts and the same stride (1 or 2); the feature-map height/width shrinks gradually as blocks are stacked
# if stride != 1 or SELF_INPLANES != planes * block.expansion:
# downsample = nn.Sequential(
# nn.Conv2d(SELF_INPLANES, planes * block.expansion,
# kernel_size=1, stride=stride, bias=False),
# nn.BatchNorm2d(planes * block.expansion),
# )
# layers = []
# layers.append(block(SELF_INPLANES, planes, stride, downsample))
# SELF_INPLANES = planes * block.expansion
# for _ in range(1, blocks): # number of blocks
# layers.append(block(SELF_INPLANES, planes))
# return nn.Sequential(*layers)
def forward(self,x):
for _, layer in self.blockdict.items():
x = layer(x)
return x
def quantize(self, quant_type, num_bits=8, e_bits=3):
# needs checking
for _, layer in self.blockdict.items():
layer.quantize(quant_type=quant_type,num_bits=num_bits,e_bits=e_bits) # each entry is a block that carries its own quantize strategy
def quantize_forward(self, x):
for _, layer in self.blockdict.items():
x = layer.quantize_forward(x) # each block implements its own quantize_forward
return x
def freeze(self, qinput): # qinput must be handed in from ResNet.freeze
# qconvbnrelu1 could in principle reuse the previous layer's qo, but passing it through felt awkward, so it is not done here
# still needs careful checking
cnt = 0
for _, layer in self.blockdict.items():
if cnt == 0:
qo = layer.freeze(qinput = qinput)
cnt = 1
else:
qo = layer.freeze(qinput = qo) # each block implements its own freeze
return qo # returned for use by the following layers
def quantize_inference(self, x):
# No initial quantize_tensor/dequantize_tensor is needed here: this is not the first or last layer, so as long as every intermediate layer stays in the quantized domain no extra conversion is required.
for _, layer in self.blockdict.items():
x = layer.quantize_inference(x) # each block implements its own quantize_inference
return x
# ResNet-18
def resnet18(**kwargs):
model = ResNet(BasicBlock, [2, 2, 2, 2], **kwargs)
return model
# ResNet-50
def resnet50(**kwargs):
model = ResNet(Bottleneck, [3, 4, 6, 3], **kwargs)
return model
# ResNet-152
def resnet152(**kwargs):
model = ResNet(Bottleneck, [3, 8, 36, 3], **kwargs)
return model
import math
import numpy as np
import gol
import torch
import torch.nn as nn
import torch.nn.functional as F
from torch.autograd import Variable
from function import FakeQuantize
# snap each value to the nearest entry of the quantization grid
def get_nearest_val(quant_type,x,is_bias=False):
if quant_type=='INT':
return x.round_()
plist = gol.get_value(is_bias)
# print('get')
# print(plist)
shape = x.shape
xhard = x.view(-1)
plist = plist.type_as(x)
# index of the nearest table entry for each element
idx = (xhard.unsqueeze(0) - plist.unsqueeze(1)).abs().min(dim=0)[1]
xhard = plist[idx].view(shape)
xout = (xhard - x).detach() + x
return xout
# maximum of the quantization range under symmetric signed quantization
def get_qmax(quant_type,num_bits=None, e_bits=None):
if quant_type == 'INT':
qmax = 2. ** (num_bits - 1) - 1
elif quant_type == 'POT':
qmax = 1
else: #FLOAT
m_bits = num_bits - 1 - e_bits
dist_m = 2 ** (-m_bits)
e = 2 ** (e_bits - 1)
expo = 2 ** e
m = 2 ** m_bits -1
frac = 1. + m * dist_m
qmax = frac * expo
return qmax
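# Worked example (follows directly from the formulas above): for FLOAT with num_bits=8,
# e_bits=3: m_bits = 8-1-3 = 4, dist_m = 2**-4 = 0.0625, e = 2**(3-1) = 4, expo = 2**4 = 16,
# m = 2**4 - 1 = 15, frac = 1 + 15*0.0625 = 1.9375, hence qmax = 1.9375 * 16 = 31.0.
# For INT with num_bits=8 the symmetric range is simply +/-(2**7 - 1) = +/-127.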
# signed quantization throughout, so zero_point is always 0
def calcScaleZeroPoint(min_val, max_val, qmax):
scale = torch.max(max_val.abs(),min_val.abs()) / qmax
zero_point = torch.tensor(0.)
return scale, zero_point
# quantize the input; both input and output are tensors
def quantize_tensor(quant_type, x, scale, zero_point, qmax, is_bias=False):
# the quantized range follows directly from the bit width
qmin = -qmax
q_x = zero_point + x / scale
q_x.clamp_(qmin, qmax)
q_x = get_nearest_val(quant_type, q_x, is_bias)
return q_x
# bias uses a different precision; num_bits/e_bits are chosen per quantization type
def bias_qmax(quant_type):
if quant_type == 'INT':
return get_qmax(quant_type, 64)
elif quant_type == 'POT':
return get_qmax(quant_type)
else:
return get_qmax(quant_type, 16, 5)
# convert back to FP32; no further clamping needed
def dequantize_tensor(q_x, scale, zero_point):
return scale * (q_x - zero_point)
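# Worked example (symmetric INT8, numbers purely illustrative): with observed max = 2.0,
# min = -1.0 and qmax = 127, calcScaleZeroPoint gives scale = 2.0/127 ~= 0.01575 and
# zero_point = 0; quantize_tensor then maps x = 0.5 to round(0.5/0.01575) = 32, and
# dequantize_tensor maps 32 back to 32*0.01575 ~= 0.504.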
class QParam(nn.Module):
def __init__(self,quant_type, num_bits=8, e_bits=3):
super(QParam, self).__init__()
self.quant_type = quant_type
self.num_bits = num_bits
self.e_bits = e_bits
self.qmax = get_qmax(quant_type, num_bits, e_bits)
scale = torch.tensor([], requires_grad=False)
zero_point = torch.tensor([], requires_grad=False)
min = torch.tensor([], requires_grad=False)
max = torch.tensor([], requires_grad=False)
# registering them as buffers lets them be saved into state_dict
self.register_buffer('scale', scale)
self.register_buffer('zero_point', zero_point)
self.register_buffer('min', min)
self.register_buffer('max', max)
# update the observed range and the quantization parameters
def update(self, tensor):
if self.max.nelement() == 0 or self.max.data < tensor.max().data:
self.max.data = tensor.max().data
self.max.clamp_(min=0)
if self.min.nelement() == 0 or self.min.data > tensor.min().data:
self.min.data = tensor.min().data
self.min.clamp_(max=0)
self.scale, self.zero_point = calcScaleZeroPoint(self.min, self.max, self.qmax)
def quantize_tensor(self, tensor):
return quantize_tensor(self.quant_type, tensor, self.scale, self.zero_point, self.qmax)
def dequantize_tensor(self, q_x):
return dequantize_tensor(q_x, self.scale, self.zero_point)
# this method makes the parameters recoverable from state_dict
def _load_from_state_dict(self, state_dict, prefix, local_metadata, strict, missing_keys, unexpected_keys,
error_msgs):
key_names = ['scale', 'zero_point', 'min', 'max']
for key in key_names:
value = getattr(self, key)
value.data = state_dict[prefix + key].data
state_dict.pop(prefix + key)
# the returned string is what gets printed for this object
def __str__(self):
info = 'scale: %.10f ' % self.scale
info += 'zp: %.6f ' % self.zero_point
info += 'min: %.6f ' % self.min
info += 'max: %.6f' % self.max
return info
# base class for the concrete quantized layers; qi and qo quantize the input and output respectively
class QModule(nn.Module):
def __init__(self,quant_type, qi=True, qo=True, num_bits=8, e_bits=3):
super(QModule, self).__init__()
if qi:
self.qi = QParam(quant_type,num_bits, e_bits)
if qo:
self.qo = QParam(quant_type,num_bits, e_bits)
self.quant_type = quant_type
self.num_bits = num_bits
self.e_bits = e_bits
self.bias_qmax = bias_qmax(quant_type)
def freeze(self):
pass # no-op
def quantize_inference(self, x):
raise NotImplementedError('quantize_inference should be implemented.')
"""
QModule: quantized convolution
:quant_type: quantization type
:conv_module: the wrapped conv module
:qi: whether to quantize the input feature map
:qo: whether to quantize the output feature map
:num_bits: bit width (default 8)
"""
class QConv2d(QModule):
def __init__(self, quant_type, conv_module, qi=True, qo=True, num_bits=8, e_bits=3):
super(QConv2d, self).__init__(quant_type, qi, qo, num_bits, e_bits)
self.conv_module = conv_module
self.qw = QParam(quant_type, num_bits,e_bits)
self.register_buffer('M', torch.tensor([], requires_grad=False)) # register M as a buffer
# freeze() fixes the truly quantized weights and writes them back into the original full-precision layer, which makes the divergence computation easier
def freeze(self, qi=None, qo=None):
if hasattr(self, 'qi') and qi is not None:
raise ValueError('qi has been provided in init function.')
if not hasattr(self, 'qi') and qi is None:
raise ValueError('qi is not existed, should be provided.')
if hasattr(self, 'qo') and qo is not None:
raise ValueError('qo has been provided in init function.')
if not hasattr(self, 'qo') and qo is None:
raise ValueError('qo is not existed, should be provided.')
# pooling/activation inputs need no extra min/max statistics of their own; they share the preceding layer's output qparams
if qi is not None:
self.qi = qi
if qo is not None:
self.qo = qo
# following https://zhuanlan.zhihu.com/p/156835141, this is the coefficient of Eq. (3)
self.M.data = (self.qw.scale * self.qi.scale / self.qo.scale).data
self.conv_module.weight.data = self.qw.quantize_tensor(self.conv_module.weight.data)
self.conv_module.weight.data = self.conv_module.weight.data - self.qw.zero_point
self.conv_module.bias.data = quantize_tensor(self.quant_type,
self.conv_module.bias.data, scale=self.qi.scale * self.qw.scale,
zero_point=0.,qmax=self.bias_qmax, is_bias=True)
def forward(self, x): # forward pass; x is a floating-point tensor
if hasattr(self, 'qi'):
self.qi.update(x)
x = FakeQuantize.apply(x, self.qi) # fake-quantize the input tensor
# update qw before the forward pass so the weight scale is correct when quantizing
self.qw.update(self.conv_module.weight.data)
# note: this mainly collects the ranges of x and weight per layer; the bias is not quantized here
tmp_wgt = FakeQuantize.apply(self.conv_module.weight, self.qw)
x = F.conv2d(x, tmp_wgt, self.conv_module.bias,
stride=self.conv_module.stride,
padding=self.conv_module.padding, dilation=self.conv_module.dilation,
groups=self.conv_module.groups)
if hasattr(self, 'qo'):
self.qo.update(x)
x = FakeQuantize.apply(x, self.qo)
return x
# applies the formula q_a = M * (sum((q_w - Z_w) * (q_x - Z_x)) + q_b) + Z_a
def quantize_inference(self, x): # x here is an already quantized tensor
x = x - self.qi.zero_point
x = self.conv_module(x)
x = self.M * x
x = get_nearest_val(self.quant_type,x)
x = x + self.qo.zero_point
return x
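# Requantization rationale (restating the freeze/quantize_inference code above): the weights
# were frozen as q_w (zero point 0) and the bias as q_b with scale S_i*S_w, so the integer
# conv output equals real_output / (S_i*S_w); multiplying by M = S_w*S_i/S_o and adding Z_o
# therefore yields the activation in the qo domain.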
class QLinear(QModule):
def __init__(self, quant_type, fc_module, qi=True, qo=True, num_bits=8, e_bits=3):
super(QLinear, self).__init__(quant_type, qi, qo, num_bits, e_bits)
self.fc_module = fc_module
self.qw = QParam(quant_type, num_bits, e_bits)
self.register_buffer('M', torch.tensor([], requires_grad=False)) # register M as a buffer
def freeze(self, qi=None, qo=None):
if hasattr(self, 'qi') and qi is not None:
raise ValueError('qi has been provided in init function.')
if not hasattr(self, 'qi') and qi is None:
raise ValueError('qi is not existed, should be provided.')
if hasattr(self, 'qo') and qo is not None:
raise ValueError('qo has been provided in init function.')
if not hasattr(self, 'qo') and qo is None:
raise ValueError('qo is not existed, should be provided.')
if qi is not None:
self.qi = qi
if qo is not None:
self.qo = qo
self.M.data = (self.qw.scale * self.qi.scale / self.qo.scale).data
self.fc_module.weight.data = self.qw.quantize_tensor(self.fc_module.weight.data)
self.fc_module.weight.data = self.fc_module.weight.data - self.qw.zero_point
self.fc_module.bias.data = quantize_tensor(self.quant_type,
self.fc_module.bias.data, scale=self.qi.scale * self.qw.scale,
zero_point=0., qmax=self.bias_qmax, is_bias=True)
def forward(self, x):
if hasattr(self, 'qi'):
self.qi.update(x)
x = FakeQuantize.apply(x, self.qi)
self.qw.update(self.fc_module.weight.data)
tmp_wgt = FakeQuantize.apply(self.fc_module.weight, self.qw)
x = F.linear(x, tmp_wgt, self.fc_module.bias)
if hasattr(self, 'qo'):
self.qo.update(x)
x = FakeQuantize.apply(x, self.qo)
return x
def quantize_inference(self, x):
x = x - self.qi.zero_point
x = self.fc_module(x)
x = self.M * x
x = get_nearest_val(self.quant_type,x)
x = x + self.qo.zero_point
return x
class QReLU(QModule):
def __init__(self,quant_type, qi=False, qo=True, num_bits=8, e_bits=3):
super(QReLU, self).__init__(quant_type, qi, qo, num_bits, e_bits)
def freeze(self, qi=None):
if hasattr(self, 'qi') and qi is not None:
raise ValueError('qi has been provided in init function.')
if not hasattr(self, 'qi') and qi is None:
raise ValueError('qi is not existed, should be provided.')
if qi is not None:
self.qi = qi
def forward(self, x):
if hasattr(self, 'qi'):
self.qi.update(x)
x = FakeQuantize.apply(x, self.qi)
x = F.relu(x)
return x
def quantize_inference(self, x):
x = x.clone()
# x[x < self.qi.zero_point] = self.qi.zero_point
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
a = self.qi.zero_point.float().to(device)
x[x < a] = a
return x
class QMaxPooling2d(QModule):
def __init__(self, quant_type, kernel_size=3, stride=1, padding=0, qi=False, qo=True, num_bits=8,e_bits=3):
super(QMaxPooling2d, self).__init__(quant_type, qi, qo, num_bits, e_bits)
self.kernel_size = kernel_size
self.stride = stride
self.padding = padding
def freeze(self, qi=None):
if hasattr(self, 'qi') and qi is not None:
raise ValueError('qi has been provided in init function.')
if not hasattr(self, 'qi') and qi is None:
raise ValueError('qi is not existed, should be provided.')
if qi is not None:
self.qi = qi
def forward(self, x):
if hasattr(self, 'qi'):
self.qi.update(x)
x = FakeQuantize.apply(x, self.qi)
x = F.max_pool2d(x, self.kernel_size, self.stride, self.padding)
return x
def quantize_inference(self, x):
return F.max_pool2d(x, self.kernel_size, self.stride, self.padding)
class QConvBNReLU(QModule):
def __init__(self, quant_type, conv_module, bn_module, qi=True, qo=True, num_bits=8, e_bits=3):
super(QConvBNReLU, self).__init__(quant_type, qi, qo, num_bits, e_bits)
self.conv_module = conv_module
self.bn_module = bn_module
self.qw = QParam(quant_type, num_bits,e_bits)
self.register_buffer('M', torch.tensor([], requires_grad=False)) # register M as a buffer
def fold_bn(self, mean, std):
if self.bn_module.affine:
gamma_ = self.bn_module.weight / std
weight = self.conv_module.weight * gamma_.view(self.conv_module.out_channels, 1, 1, 1)
if self.conv_module.bias is not None:
bias = gamma_ * self.conv_module.bias - gamma_ * mean + self.bn_module.bias
else:
bias = self.bn_module.bias - gamma_ * mean
else:
gamma_ = 1 / std
weight = self.conv_module.weight * gamma_
if self.conv_module.bias is not None:
bias = gamma_ * self.conv_module.bias - gamma_ * mean
else:
bias = -gamma_ * mean
return weight, bias
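# BN folding, spelled out (this restates fold_bn above): with sigma = sqrt(running_var + eps)
# and gamma_hat = gamma / sigma, the folded parameters are
#   W_fold = W * gamma_hat            (broadcast per output channel)
#   b_fold = gamma_hat * (b - mu) + beta
# and when the BN layer is not affine, gamma_hat = 1/sigma and beta = 0.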
def freeze(self, qi=None, qo=None):
if hasattr(self, 'qi') and qi is not None:
raise ValueError('qi has been provided in init function.')
if not hasattr(self, 'qi') and qi is None:
raise ValueError('qi is not existed, should be provided.')
if hasattr(self, 'qo') and qo is not None:
raise ValueError('qo has been provided in init function.')
if not hasattr(self, 'qo') and qo is None:
raise ValueError('qo is not existed, should be provided.')
if qi is not None:
self.qi = qi
if qo is not None:
self.qo = qo
self.M.data = (self.qw.scale * self.qi.scale / self.qo.scale).data
std = torch.sqrt(self.bn_module.running_var + self.bn_module.eps)
weight, bias = self.fold_bn(self.bn_module.running_mean, std)
self.conv_module.weight.data = self.qw.quantize_tensor(weight.data)
self.conv_module.weight.data = self.conv_module.weight.data - self.qw.zero_point
self.conv_module.bias.data = quantize_tensor(self.quant_type,
bias, scale=self.qi.scale * self.qw.scale,
zero_point=0., qmax=self.bias_qmax,is_bias=True)
def forward(self, x):
if hasattr(self, 'qi'):
self.qi.update(x)
x = FakeQuantize.apply(x, self.qi)
if self.training:
y = F.conv2d(x, self.conv_module.weight, self.conv_module.bias,
stride=self.conv_module.stride,
padding=self.conv_module.padding,
dilation=self.conv_module.dilation,
groups=self.conv_module.groups)
y = y.permute(1, 0, 2, 3) # NCHW -> CNHW
y = y.contiguous().view(self.conv_module.out_channels, -1) # CNHW -> C,NHW
# mean = y.mean(1)
# var = y.var(1)
mean = y.mean(1).detach()
var = y.var(1).detach()
self.bn_module.running_mean = \
(1 - self.bn_module.momentum) * self.bn_module.running_mean + \
self.bn_module.momentum * mean
self.bn_module.running_var = \
(1 - self.bn_module.momentum) * self.bn_module.running_var + \
self.bn_module.momentum * var
else:
mean = Variable(self.bn_module.running_mean)
var = Variable(self.bn_module.running_var)
std = torch.sqrt(var + self.bn_module.eps)
weight, bias = self.fold_bn(mean, std)
self.qw.update(weight.data)
x = F.conv2d(x, FakeQuantize.apply(weight, self.qw), bias,
stride=self.conv_module.stride,
padding=self.conv_module.padding, dilation=self.conv_module.dilation,
groups=self.conv_module.groups)
x = F.relu(x)
if hasattr(self, 'qo'):
self.qo.update(x)
x = FakeQuantize.apply(x, self.qo)
return x
def quantize_inference(self, x):
x = x - self.qi.zero_point
x = self.conv_module(x)
x = self.M * x
x = get_nearest_val(self.quant_type,x)
x = x + self.qo.zero_point
x.clamp_(min=0)
return x
class QConvBN(QModule):
def __init__(self, quant_type, conv_module, bn_module, qi=True, qo=True, num_bits=8, e_bits=3):
super(QConvBN, self).__init__(quant_type, qi, qo, num_bits, e_bits)
self.conv_module = conv_module
self.bn_module = bn_module
self.qw = QParam(quant_type, num_bits,e_bits)
self.register_buffer('M', torch.tensor([], requires_grad=False)) # register M as a buffer
def fold_bn(self, mean, std):
if self.bn_module.affine:
gamma_ = self.bn_module.weight / std
weight = self.conv_module.weight * gamma_.view(self.conv_module.out_channels, 1, 1, 1)
if self.conv_module.bias is not None:
bias = gamma_ * self.conv_module.bias - gamma_ * mean + self.bn_module.bias
else:
bias = self.bn_module.bias - gamma_ * mean
else:
gamma_ = 1 / std
weight = self.conv_module.weight * gamma_
if self.conv_module.bias is not None:
bias = gamma_ * self.conv_module.bias - gamma_ * mean
else:
bias = -gamma_ * mean
return weight, bias
def freeze(self, qi=None, qo=None):
if hasattr(self, 'qi') and qi is not None:
raise ValueError('qi has been provided in init function.')
if not hasattr(self, 'qi') and qi is None:
raise ValueError('qi is not existed, should be provided.')
if hasattr(self, 'qo') and qo is not None:
raise ValueError('qo has been provided in init function.')
if not hasattr(self, 'qo') and qo is None:
raise ValueError('qo is not existed, should be provided.')
if qi is not None:
self.qi = qi
if qo is not None:
self.qo = qo
self.M.data = (self.qw.scale * self.qi.scale / self.qo.scale).data
std = torch.sqrt(self.bn_module.running_var + self.bn_module.eps)
weight, bias = self.fold_bn(self.bn_module.running_mean, std)
self.conv_module.weight.data = self.qw.quantize_tensor(weight.data)
self.conv_module.weight.data = self.conv_module.weight.data - self.qw.zero_point
self.conv_module.bias.data = quantize_tensor(self.quant_type,
bias, scale=self.qi.scale * self.qw.scale,
zero_point=0., qmax=self.bias_qmax,is_bias=True)
def forward(self, x):
if hasattr(self, 'qi'):
self.qi.update(x)
x = FakeQuantize.apply(x, self.qi)
if self.training:
y = F.conv2d(x, self.conv_module.weight, self.conv_module.bias,
stride=self.conv_module.stride,
padding=self.conv_module.padding,
dilation=self.conv_module.dilation,
groups=self.conv_module.groups)
y = y.permute(1, 0, 2, 3) # NCHW -> CNHW
y = y.contiguous().view(self.conv_module.out_channels, -1) # CNHW -> C,NHW
# mean = y.mean(1)
# var = y.var(1)
mean = y.mean(1).detach()
var = y.var(1).detach()
self.bn_module.running_mean = \
(1 - self.bn_module.momentum) * self.bn_module.running_mean + \
self.bn_module.momentum * mean
self.bn_module.running_var = \
(1 - self.bn_module.momentum) * self.bn_module.running_var + \
self.bn_module.momentum * var
else:
mean = Variable(self.bn_module.running_mean)
var = Variable(self.bn_module.running_var)
std = torch.sqrt(var + self.bn_module.eps)
weight, bias = self.fold_bn(mean, std)
self.qw.update(weight.data)
x = F.conv2d(x, FakeQuantize.apply(weight, self.qw), bias,
stride=self.conv_module.stride,
padding=self.conv_module.padding, dilation=self.conv_module.dilation,
groups=self.conv_module.groups)
# x = F.relu(x)
if hasattr(self, 'qo'):
self.qo.update(x)
x = FakeQuantize.apply(x, self.qo)
return x
def quantize_inference(self, x):
x = x - self.qi.zero_point
x = self.conv_module(x)
x = self.M * x
x = get_nearest_val(self.quant_type,x)
x = x + self.qo.zero_point
# x.clamp_(min=0)
return x
# to review: this should probably carry a qo
class QAdaptiveAvgPool2d(QModule):
def __init__(self, quant_type, qi=False, qo=True, num_bits=8, e_bits=3):
super(QAdaptiveAvgPool2d, self).__init__(quant_type,qi,qo,num_bits,e_bits)
def freeze(self, qi=None):
if hasattr(self, 'qi') and qi is not None:
raise ValueError('qi has been provided in init function.')
if not hasattr(self, 'qi') and qi is None:
raise ValueError('qi is not existed, should be provided.')
if qi is not None:
self.qi = qi
# def fakefreeze(self, qi=None):
# if hasattr(self, 'qi') and qi is not None:
# raise ValueError('qi has been provided in init function.')
# if not hasattr(self, 'qi') and qi is None:
# raise ValueError('qi is not existed, should be provided.')
# if qi is not None:
# self.qi = qi
def forward(self, x):
if hasattr(self, 'qi'):
self.qi.update(x)
x = FakeQuantize.apply(x, self.qi) # as with QReLU, update qi's scale first and then put x onto the quantization grid (usually the previous layer already has qo=True, so x is on the grid already)
x = F.adaptive_avg_pool2d(x,(1, 1)) # fake-quantizing input and output is what "quantizes" this layer
if hasattr(self, 'qo'):
self.qo.update(x)
x = FakeQuantize.apply(x, self.qo)
return x
def quantize_inference(self, x):
x = F.adaptive_avg_pool2d(x,(1,1))
x = FakeQuantize.apply(x, self.qo)
return x
class QModule_2(nn.Module):
def __init__(self,quant_type, qi0=True, qi1=True, qo=True, num_bits=8, e_bits=3):
super(QModule_2, self).__init__()
if qi0:
self.qi0 = QParam(quant_type,num_bits, e_bits) # qi0 is already configured here with num_bits and quant_type
if qi1:
self.qi1 = QParam(quant_type,num_bits, e_bits) # qi1 is already configured here with num_bits and quant_type
if qo:
self.qo = QParam(quant_type,32, e_bits) # qo is configured here (a 32-bit width is used for the add output)
self.quant_type = quant_type
self.num_bits = 32
self.e_bits = e_bits
self.bias_qmax = bias_qmax(quant_type)
def freeze(self):
pass
def fakefreeze(self):
pass
def quantize_inference(self, x):
raise NotImplementedError('quantize_inference should be implemented.')
class QElementwiseAdd(QModule_2):
def __init__(self, quant_type, qi0=True, qi1=True, qo=True, num_bits=8, e_bits=3):
super(QElementwiseAdd, self).__init__(quant_type, qi0, qi1, qo, num_bits, e_bits)
self.register_buffer('M0', torch.tensor([], requires_grad=False)) # register M0 as a buffer
self.register_buffer('M1', torch.tensor([], requires_grad=False)) # register M1 as a buffer
def freeze(self, qi0=None, qi1=None ,qo=None):
if hasattr(self, 'qi0') and qi0 is not None:
raise ValueError('qi0 has been provided in init function.')
if not hasattr(self, 'qi0') and qi0 is None:
raise ValueError('qi0 is not existed, should be provided.')
if hasattr(self, 'qi1') and qi1 is not None:
raise ValueError('qi1 has been provided in init function.')
if not hasattr(self, 'qi1') and qi1 is None:
raise ValueError('qi1 is not existed, should be provided.')
if hasattr(self, 'qo') and qo is not None:
raise ValueError('qo has been provided in init function.')
if not hasattr(self, 'qo') and qo is None:
raise ValueError('qo is not existed, should be provided.')
# pooling/activation inputs need no extra min/max statistics of their own; they share the preceding layer's output qparams
if qi0 is not None:
self.qi0 = qi0
if qi1 is not None:
self.qi1 = qi1
if qo is not None:
self.qo = qo
# following https://zhuanlan.zhihu.com/p/156835141, these are the rescaling coefficients
self.M0.data = self.qi0.scale / self.qo.scale
self.M1.data = self.qi1.scale / self.qi0.scale
def forward(self, x0, x1): # forward pass; x0 and x1 are floating-point tensors
if hasattr(self, 'qi0'):
self.qi0.update(x0)
x0 = FakeQuantize.apply(x0, self.qi0) # fake-quantize the first input
if hasattr(self, 'qi1'):
self.qi1.update(x1)
x1 = FakeQuantize.apply(x1, self.qi1) # fake-quantize the second input
x = x0 + x1
if hasattr(self, 'qo'):
self.qo.update(x)
x = FakeQuantize.apply(x, self.qo)
return x
def quantize_inference(self, x0, x1): # x0 and x1 here are already quantized tensors
x0 = x0 - self.qi0.zero_point
x1 = x1 - self.qi1.zero_point
x = self.M0 * (x0 + x1*self.M1)
# x = get_nearest_val(self.quant_type,x)
x = x + self.qo.zero_point
return x
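# Rescaling rationale for the quantized add (restating the code above): the real-valued sum is
#   S_o*(q - Z_o) ~= S_0*(q_0 - Z_0) + S_1*(q_1 - Z_1)
# so with M0 = S_0/S_o and M1 = S_1/S_0 the quantized output is
#   q = M0*((q_0 - Z_0) + M1*(q_1 - Z_1)) + Z_o
# which is exactly what quantize_inference computes.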
import torch
import torch.nn as nn
import torch.optim as optim
from torch.utils.data import DataLoader
from get_weight import *
from torch.utils.tensorboard import SummaryWriter
from torchvision import datasets, transforms
from torchvision.datasets import CIFAR10
from torch.optim.lr_scheduler import CosineAnnealingLR
from model import *
from torchvision.transforms import transforms
# import models
import time
import os
import argparse
# training helpers
def train(model, optimizer, criterion, train_loader, device):
model.train()
running_loss = 0.0
flag = 0
cnt = 0
for i, data in enumerate(train_loader):
inputs, labels = data
inputs, labels = inputs.to(device), labels.to(device)
optimizer.zero_grad()
outputs = model(inputs)
loss = criterion(outputs, labels)
loss.backward()
histo, grads = (get_model_histogram(model))
if flag == 0:
flag = 1
grads_sum = grads
else:
for k,v in grads_sum.items():
grads_sum[k] += grads[k]
optimizer.step()
running_loss += loss.item()
train_loss = running_loss / len(train_loader)
for k, v in grads_sum.items():
grads_sum[k] = v / len(train_loader)
return train_loss,grads_sum
def evaluate(model, criterion, test_loader, device):
model.eval()
correct, total = 0, 0
with torch.no_grad():
for data in test_loader:
images, labels = data
images, labels = images.to(device), labels.to(device)
outputs = model(images)
_, predicted = torch.max(outputs.data, 1)
total += labels.size(0)
correct += (predicted == labels).sum().item()
accuracy = 100 * correct / total
return accuracy
# def get_children(model: torch.nn.Module):
# # get children from model!
# # nn.ModuleList is used so the parameters can still be updated later
# children = nn.ModuleList(model.children())
# # print(children)
# # makes it easy to update the contained modules later
# flatt_children = nn.ModuleList()
# # children = list(model.children())
# # flatt_children = nn.ModuleList()
# # flatt_children = []
# if len(children) == 0:
# # if model has no children; model is last child! :O
# return model
# else:
# # look for children from children... to the last child!
# for child in children:
# try:
# flatt_children.extend(get_children(child))
# except TypeError:
# flatt_children.append(get_children(child))
# # print(flatt_children)
# return flatt_children
if __name__ == "__main__":
# torch.cuda.empty_cache()
parser = argparse.ArgumentParser(description='PyTorch FP32 Training')
parser.add_argument('-m', '--model', metavar='MODEL ARCH', default='resnet18')
parser.add_argument('-e','--epochs', default=100, type=int, metavar='EPOCHS', help='number of total epochs to run')
parser.add_argument('-b', '--batch_size', default=128, type=int, metavar='BATCH SIZE', help='mini-batch size (default: 128)')
parser.add_argument('-j','--workers', default=4, type=int, metavar='WORKERS',help='number of data loading workers (default: 4)')
parser.add_argument('-lr', '--learning-rate', default=0.001, type=float, metavar='LR', help='initial learning rate', dest='lr')
parser.add_argument('-wd','--weight_decay',default=0.0001,type=float,metavar='WD',help='weight decay',dest='wd')
parser.add_argument('-t', '--test', dest='test', action='store_true', help='test model on test set')
# training hyperparameters
args = parser.parse_args()
num_epochs = args.epochs
print(num_epochs)
batch_size = args.batch_size
print(batch_size)
num_workers = args.workers
lr = args.lr
weight_decay = args.wd
best_acc = float("-inf")
start_time = time.time()
# model, loss function and optimizer
device = torch.device("cuda" if torch.cuda.is_available() else "cpu") # device selection
print(device)
if args.model == 'ResNet18' :
model = resnet18().to(device)
elif args.model == 'ResNet50' :
model = resnet50().to(device)
elif args.model == 'ResNet152' :
model = resnet152().to(device)
# elif args.model == 'LeNet' :
# model = LeNet().to(device)
# elif args.model == 'NetBN' :
# model = NetBN().to(device)
criterion = nn.CrossEntropyLoss()
optimizer = optim.Adam(model.parameters(), lr=lr)
# optimizer = optim.AdaBound(model.parameters(), lr=lr,
# weight_decay=weight_decay, final_lr=0.001*lr)
# print("ok!")
# data parallelism
if torch.cuda.device_count() > 1:
print(f"Using {torch.cuda.device_count()} GPUs")
model = nn.DataParallel(model)
# load the data
train_loader = torch.utils.data.DataLoader(
datasets.CIFAR10('../../project/p/data', train=True, download=False,
transform=transforms.Compose([
transforms.RandomCrop(32, padding=2),
transforms.RandomHorizontalFlip(),
transforms.ToTensor(),
transforms.Normalize(
(0.4914, 0.4822, 0.4465), (0.2023, 0.1994, 0.2010))
])),
batch_size=batch_size, shuffle=True, num_workers=num_workers, pin_memory=True
)
test_loader = torch.utils.data.DataLoader(
datasets.CIFAR10('../../project/p/data', train=False, download=False, transform=transforms.Compose([
transforms.ToTensor(),
transforms.Normalize((0.4914, 0.4822, 0.4465),
(0.2023, 0.1994, 0.2010))
])),
batch_size=batch_size, shuffle=False, num_workers=num_workers, pin_memory=True
)
# train_loader = DataLoader(train_dataset, batch_size=batch_size, shuffle=True, num_workers=num_workers)
# test_loader = DataLoader(test_dataset, batch_size=batch_size, shuffle=False, num_workers=num_workers)
# learning-rate scheduler
# lr_scheduler = optim.lr_scheduler.StepLR(optimizer, step_size=5, gamma=0.1)
lr_scheduler = CosineAnnealingLR(optimizer, T_max=num_epochs)
# TensorBoard
# WARN
# writer = SummaryWriter(log_dir='./project/p/models_log/trail/full_log')
writer = SummaryWriter(log_dir='log/' + args.model + '/full_log')
# early-stopping parameters
patience = 30
count = 0
# WARN
# save_dir = './project/p/ckpt/trail'
save_dir = 'ckpt'
if not os.path.isdir(save_dir):
os.makedirs(save_dir, mode=0o777)
os.chmod(save_dir, mode=0o777)
# checkpoint_dir = './project/p/checkpoint/cifar-10_trail_model'
checkpoint_dir = 'checkpoint'
if not os.path.isdir(checkpoint_dir):
os.makedirs(checkpoint_dir, mode=0o777)
os.chmod(checkpoint_dir, mode=0o777)
# training loop
if args.test == True:
model.load_state_dict(torch.load(save_dir + '/cifar10_' +args.model + '.pt'))
acc = evaluate(model, criterion, test_loader, device=device)
print(f"test accuracy: {acc:.2f}%")
for name, module in model.named_modules():
print(f"{name}: {module}\n")
print('========================================================')
print('========================================================')
model.quantize()
for name , layer in model.quantize_layers.items():
print(f"Layer {name}: {layer} ") # enough to traverse the quantized layers
else:
for epoch in range(num_epochs):
# train the model and log the loss
train_loss,grads_sum = train(model, optimizer, criterion,
train_loader, device=device)
writer.add_scalar("Training Loss", train_loss, epoch + 1)
# evaluate the model and log the accuracy
if (epoch + 1) % 5 == 0:
acc = evaluate(model, criterion, test_loader, device=device)
writer.add_scalar("Validation Accuracy", acc, epoch + 1)
checkpoint = {
# 'model': model.state_dict(),
# 'optimizer': optimizer.state_dict(),
'epoch': epoch,
'grads': grads_sum,
'accuracy':acc
}
# for name, param in model.named_parameters():
# writer.add_histogram(tag=name + '_grad', values=param.grad, global_step=epoch)
# writer.add_histogram(tag=name + '_data', values=param.data, global_step=epoch)
for name, param in grads_sum.items():
# grads_sum has already been averaged over the batches in train()
writer.add_histogram(tag=name + '_grad', values=param, global_step=epoch)
# log the weights as they stand after the last batch of this epoch
for name, param in model.named_parameters():
writer.add_histogram(tag=name + '_data', values=param.data, global_step=epoch)
# WARN
# torch.save(checkpoint, checkpoint_dir + '/ckpt_cifar-10_trail_model%s.pt' % (str(epoch+1)))
torch.save(checkpoint, checkpoint_dir + '/cifar10_' + args.model + '_%s.pt' % (str(epoch+1)))
# save the best model
if acc > best_acc:
best_acc = acc
count = 0
# WARN
# torch.save(model.state_dict(), save_dir+'/model_trail.pt')
torch.save(model.state_dict(), save_dir + '/cifar10_' +args.model + '.pt')
else:
count += 1
print(
f"Epoch {epoch + 1}/{num_epochs}, Train Loss: {train_loss:.5f}, Val Acc: {acc:.2f}%")
# check whether early stopping should trigger
if count == patience:
print(f"No improvement after {patience} epochs. Early stop!")
break
# update the learning rate
lr_scheduler.step()
# training time and best validation accuracy
print(f"Training took {(time.time() - start_time) / 60:.2f} minutes")
print(f"Best validation accuracy: {best_acc:.2f}%")
# load and test the best model
# model.load_state_dict(torch.load("best_model.pth"))
# model.to(device)
# test_acc = evaluate(model, criterion, test_loader, device="cuda")
# print(f"Test Accuracy: {test_acc:.2f}%")
# close the TensorBoard writer
writer.close()
# -*- coding: utf-8 -*-
from torch.serialization import load
from model import *
from extract_ratio import *
from utils import *
import gol
import openpyxl
import sys
import argparse
import torch
import torch.nn as nn
import torch.nn.functional as F
import torch.optim as optim
from torchvision import datasets, transforms
from torchvision.transforms.functional import InterpolationMode
import torch.utils.bottleneck as bn
import os
import os.path as osp
from torch.utils.tensorboard import SummaryWriter
def direct_quantize(model, test_loader,device):
for i, (data, target) in enumerate(test_loader, 1):
data = data.to(device)
output = model.quantize_forward(data).cpu()
if i % 500 == 0:
break
print('direct quantization finish')
def full_inference(model, test_loader, device):
correct = 0
for i, (data, target) in enumerate(test_loader, 1):
data = data.to(device)
output = model(data).cpu()
pred = output.argmax(dim=1, keepdim=True)
# print(pred)
correct += pred.eq(target.view_as(pred)).sum().item()
print('\nTest set: Full Model Accuracy: {:.2f}%'.format(100. * correct / len(test_loader.dataset)))
return 100. * correct / len(test_loader.dataset)
def quantize_inference(model, test_loader, device):
correct = 0
for i, (data, target) in enumerate(test_loader, 1):
data = data.to(device)
output = model.quantize_inference(data).cpu()
pred = output.argmax(dim=1, keepdim=True)
correct += pred.eq(target.view_as(pred)).sum().item()
print('Test set: Quant Model Accuracy: {:.2f}%'.format(100. * correct / len(test_loader.dataset)))
return 100. * correct / len(test_loader.dataset)
def js_div(p_output, q_output, get_softmax=True):
"""
Function that measures JS divergence between target and output logits:
"""
KLDivLoss = nn.KLDivLoss(reduction='sum')
if get_softmax:
p_output = F.softmax(p_output, dim=1)
q_output = F.softmax(q_output, dim=1)
log_mean_output = ((p_output + q_output)/2).log()
return (KLDivLoss(log_mean_output, p_output) + KLDivLoss(log_mean_output, q_output))/2
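# This is JSD(P || Q) = 0.5*KL(P || M) + 0.5*KL(Q || M) with M = (P + Q)/2;
# nn.KLDivLoss(log_mean, p) evaluates KL(p || mean) because it expects log-probabilities
# as its first argument and probabilities as its target.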
if __name__ == "__main__":
parser = argparse.ArgumentParser(description='PyTorch FP32 Training')
parser.add_argument('-m', '--model', metavar='MODEL ARCH', default='resnet18')
parser.add_argument('-b', '--batch_size', default=128, type=int, metavar='BATCH SIZE', help='mini-batch size (default: 128)')
parser.add_argument('-j','--workers', default=4, type=int, metavar='WORKERS',help='number of data loading workers (default: 4)')
# parser.add_argument('-t', '--test', dest='test', action='store_true', help='test model on test set')
# runtime arguments
args = parser.parse_args()
batch_size = args.batch_size
num_workers = args.workers
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
print(device)
train_loader = torch.utils.data.DataLoader(
datasets.CIFAR10('../../project/p/data', train=True, download=False,
transform=transforms.Compose([
transforms.RandomCrop(32, padding=2),
transforms.RandomHorizontalFlip(),
transforms.ToTensor(),
transforms.Normalize(
(0.4914, 0.4822, 0.4465), (0.2023, 0.1994, 0.2010))
])),
batch_size=batch_size, shuffle=True, num_workers=num_workers, pin_memory=True
)
test_loader = torch.utils.data.DataLoader(
datasets.CIFAR10('../../project/p/data', train=False, download=False, transform=transforms.Compose([
transforms.ToTensor(),
transforms.Normalize((0.4914, 0.4822, 0.4465),
(0.2023, 0.1994, 0.2010))
])),
batch_size=batch_size, shuffle=False, num_workers=num_workers, pin_memory=True
)
# model = AlexNet_BN()
if args.model == 'ResNet18':
model = resnet18()
elif args.model == 'ResNet50':
model = resnet50()
elif args.model == 'ResNet152':
model = resnet152()
writer = SummaryWriter(log_dir='log/' + args.model + '/ptq')
full_file = 'ckpt/cifar10_' + args.model + '.pt'
model.load_state_dict(torch.load(full_file))
model.to(device)
load_ptq = False
ptq_file_prefix = 'ckpt/cifar10_' + args.model + '_ptq_'
model.eval()
full_acc = full_inference(model, test_loader, device)
model_fold = fold_model(model)
full_params = []
layer, par_ratio, flop_ratio = extract_ratio()
print(layer)
par_ratio, flop_ratio = fold_ratio(layer, par_ratio, flop_ratio)
for name, param in model_fold.named_parameters():
if 'bn' in name:
continue
param_norm = F.normalize(param.data.cpu(),p=2,dim=-1)
full_params.append(param_norm)
writer.add_histogram(tag='Full_' + name + '_data', values=param.data)
gol._init()
quant_type_list = ['INT','POT','FLOAT']
title_list = []
js_flops_list = []
js_param_list = []
ptq_acc_list = []
acc_loss_list = []
for quant_type in quant_type_list:
num_bit_list = numbit_list(quant_type)
# for each quantization type, the bias quantization table only needs to be set once
# INT uses wide bit widths, so a lookup table would be too costly; plain rounding is used instead
if quant_type != 'INT':
bias_list = build_bias_list(quant_type)
gol.set_value(bias_list, is_bias=True)
for num_bits in num_bit_list:
e_bit_list = ebit_list(quant_type,num_bits)
for e_bits in e_bit_list:
model_ptq = resnet18()
if quant_type == 'FLOAT':
title = '%s_%d_E%d' % (quant_type, num_bits, e_bits)
else:
title = '%s_%d' % (quant_type, num_bits)
print('\nPTQ: '+title)
title_list.append(title)
# set the quantization table
if quant_type != 'INT':
plist = build_list(quant_type, num_bits, e_bits)
gol.set_value(plist)
# decide whether an existing PTQ checkpoint should be loaded
if load_ptq is True and osp.exists(ptq_file_prefix + title + '.pt'):
model_ptq.quantize(quant_type,num_bits,e_bits)
model_ptq.load_state_dict(torch.load(ptq_file_prefix + title + '.pt'))
model_ptq.to(device)
print('Successfully load ptq model: ' + title)
else:
model_ptq.load_state_dict(torch.load(full_file))
model_ptq.to(device)
model_ptq.quantize(quant_type,num_bits,e_bits)
model_ptq.eval()
direct_quantize(model_ptq, train_loader, device)
torch.save(model_ptq.state_dict(), ptq_file_prefix + title + '.pt')
model_ptq.freeze()
ptq_acc = quantize_inference(model_ptq, test_loader, device)
ptq_acc_list.append(ptq_acc)
acc_loss = (full_acc - ptq_acc) / full_acc
acc_loss_list.append(acc_loss)
idx = -1
# JS divergence weighted by FLOPs / parameter ratios
js_flops = 0.
js_param = 0.
for name, param in model_ptq.named_parameters():
if '.' not in name or 'bn' in name:
continue
writer.add_histogram(tag=title +':'+ name + '_data', values=param.data)
# idx = idx + 1
# prefix = name.split('.')[0]
# if prefix in layer:
# layer_idx = layer.index(prefix)
# ptq_param = param.data.cpu()
# # take the L2 norm
# ptq_norm = F.normalize(ptq_param,p=2,dim=-1)
# writer.add_histogram(tag=title +':'+ name + '_data', values=ptq_param)
# js = js_div(ptq_norm,full_params[idx])
# js = js.item()
# if js < 0.:
# js = 0.
# js_flops = js_flops + js * flop_ratio[layer_idx]
# js_param = js_param + js * flop_ratio[layer_idx]
# js_flops_list.append(js_flops)
# js_param_list.append(js_param)
print(title + ': js_flops: %f js_param: %f acc_loss: %f' % (js_flops, js_param, acc_loss))
# write the results to xlsx
workbook = openpyxl.Workbook()
worksheet = workbook.active
worksheet.cell(row=1,column=1,value='FP32-acc')
worksheet.cell(row=1,column=2,value=full_acc)
worksheet.cell(row=3,column=1,value='title')
worksheet.cell(row=3,column=2,value='js_flops')
worksheet.cell(row=3,column=3,value='js_param')
worksheet.cell(row=3,column=4,value='ptq_acc')
worksheet.cell(row=3,column=5,value='acc_loss')
for i in range(len(title_list)):
worksheet.cell(row=i+4, column=1, value=title_list[i])
worksheet.cell(row=i+4, column=2, value=js_flops_list[i])
worksheet.cell(row=i+4, column=3, value=js_param_list[i])
worksheet.cell(row=i+4, column=4, value=ptq_acc_list[i])
worksheet.cell(row=i+4, column=5, value=acc_loss_list[i])
workbook.save('ptq_result.xlsx')
writer.close()
ft = open('ptq_result.txt','w')
print('title_list:',file=ft)
print(" ".join(title_list),file=ft)
print('js_flops_list:',file=ft)
print(" ".join(str(i) for i in js_flops_list), file=ft)
print('js_param_list:',file=ft)
print(" ".join(str(i) for i in js_param_list), file=ft)
print('ptq_acc_list:',file=ft)
print(" ".join(str(i) for i in ptq_acc_list), file=ft)
print('acc_loss_list:',file=ft)
print(" ".join(str(i) for i in acc_loss_list), file=ft)
ft.close()
2023.4.10
Note: the code in new_mzh now follows the measurement approach and quantization conventions agreed with 游昆霖, and has been rebuilt on top of his version of the program.
Quantizing the BN layers ran into quite a few problems; thanks to 游昆霖 for the help :D
Code changes:
To quantize ResNet18, the quantized layers added in module.py are QConvBNReLU, QConvBN, QElementwiseAdd and QAdaptiveAvgPool2d. model.py builds the quantized ResNet18 architecture; class BasicBlock, class Bottleneck and class MakeLayer keep ResNet extensible, so it can be extended to ResNet50 and ResNet152 fairly easily.
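For reference, the calibrate-freeze-infer sequence that ptq.py applies to each configuration is roughly the sketch below (`calib_loader` and `eval_quantized` are placeholder names; for non-INT types the value tables must additionally be registered through gol before calibration):
```
model = resnet18()
model.load_state_dict(torch.load('ckpt/cifar10_ResNet18.pt'))
model.eval()
model.quantize('INT', num_bits=8, e_bits=3)      # attach the quantized wrappers
for i, (data, _) in enumerate(calib_loader, 1):  # calibration via fake quantization
    model.quantize_forward(data)
    if i % 500 == 0:
        break
model.freeze()                                   # fold BN, fix scales and zero points
acc = eval_quantized(model, test_loader)         # wraps model.quantize_inference(...)
```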
To do:
Compared with AlexNet or VGG, the ResNet architecture is much less flat: the MakeLayer and residual structures mean it is not a plainly stacked network, so many of the earlier similarity metrics cannot be applied to ResNet directly (a naive traversal of the parameters also runs into the layer/sequential/block wrappers around the conv and bn layers). The analysis of parameter and gradient similarity is left for follow-up work.
QAT support is still to be added.
The experiment below reports the PTQ results for ResNet18 (the computation of js_flops and js_param has not been updated yet, so they are temporarily reported as 0):
```
PTQ: INT_2
direct quantization finish
Test set: Quant Model Accuracy: 10.00%
INT_2: js_flops: 0.000000 js_param: 0.000000 acc_loss: 0.883599
PTQ: INT_3
direct quantization finish
Test set: Quant Model Accuracy: 10.00%
INT_3: js_flops: 0.000000 js_param: 0.000000 acc_loss: 0.883599
PTQ: INT_4
direct quantization finish
Test set: Quant Model Accuracy: 49.76%
INT_4: js_flops: 0.000000 js_param: 0.000000 acc_loss: 0.420789
PTQ: INT_5
direct quantization finish
Test set: Quant Model Accuracy: 80.86%
INT_5: js_flops: 0.000000 js_param: 0.000000 acc_loss: 0.058782
PTQ: INT_6
direct quantization finish
Test set: Quant Model Accuracy: 84.91%
INT_6: js_flops: 0.000000 js_param: 0.000000 acc_loss: 0.011640
PTQ: INT_7
direct quantization finish
Test set: Quant Model Accuracy: 85.60%
INT_7: js_flops: 0.000000 js_param: 0.000000 acc_loss: 0.003608
PTQ: INT_8
direct quantization finish
Test set: Quant Model Accuracy: 85.85%
INT_8: js_flops: 0.000000 js_param: 0.000000 acc_loss: 0.000698
PTQ: INT_9
direct quantization finish
Test set: Quant Model Accuracy: 85.64%
INT_9: js_flops: 0.000000 js_param: 0.000000 acc_loss: 0.003143
PTQ: INT_10
direct quantization finish
Test set: Quant Model Accuracy: 82.81%
INT_10: js_flops: 0.000000 js_param: 0.000000 acc_loss: 0.036084
PTQ: INT_11
direct quantization finish
Test set: Quant Model Accuracy: 74.91%
INT_11: js_flops: 0.000000 js_param: 0.000000 acc_loss: 0.128041
PTQ: INT_12
direct quantization finish
Test set: Quant Model Accuracy: 56.50%
INT_12: js_flops: 0.000000 js_param: 0.000000 acc_loss: 0.342335
PTQ: INT_13
direct quantization finish
Test set: Quant Model Accuracy: 26.25%
INT_13: js_flops: 0.000000 js_param: 0.000000 acc_loss: 0.694448
PTQ: INT_14
direct quantization finish
Test set: Quant Model Accuracy: 14.16%
INT_14: js_flops: 0.000000 js_param: 0.000000 acc_loss: 0.835176
PTQ: INT_15
direct quantization finish
Test set: Quant Model Accuracy: 11.29%
INT_15: js_flops: 0.000000 js_param: 0.000000 acc_loss: 0.868583
PTQ: INT_16
direct quantization finish
Test set: Quant Model Accuracy: 10.25%
INT_16: js_flops: 0.000000 js_param: 0.000000 acc_loss: 0.880689
PTQ: POT_2
direct quantization finish
Test set: Quant Model Accuracy: 10.00%
POT_2: js_flops: 0.000000 js_param: 0.000000 acc_loss: 0.883599
PTQ: POT_3
direct quantization finish
Test set: Quant Model Accuracy: 10.00%
POT_3: js_flops: 0.000000 js_param: 0.000000 acc_loss: 0.883599
PTQ: POT_4
direct quantization finish
Test set: Quant Model Accuracy: 44.75%
POT_4: js_flops: 0.000000 js_param: 0.000000 acc_loss: 0.479106
PTQ: POT_5
direct quantization finish
Test set: Quant Model Accuracy: 40.29%
POT_5: js_flops: 0.000000 js_param: 0.000000 acc_loss: 0.531021
PTQ: POT_6
direct quantization finish
Test set: Quant Model Accuracy: 50.13%
POT_6: js_flops: 0.000000 js_param: 0.000000 acc_loss: 0.416482
PTQ: POT_7
direct quantization finish
Test set: Quant Model Accuracy: 45.75%
POT_7: js_flops: 0.000000 js_param: 0.000000 acc_loss: 0.467466
PTQ: POT_8
direct quantization finish
Test set: Quant Model Accuracy: 39.79%
POT_8: js_flops: 0.000000 js_param: 0.000000 acc_loss: 0.536841
PTQ: FLOAT_3_E1
direct quantization finish
Test set: Quant Model Accuracy: 9.93%
FLOAT_3_E1: js_flops: 0.000000 js_param: 0.000000 acc_loss: 0.884414
PTQ: FLOAT_4_E1
direct quantization finish
Test set: Quant Model Accuracy: 39.63%
FLOAT_4_E1: js_flops: 0.000000 js_param: 0.000000 acc_loss: 0.538703
PTQ: FLOAT_4_E2
direct quantization finish
Test set: Quant Model Accuracy: 70.74%
FLOAT_4_E2: js_flops: 0.000000 js_param: 0.000000 acc_loss: 0.176580
PTQ: FLOAT_5_E1
direct quantization finish
Test set: Quant Model Accuracy: 65.04%
FLOAT_5_E1: js_flops: 0.000000 js_param: 0.000000 acc_loss: 0.242929
PTQ: FLOAT_5_E2
direct quantization finish
Test set: Quant Model Accuracy: 82.65%
FLOAT_5_E2: js_flops: 0.000000 js_param: 0.000000 acc_loss: 0.037947
PTQ: FLOAT_5_E3
direct quantization finish
Test set: Quant Model Accuracy: 80.86%
FLOAT_5_E3: js_flops: 0.000000 js_param: 0.000000 acc_loss: 0.058782
PTQ: FLOAT_6_E1
direct quantization finish
Test set: Quant Model Accuracy: 74.17%
FLOAT_6_E1: js_flops: 0.000000 js_param: 0.000000 acc_loss: 0.136655
PTQ: FLOAT_6_E2
direct quantization finish
Test set: Quant Model Accuracy: 84.28%
FLOAT_6_E2: js_flops: 0.000000 js_param: 0.000000 acc_loss: 0.018973
PTQ: FLOAT_6_E3
direct quantization finish
Test set: Quant Model Accuracy: 84.81%
FLOAT_6_E3: js_flops: 0.000000 js_param: 0.000000 acc_loss: 0.012804
PTQ: FLOAT_6_E4
direct quantization finish
Test set: Quant Model Accuracy: 78.06%
FLOAT_6_E4: js_flops: 0.000000 js_param: 0.000000 acc_loss: 0.091375
PTQ: FLOAT_7_E1
direct quantization finish
Test set: Quant Model Accuracy: 76.20%
FLOAT_7_E1: js_flops: 0.000000 js_param: 0.000000 acc_loss: 0.113025
PTQ: FLOAT_7_E2
direct quantization finish
Test set: Quant Model Accuracy: 84.83%
FLOAT_7_E2: js_flops: 0.000000 js_param: 0.000000 acc_loss: 0.012571
PTQ: FLOAT_7_E3
direct quantization finish
Test set: Quant Model Accuracy: 85.55%
FLOAT_7_E3: js_flops: 0.000000 js_param: 0.000000 acc_loss: 0.004190
PTQ: FLOAT_7_E4
direct quantization finish
Test set: Quant Model Accuracy: 82.00%
FLOAT_7_E4: js_flops: 0.000000 js_param: 0.000000 acc_loss: 0.045513
PTQ: FLOAT_7_E5
direct quantization finish
Test set: Quant Model Accuracy: 10.00%
FLOAT_7_E5: js_flops: 0.000000 js_param: 0.000000 acc_loss: 0.883599
PTQ: FLOAT_8_E1
direct quantization finish
Test set: Quant Model Accuracy: 77.39%
FLOAT_8_E1: js_flops: 0.000000 js_param: 0.000000 acc_loss: 0.099174
PTQ: FLOAT_8_E2
direct quantization finish
Test set: Quant Model Accuracy: 85.21%
FLOAT_8_E2: js_flops: 0.000000 js_param: 0.000000 acc_loss: 0.008148
PTQ: FLOAT_8_E3
direct quantization finish
Test set: Quant Model Accuracy: 86.00%
FLOAT_8_E3: js_flops: 0.000000 js_param: 0.000000 acc_loss: -0.001048
PTQ: FLOAT_8_E4
direct quantization finish
Test set: Quant Model Accuracy: 83.26%
FLOAT_8_E4: js_flops: 0.000000 js_param: 0.000000 acc_loss: 0.030846
PTQ: FLOAT_8_E5
direct quantization finish
Test set: Quant Model Accuracy: 10.02%
FLOAT_8_E5: js_flops: 0.000000 js_param: 0.000000 acc_loss: 0.883366
PTQ: FLOAT_8_E6
direct quantization finish
Test set: Quant Model Accuracy: 13.09%
FLOAT_8_E6: js_flops: 0.000000 js_param: 0.000000 acc_loss: 0.847631
```
Before the QElementwiseAdd layer was implemented correctly, PTQ accuracy never exceeded 15%, which shows how important this layer is. It performs the residual addition: the two inputs come from layers quantized to different ranges, so they cannot be added directly and must first be rescaled onto a common scale.
The INT results currently rise and then fall as the bit width grows. I inspected the quantized parameter distributions and they track the full-precision model closely, so the problem is unlikely to be in the ordinary quantized Conv/BN layers; my guess is that at larger bit widths the rescaling inside QElementwiseAdd overflows. This still needs to be confirmed.
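A minimal sketch of the rescale-then-add idea (simplified for illustration; the real QElementwiseAdd in module.py presumably also handles zero points and calibration details):
```
import torch

def quantized_residual_add(qa, scale_a, qb, scale_b, scale_out):
    # Both operands are brought onto the output scale before the addition;
    # adding the raw quantized integers directly would mix incompatible
    # ranges. With very wide bit widths the rescaled intermediates grow,
    # which is where an overflow could slip in.
    a = qa * (scale_a / scale_out)
    b = qb * (scale_b / scale_out)
    return torch.round(a + b)
```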
\ No newline at end of file
#!/bin/bash
#- Job parameters
# (TODO)
# Please modify job name
#SBATCH -J Resnet18_trial # The job name
#SBATCH -o ./info/ret-%j.out # Write the standard output to file named 'ret-<job_number>.out'
#SBATCH -e ./info/ret-%j.err # Write the standard error to file named 'ret-<job_number>.err'
#- Resources
# (TODO)
# Please modify your requirements
#SBATCH -p nv-gpu                            # Submit to 'nv-gpu' Partition
#SBATCH -t 0-12:00:00 # Run for a maximum time of 0 days, 12 hours, 00 mins, 00 secs
#SBATCH --nodes=1 # Request N nodes
#SBATCH --gres=gpu:1 # Request M GPU per node
#SBATCH --gres-flags=enforce-binding # CPU-GPU Affinity
#SBATCH --qos=gpu-normal # Request QOS Type
###
### The system will allocate 8 or 16 cores per GPU by default.
### If you need more or fewer, use the following:
### #SBATCH --cpus-per-task=K # Request K cores
###
###
### Without specifying the constraint, any available nodes that meet the requirement will be allocated
### You can specify the characteristics of the compute nodes, and even the names of the compute nodes
###
### #SBATCH --nodelist=gpu-v00 # Request a specific list of hosts
### #SBATCH --constraint="Volta|RTX8000" # Request GPU Type: Volta(V100 or V100S) or RTX8000
###
#- Log information
echo "Job start at $(date "+%Y-%m-%d %H:%M:%S")"
echo "Job run at:"
echo "$(hostnamectl)"
#- Load environments
source /tools/module_env.sh
source pyt1.5/bin/activate
module list # list modules loaded
##- Tools
module load cluster-tools/v1.0
module load slurm-tools/v1.0
module load cmake/3.15.7
module load git/2.17.1
module load vim/8.1.2424
##- language
module load python3/3.6.8
##- CUDA
module load cuda-cudnn/11.1-8.1.1
##- virtualenv
# source xxxxx/activate
echo $(module list) # list modules loaded
echo $(which gcc)
echo $(which python)
echo $(which python3)
cluster-quota # nas quota
nvidia-smi --format=csv --query-gpu=name,driver_version,power.limit # gpu info
#- Warning! Please do not change your CUDA_VISIBLE_DEVICES
#- in `.bashrc`, `env.sh`, or your job script
echo "Use GPU ${CUDA_VISIBLE_DEVICES}" # which gpus
#- The CUDA_VISIBLE_DEVICES variable is assigned and specified by SLURM
#- Job step
# [EDIT HERE(TODO)]
sleep 2s
hostname
echo "python ./new_train.py -m ResNet18 -e 60 -b 128 -j 4 -lr 0.001 -wd 0.0001"
python ./new_train.py -m ResNet18 -e 60 -b 128 -j 4 -lr 0.001 -wd 0.0001
#- End
echo "Job end at $(date "+%Y-%m-%d %H:%M:%S")"
#!/bin/bash
#- Job parameters
# (TODO)
# Please modify job name
#SBATCH -J ResNet18_trial # The job name
#SBATCH -o ./info/ret-%j.out # Write the standard output to file named 'ret-<job_number>.out'
#SBATCH -e ./info/ret-%j.err # Write the standard error to file named 'ret-<job_number>.err'
#- Resources
# (TODO)
# Please modify your requirements
#SBATCH -p nv-gpu                            # Submit to 'nv-gpu' Partition
#SBATCH -t 0-12:00:00 # Run for a maximum time of 0 days, 12 hours, 00 mins, 00 secs
#SBATCH --nodes=1 # Request N nodes
#SBATCH --gres=gpu:1 # Request M GPU per node
#SBATCH --gres-flags=enforce-binding # CPU-GPU Affinity
#SBATCH --qos=gpu-normal # Request QOS Type
###
### The system will allocate 8 or 16 cores per GPU by default.
### If you need more or fewer, use the following:
### #SBATCH --cpus-per-task=K # Request K cores
###
###
### Without specifying the constraint, any available nodes that meet the requirement will be allocated
### You can specify the characteristics of the compute nodes, and even the names of the compute nodes
###
### #SBATCH --nodelist=gpu-v00 # Request a specific list of hosts
### #SBATCH --constraint="Volta|RTX8000" # Request GPU Type: Volta(V100 or V100S) or RTX8000
###
#- Log information
echo "Job start at $(date "+%Y-%m-%d %H:%M:%S")"
echo "Job run at:"
echo "$(hostnamectl)"
#- Load environments
source /tools/module_env.sh
source pyt1.5/bin/activate
module list # list modules loaded
##- Tools
module load cluster-tools/v1.0
module load slurm-tools/v1.0
module load cmake/3.15.7
module load git/2.17.1
module load vim/8.1.2424
##- language
module load python3/3.6.8
##- CUDA
module load cuda-cudnn/11.1-8.1.1
##- virtualenv
# source xxxxx/activate
echo $(module list) # list modules loaded
echo $(which gcc)
echo $(which python)
echo $(which python3)
cluster-quota # nas quota
nvidia-smi --format=csv --query-gpu=name,driver_version,power.limit # gpu info
#- Warning! Please do not change your CUDA_VISIBLE_DEVICES
#- in `.bashrc`, `env.sh`, or your job script
echo "Use GPU ${CUDA_VISIBLE_DEVICES}" # which gpus
#- The CUDA_VISIBLE_DEVICES variable is assigned and specified by SLURM
#- Job step
# [EDIT HERE(TODO)]
sleep 2s
hostname
echo "python ./ptq.py -m ResNet18 -b 128 -j 4"
python ./ptq.py -m ResNet18 -b 128 -j 4
#- End
echo "Job end at $(date "+%Y-%m-%d %H:%M:%S")"
import torch
import torch.nn as nn
def ebit_list(quant_type, num_bits):
    if quant_type == 'FLOAT':
        e_bit_list = list(range(1,num_bits-1))
    else:
        e_bit_list = [0]
    return e_bit_list

def numbit_list(quant_type):
    if quant_type == 'INT':
        num_bit_list = list(range(2,17))
    elif quant_type == 'POT':
        num_bit_list = list(range(2,9))
    else:
        num_bit_list = list(range(2,9))
        # num_bit_list = [8]
    return num_bit_list
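# Illustrative only: this helper is not used elsewhere in the repo; it shows
# one way a PTQ driver could enumerate configurations, matching the
# INT_* / POT_* / FLOAT_*_E* titles seen in ptq_result.txt (an assumption,
# not the actual driver code).
def list_quant_titles():
    titles = []
    for quant_type in ['INT', 'POT', 'FLOAT']:
        for num_bits in numbit_list(quant_type):
            for e_bits in ebit_list(quant_type, num_bits):
                if quant_type == 'FLOAT':
                    titles.append('%s_%d_E%d' % (quant_type, num_bits, e_bits))
                else:
                    titles.append('%s_%d' % (quant_type, num_bits))
    return titles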
def build_bias_list(quant_type):
    # biases are kept at a fixed, higher precision than weights/activations
    if quant_type == 'POT':
        return build_pot_list(8)
    else:
        return build_float_list(16,7)
def build_list(quant_type, num_bits, e_bits):
if quant_type == 'POT':
return build_pot_list(num_bits)
else:
return build_float_list(num_bits,e_bits)
def build_pot_list(num_bits):
    plist = [0.]
    for i in range(-2 ** (num_bits-1) + 2, 1):
        # i goes up to 0, so the largest representable POT value is 2**0 = 1
        plist.append(2. ** i)
        plist.append(-2. ** i)
    plist = torch.Tensor(list(set(plist)))
    # plist = plist.mul(1.0 / torch.max(plist))
    return plist
def build_float_list(num_bits,e_bits):
    m_bits = num_bits - 1 - e_bits
    plist = [0.]
    # spacing between adjacent mantissa values
    dist_m = 2 ** (-m_bits)
    e = -2 ** (e_bits - 1) + 1
    # subnormal values: minimum exponent, no implicit leading 1
    for m in range(1, 2 ** m_bits):
        frac = m * dist_m   # mantissa part
        expo = 2 ** e       # exponent part
        flt = frac * expo
        plist.append(flt)
        plist.append(-flt)
    # normal values: implicit leading 1 in the mantissa
    for e in range(-2 ** (e_bits - 1) + 2, 2 ** (e_bits - 1) + 1):
        expo = 2 ** e
        for m in range(0, 2 ** m_bits):
            frac = 1. + m * dist_m
            flt = frac * expo
            plist.append(flt)
            plist.append(-flt)
    plist = torch.Tensor(list(set(plist)))
    return plist
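# Quick sanity check (illustrative, not called anywhere): build_float_list(4, 2)
# has m_bits = 1, so the representable grid is
#     {0, ±0.25 (subnormal), ±1.0, ±1.5, ±2.0, ±3.0, ±4.0, ±6.0}
# i.e. 15 distinct values, while build_pot_list(4) gives {0, ±2**-6, ..., ±2**0}.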
def fold_ratio(layer, par_ratio, flop_ratio):
    # merge each BN layer's parameter/FLOP share into the preceding conv layer,
    # mirroring the conv+bn folding applied to the model itself
    for idx, name in enumerate(layer):
        if 'bn' in name:
            par_ratio[idx-1] += par_ratio[idx]
            flop_ratio[idx-1] += flop_ratio[idx]
    return par_ratio, flop_ratio
def fold_model(model):
    # fold every BN layer into the module that directly precedes it in
    # named_modules() order (assumed to be the matching conv);
    # fold_bn updates that conv's weight and bias in place
    module_list = []
    for idx, (name, module) in enumerate(model.named_modules()):
        module_list.append(module)
        if 'bn' in name:
            module_list[idx-1] = fold_bn(module_list[idx-1], module)
    return model
# def fold_model(model):
#     last_conv = None
#     last_bn = None
#     for name, module in model.named_modules():
#         if isinstance(module, nn.Conv2d):
#             # current module is a conv layer: fold any pending BN into the
#             # previous conv first, then remember this conv
#             if last_bn is not None:
#                 last_conv = fold_bn(last_conv, last_bn)
#                 last_bn = None
#             last_conv = module
#         elif isinstance(module, nn.BatchNorm2d):
#             # current module is a BN layer: fold it into the previous conv
#             last_bn = module
#             if last_conv is not None:
#                 last_conv = fold_bn(last_conv, last_bn)
#                 last_bn = None
#     # handle a trailing BN layer
#     if last_bn is not None:
#         last_conv = fold_bn(last_conv, last_bn)
#     return model
def fold_bn(conv, bn):
    # read the BN parameters
    gamma = bn.weight.data
    beta = bn.bias.data
    mean = bn.running_mean
    var = bn.running_var
    eps = bn.eps
    std = torch.sqrt(var + eps)
    feat = bn.num_features
    # read the conv parameters (the conv may have been built without a bias)
    weight = conv.weight.data
    bias = conv.bias.data if conv.bias is not None else None
    if bn.affine:
        gamma_ = gamma / std
        weight = weight * gamma_.view(feat, 1, 1, 1)
        if bias is not None:
            bias = gamma_ * bias - gamma_ * mean + beta
        else:
            bias = beta - gamma_ * mean
    else:
        gamma_ = 1 / std
        weight = weight * gamma_.view(feat, 1, 1, 1)
        if bias is not None:
            bias = gamma_ * bias - gamma_ * mean
        else:
            bias = -gamma_ * mean
    # write the folded weight and bias back into the conv layer
    conv.weight.data = weight
    if conv.bias is not None:
        conv.bias.data = bias
    else:
        conv.bias = nn.Parameter(bias)
    return conv
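# Illustrative usage (an assumption about how ptq.py consumes these helpers,
# not code taken from it): fold BN into the preceding convolutions of the
# full-precision model, then merge the per-layer ratios so each conv+bn pair
# is treated as a single layer.
#
#   model_fold = fold_model(model)
#   layer, par_ratio, flop_ratio = extract_ratio()
#   par_ratio, flop_ratio = fold_ratio(layer, par_ratio, flop_ratio)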
\ No newline at end of file