Commit 62550290 by Zhihong Ma

feat: new version combined. ResNet18 trial

parent 84050d7f
import sys
import os

# Extract the per-layer parameter and FLOP ratios from param_flops.txt,
# the file that the output of get_param.py is redirected into.
def extract_ratio():
    fr = open('param_flops.txt', 'r')
    lines = fr.readlines()
    layer = []
    par_ratio = []
    flop_ratio = []
    for line in lines:
        if '(' in line and ')' in line:
            layer.append(line.split(')')[0].split('(')[1])
            r1 = line.split('%')[0].split(',')[-1]
            r1 = float(r1)
            par_ratio.append(r1)
            r2 = line.split('%')[-2].split(',')[-1]
            r2 = float(r2)
            flop_ratio.append(r2)
    fr.close()
    return layer, par_ratio, flop_ratio

if __name__ == "__main__":
    layer, par_ratio, flop_ratio = extract_ratio()
    print(layer)
    print(par_ratio)
    print(flop_ratio)
from torch.autograd import Function

class FakeQuantize(Function):
    """Fake quantization: quantize then dequantize in the forward pass,
    and pass the gradient straight through in the backward pass (STE)."""

    @staticmethod
    def forward(ctx, x, qparam):
        x = qparam.quantize_tensor(x)
        x = qparam.dequantize_tensor(x)
        return x

    @staticmethod
    def backward(ctx, grad_output):
        # Straight-through estimator: identity gradient for x, none for qparam.
        return grad_output, None
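A minimal usage sketch, run next to the class above (the ToyQParam class is an assumption for illustration; any object exposing quantize_tensor/dequantize_tensor works the same way): calling the function through .apply fake-quantizes the tensor while gradients flow straight through.

```
import torch

# Hypothetical stand-in for the project's quantization-parameter object.
class ToyQParam:
    def __init__(self, scale):
        self.scale = scale
    def quantize_tensor(self, x):
        return torch.round(x / self.scale)
    def dequantize_tensor(self, q):
        return q * self.scale

x = torch.randn(4, requires_grad=True)
y = FakeQuantize.apply(x, ToyQParam(scale=0.1))
y.sum().backward()
print(x.grad)  # all ones: the straight-through gradient
```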
from model import *
import torch
from ptflops import get_model_complexity_info

if __name__ == "__main__":
    model = resnet18()
    full_file = 'ckpt/cifar10_ResNet18.pt'
    model.load_state_dict(torch.load(full_file))
    flops, params = get_model_complexity_info(model, (3, 32, 32), as_strings=True, print_per_layer_stat=True)
import os
import torch
import torch.nn as nn
import torch.nn.functional as F
import numpy as np
from collections import OrderedDict

def get_model_histogram(model):
    """
    Description:
        - get gradient histograms from the model and store them in an OrderedDict
    Args:
        - model: (torch.nn.Module), torch model
    Returns:
        - histograms and raw gradients, each in an OrderedDict
    """
    gradshisto = OrderedDict()
    grads = OrderedDict()
    for name, params in model.named_parameters():
        grad = params.grad
        if grad is not None:
            tmp = {}
            params_np = grad.cpu().numpy()
            histogram, bins = np.histogram(params_np.flatten(), bins=20)
            tmp['histogram'] = list(histogram)
            tmp['bins'] = list(bins)
            gradshisto[name] = tmp
            grads[name] = params_np
    return gradshisto, grads

def get_model_norm_gradient(model):
    """
    Description:
        - get gradient norms from the model and store them in an OrderedDict
    Args:
        - model: (torch.nn.Module), torch model
    Returns:
        - gradient norms in an OrderedDict
    """
    grads = OrderedDict()
    for name, params in model.named_parameters():
        grad = params.grad
        if grad is not None:
            grads[name] = grad.norm().item()
    return grads

def get_grad_histogram(grads_sum):
    gradshisto = OrderedDict()
    # grads = OrderedDict()
    for name, params in grads_sum.items():
        grad = params
        if grad is not None:
            tmp = {}
            # params_np = grad.cpu().numpy()
            params_np = grad
            histogram, bins = np.histogram(params_np.flatten(), bins=20)
            tmp['histogram'] = list(histogram)
            tmp['bins'] = list(bins)
            gradshisto[name] = tmp  # one histogram per layer (tmp holds the data describing the histogram)
            # grads[name] = params_np
    return gradshisto
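A small usage sketch for these helpers, run in the same module (the tiny linear model and random data are placeholders; gradients must already exist, i.e. backward() has been called):

```
model = nn.Linear(10, 2)                     # placeholder model
model(torch.randn(4, 10)).sum().backward()   # populate .grad
histos, raw_grads = get_model_histogram(model)
norms = get_model_norm_gradient(model)
print(histos['weight']['histogram'])         # 20 bin counts for the weight gradient
print(norms)                                 # {'weight': ..., 'bias': ...}
```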
class GlobalVariables:
    SELF_INPLANES = 0
# -*- coding: utf-8 -*-
# Share global variables across multiple modules.
def _init():  # initialization
    global _global_dict
    _global_dict = {}

def set_value(value, is_bias=False):
    # set a global value; the bias keeps its own slot
    if is_bias:
        _global_dict[0] = value
    else:
        _global_dict[1] = value

def get_value(is_bias=False):  # the bias gets a precision independent of the other variables
    if is_bias:
        return _global_dict[0]
    else:
        return _global_dict[1]
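A usage sketch, assuming the file above is saved as gol.py (the module name and the bit widths are illustrative): one shared precision value for weights/activations, plus a separate one for biases.

```
import gol  # hypothetical module name for the file above

gol._init()                      # must run once before set_value/get_value
gol.set_value(8)                 # shared bit width
gol.set_value(16, is_bias=True)  # independent, higher precision for biases
print(gol.get_value())               # 8
print(gol.get_value(is_bias=True))   # 16
```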
2023.4.10

Note: the code in new_mzh now uses the measurement method and quantization conventions agreed upon with You Kunlin, and has been rebuilt on top of his version of the code.
Quantizing the BN layers raised quite a few problems; thanks to You Kunlin for the help :D

Code changes:
To quantize ResNet18, the quantized layers newly added in module.py include QConvBNReLu, QConvBN, QElementwiseAdd, and QAdaptiveAvgPool2d. The quantized ResNet18 architecture is built in model.py; class BasicBlock, class Bottleneck, class MakeLayer, etc. keep the ResNet structure extensible, so it can be extended to ResNet50 and ResNet152 fairly easily, as the configuration sketch below illustrates.
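For reference, these are the standard depth settings such a MakeLayer-style builder needs to switch between (the dictionary is only an illustration, not code from model.py):

```
# Standard ResNet configurations: (block type, blocks per stage).
resnet_cfgs = {
    'resnet18':  ('BasicBlock', [2, 2, 2, 2]),
    'resnet34':  ('BasicBlock', [3, 4, 6, 3]),
    'resnet50':  ('Bottleneck', [3, 4, 6, 3]),
    'resnet152': ('Bottleneck', [3, 8, 36, 3]),
}
```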
To do:
Compared with AlexNet, VGG, etc., the ResNet architecture is much less flat: the MakeLayer and residual structures mean it is not a plain sequential network, so many of the earlier algorithms (e.g. for computing parameter or gradient similarity) cannot be applied to ResNet directly; when traversing the network parameters there are layer, sequential, and block wrappers around the conv and bn layers. The parameter-similarity and gradient-similarity analysis is left for follow-up work.
QAT support is also left for later.

The experiments below are the PTQ results for ResNet18 (js_flops and js_param have not yet been switched to the new computation, so they are reported as 0 for now):
```
PTQ: INT_2
direct quantization finish
Test set: Quant Model Accuracy: 10.00%
INT_2: js_flops: 0.000000 js_param: 0.000000 acc_loss: 0.883599
PTQ: INT_3
direct quantization finish
Test set: Quant Model Accuracy: 10.00%
INT_3: js_flops: 0.000000 js_param: 0.000000 acc_loss: 0.883599
PTQ: INT_4
direct quantization finish
Test set: Quant Model Accuracy: 49.76%
INT_4: js_flops: 0.000000 js_param: 0.000000 acc_loss: 0.420789
PTQ: INT_5
direct quantization finish
Test set: Quant Model Accuracy: 80.86%
INT_5: js_flops: 0.000000 js_param: 0.000000 acc_loss: 0.058782
PTQ: INT_6
direct quantization finish
Test set: Quant Model Accuracy: 84.91%
INT_6: js_flops: 0.000000 js_param: 0.000000 acc_loss: 0.011640
PTQ: INT_7
direct quantization finish
Test set: Quant Model Accuracy: 85.60%
INT_7: js_flops: 0.000000 js_param: 0.000000 acc_loss: 0.003608
PTQ: INT_8
direct quantization finish
Test set: Quant Model Accuracy: 85.85%
INT_8: js_flops: 0.000000 js_param: 0.000000 acc_loss: 0.000698
PTQ: INT_9
direct quantization finish
Test set: Quant Model Accuracy: 85.64%
INT_9: js_flops: 0.000000 js_param: 0.000000 acc_loss: 0.003143
PTQ: INT_10
direct quantization finish
Test set: Quant Model Accuracy: 82.81%
INT_10: js_flops: 0.000000 js_param: 0.000000 acc_loss: 0.036084
PTQ: INT_11
direct quantization finish
Test set: Quant Model Accuracy: 74.91%
INT_11: js_flops: 0.000000 js_param: 0.000000 acc_loss: 0.128041
PTQ: INT_12
direct quantization finish
Test set: Quant Model Accuracy: 56.50%
INT_12: js_flops: 0.000000 js_param: 0.000000 acc_loss: 0.342335
PTQ: INT_13
direct quantization finish
Test set: Quant Model Accuracy: 26.25%
INT_13: js_flops: 0.000000 js_param: 0.000000 acc_loss: 0.694448
PTQ: INT_14
direct quantization finish
Test set: Quant Model Accuracy: 14.16%
INT_14: js_flops: 0.000000 js_param: 0.000000 acc_loss: 0.835176
PTQ: INT_15
direct quantization finish
Test set: Quant Model Accuracy: 11.29%
INT_15: js_flops: 0.000000 js_param: 0.000000 acc_loss: 0.868583
PTQ: INT_16
direct quantization finish
Test set: Quant Model Accuracy: 10.25%
INT_16: js_flops: 0.000000 js_param: 0.000000 acc_loss: 0.880689
PTQ: POT_2
direct quantization finish
Test set: Quant Model Accuracy: 10.00%
POT_2: js_flops: 0.000000 js_param: 0.000000 acc_loss: 0.883599
PTQ: POT_3
direct quantization finish
Test set: Quant Model Accuracy: 10.00%
POT_3: js_flops: 0.000000 js_param: 0.000000 acc_loss: 0.883599
PTQ: POT_4
direct quantization finish
Test set: Quant Model Accuracy: 44.75%
POT_4: js_flops: 0.000000 js_param: 0.000000 acc_loss: 0.479106
PTQ: POT_5
direct quantization finish
Test set: Quant Model Accuracy: 40.29%
POT_5: js_flops: 0.000000 js_param: 0.000000 acc_loss: 0.531021
PTQ: POT_6
direct quantization finish
Test set: Quant Model Accuracy: 50.13%
POT_6: js_flops: 0.000000 js_param: 0.000000 acc_loss: 0.416482
PTQ: POT_7
direct quantization finish
Test set: Quant Model Accuracy: 45.75%
POT_7: js_flops: 0.000000 js_param: 0.000000 acc_loss: 0.467466
PTQ: POT_8
direct quantization finish
Test set: Quant Model Accuracy: 39.79%
POT_8: js_flops: 0.000000 js_param: 0.000000 acc_loss: 0.536841
PTQ: FLOAT_3_E1
direct quantization finish
Test set: Quant Model Accuracy: 9.93%
FLOAT_3_E1: js_flops: 0.000000 js_param: 0.000000 acc_loss: 0.884414
PTQ: FLOAT_4_E1
direct quantization finish
Test set: Quant Model Accuracy: 39.63%
FLOAT_4_E1: js_flops: 0.000000 js_param: 0.000000 acc_loss: 0.538703
PTQ: FLOAT_4_E2
direct quantization finish
Test set: Quant Model Accuracy: 70.74%
FLOAT_4_E2: js_flops: 0.000000 js_param: 0.000000 acc_loss: 0.176580
PTQ: FLOAT_5_E1
direct quantization finish
Test set: Quant Model Accuracy: 65.04%
FLOAT_5_E1: js_flops: 0.000000 js_param: 0.000000 acc_loss: 0.242929
PTQ: FLOAT_5_E2
direct quantization finish
Test set: Quant Model Accuracy: 82.65%
FLOAT_5_E2: js_flops: 0.000000 js_param: 0.000000 acc_loss: 0.037947
PTQ: FLOAT_5_E3
direct quantization finish
Test set: Quant Model Accuracy: 80.86%
FLOAT_5_E3: js_flops: 0.000000 js_param: 0.000000 acc_loss: 0.058782
PTQ: FLOAT_6_E1
direct quantization finish
Test set: Quant Model Accuracy: 74.17%
FLOAT_6_E1: js_flops: 0.000000 js_param: 0.000000 acc_loss: 0.136655
PTQ: FLOAT_6_E2
direct quantization finish
Test set: Quant Model Accuracy: 84.28%
FLOAT_6_E2: js_flops: 0.000000 js_param: 0.000000 acc_loss: 0.018973
PTQ: FLOAT_6_E3
direct quantization finish
Test set: Quant Model Accuracy: 84.81%
FLOAT_6_E3: js_flops: 0.000000 js_param: 0.000000 acc_loss: 0.012804
PTQ: FLOAT_6_E4
direct quantization finish
Test set: Quant Model Accuracy: 78.06%
FLOAT_6_E4: js_flops: 0.000000 js_param: 0.000000 acc_loss: 0.091375
PTQ: FLOAT_7_E1
direct quantization finish
Test set: Quant Model Accuracy: 76.20%
FLOAT_7_E1: js_flops: 0.000000 js_param: 0.000000 acc_loss: 0.113025
PTQ: FLOAT_7_E2
direct quantization finish
Test set: Quant Model Accuracy: 84.83%
FLOAT_7_E2: js_flops: 0.000000 js_param: 0.000000 acc_loss: 0.012571
PTQ: FLOAT_7_E3
direct quantization finish
Test set: Quant Model Accuracy: 85.55%
FLOAT_7_E3: js_flops: 0.000000 js_param: 0.000000 acc_loss: 0.004190
PTQ: FLOAT_7_E4
direct quantization finish
Test set: Quant Model Accuracy: 82.00%
FLOAT_7_E4: js_flops: 0.000000 js_param: 0.000000 acc_loss: 0.045513
PTQ: FLOAT_7_E5
direct quantization finish
Test set: Quant Model Accuracy: 10.00%
FLOAT_7_E5: js_flops: 0.000000 js_param: 0.000000 acc_loss: 0.883599
PTQ: FLOAT_8_E1
direct quantization finish
Test set: Quant Model Accuracy: 77.39%
FLOAT_8_E1: js_flops: 0.000000 js_param: 0.000000 acc_loss: 0.099174
PTQ: FLOAT_8_E2
direct quantization finish
Test set: Quant Model Accuracy: 85.21%
FLOAT_8_E2: js_flops: 0.000000 js_param: 0.000000 acc_loss: 0.008148
PTQ: FLOAT_8_E3
direct quantization finish
Test set: Quant Model Accuracy: 86.00%
FLOAT_8_E3: js_flops: 0.000000 js_param: 0.000000 acc_loss: -0.001048
PTQ: FLOAT_8_E4
direct quantization finish
Test set: Quant Model Accuracy: 83.26%
FLOAT_8_E4: js_flops: 0.000000 js_param: 0.000000 acc_loss: 0.030846
PTQ: FLOAT_8_E5
direct quantization finish
Test set: Quant Model Accuracy: 10.02%
FLOAT_8_E5: js_flops: 0.000000 js_param: 0.000000 acc_loss: 0.883366
PTQ: FLOAT_8_E6
direct quantization finish
Test set: Quant Model Accuracy: 13.09%
FLOAT_8_E6: js_flops: 0.000000 js_param: 0.000000 acc_loss: 0.847631
```
Before I added a correct QElementwiseAdd layer, post-PTQ accuracy never exceeded 15%, which shows how important this layer is. It performs the addition of the residual connection: the outputs of the two branches live in different quantization ranges, so they cannot be added directly and must first be rescaled into a common range.
So far, INT quantization accuracy first rises and then falls as the bit width grows. I inspected the post-quantization parameter distributions and their overall shape is quite close to that of the full-precision model, so the problem is not in the ordinary quantized layers such as Conv and BN. My guess is that at larger bit widths the rescaling inside QElementwiseAdd overflows; this still needs to be checked.
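A minimal sketch of that rescale for the uniform (INT-style) case, not the actual QElementwiseAdd implementation; the function and parameter names are made up for illustration. With r = S * (q - Z), adding two quantized tensors means mapping both onto the output scale first:

```
import torch

def q_add_rescale(q1, s1, z1, q2, s2, z2, s_out, z_out, qmin, qmax):
    # q_out = Z_out + (S1/S_out)*(q1 - Z1) + (S2/S_out)*(q2 - Z2)
    acc = (s1 / s_out) * (q1 - z1) + (s2 / s_out) * (q2 - z2)
    return torch.clamp(torch.round(acc + z_out), qmin, qmax)

# e.g. two INT8 feature maps that were quantized with different ranges
q1 = torch.randint(0, 256, (2, 8, 4, 4)).float()
q2 = torch.randint(0, 256, (2, 8, 4, 4)).float()
out = q_add_rescale(q1, 0.02, 128, q2, 0.05, 120, 0.06, 128, 0, 255)
```

If the scale ratios and the accumulator are themselves held in limited precision rather than in floating point, this intermediate sum is where an overflow at large bit widths would show up, which matches the suspicion above.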
#!/bin/bash
#- Job parameters
# (TODO)
# Please modify job name
#SBATCH -J Resnet18_trial # The job name
#SBATCH -o ./info/ret-%j.out # Write the standard output to file named 'ret-<job_number>.out'
#SBATCH -e ./info/ret-%j.err # Write the standard error to file named 'ret-<job_number>.err'
#- Resources
# (TODO)
# Please modify your requirements
#SBATCH -p nv-gpu # Submit to 'nv-gpu' Partition
#SBATCH -t 0-12:00:00 # Run for a maximum time of 0 days, 12 hours, 00 mins, 00 secs
#SBATCH --nodes=1 # Request N nodes
#SBATCH --gres=gpu:1 # Request M GPU per node
#SBATCH --gres-flags=enforce-binding # CPU-GPU Affinity
#SBATCH --qos=gpu-normal # Request QOS Type
###
### The system will allocate 8 or 16 cores per GPU by default.
### If you need more or fewer, use the following:
### #SBATCH --cpus-per-task=K # Request K cores
###
###
### Without specifying the constraint, any available nodes that meet the requirement will be allocated
### You can specify the characteristics of the compute nodes, and even the names of the compute nodes
###
### #SBATCH --nodelist=gpu-v00 # Request a specific list of hosts
### #SBATCH --constraint="Volta|RTX8000" # Request GPU Type: Volta(V100 or V100S) or RTX8000
###
#- Log information
echo "Job start at $(date "+%Y-%m-%d %H:%M:%S")"
echo "Job run at:"
echo "$(hostnamectl)"
#- Load environments
source /tools/module_env.sh
source pyt1.5/bin/activate
module list # list modules loaded
##- Tools
module load cluster-tools/v1.0
module load slurm-tools/v1.0
module load cmake/3.15.7
module load git/2.17.1
module load vim/8.1.2424
##- language
module load python3/3.6.8
##- CUDA
module load cuda-cudnn/11.1-8.1.1
##- virtualenv
# source xxxxx/activate
echo $(module list) # list modules loaded
echo $(which gcc)
echo $(which python)
echo $(which python3)
cluster-quota # nas quota
nvidia-smi --format=csv --query-gpu=name,driver_version,power.limit # gpu info
#- Warning! Please do not change your CUDA_VISIBLE_DEVICES
#- in `.bashrc`, `env.sh`, or your job script
echo "Use GPU ${CUDA_VISIBLE_DEVICES}" # which gpus
#- The CUDA_VISIBLE_DEVICES variable is assigned and specified by SLURM
#- Job step
# [EDIT HERE(TODO)]
sleep 2s
hostname
echo "python ./new_train.py -m ResNet18 -e 60 -b 128 -j 4 -lr 0.001 -wd 0.0001"
python ./new_train.py -m ResNet18 -e 60 -b 128 -j 4 -lr 0.001 -wd 0.0001
#- End
echo "Job end at $(date "+%Y-%m-%d %H:%M:%S")"
#!/bin/bash
#- Job parameters
# (TODO)
# Please modify job name
#SBATCH -J ResNet18_trial # The job name
#SBATCH -o ./info/ret-%j.out # Write the standard output to file named 'ret-<job_number>.out'
#SBATCH -e ./info/ret-%j.err # Write the standard error to file named 'ret-<job_number>.err'
#- Resources
# (TODO)
# Please modify your requirements
#SBATCH -p nv-gpu # Submit to 'nv-gpu' Partition
#SBATCH -t 0-12:00:00 # Run for a maximum time of 0 days, 12 hours, 00 mins, 00 secs
#SBATCH --nodes=1 # Request N nodes
#SBATCH --gres=gpu:1 # Request M GPU per node
#SBATCH --gres-flags=enforce-binding # CPU-GPU Affinity
#SBATCH --qos=gpu-normal # Request QOS Type
###
### The system will allocate 8 or 16 cores per GPU by default.
### If you need more or fewer, use the following:
### #SBATCH --cpus-per-task=K # Request K cores
###
###
### Without specifying the constraint, any available nodes that meet the requirement will be allocated
### You can specify the characteristics of the compute nodes, and even the names of the compute nodes
###
### #SBATCH --nodelist=gpu-v00 # Request a specific list of hosts
### #SBATCH --constraint="Volta|RTX8000" # Request GPU Type: Volta(V100 or V100S) or RTX8000
###
#- Log information
echo "Job start at $(date "+%Y-%m-%d %H:%M:%S")"
echo "Job run at:"
echo "$(hostnamectl)"
#- Load environments
source /tools/module_env.sh
source pyt1.5/bin/activate
module list # list modules loaded
##- Tools
module load cluster-tools/v1.0
module load slurm-tools/v1.0
module load cmake/3.15.7
module load git/2.17.1
module load vim/8.1.2424
##- language
module load python3/3.6.8
##- CUDA
module load cuda-cudnn/11.1-8.1.1
##- virtualenv
# source xxxxx/activate
echo $(module list) # list modules loaded
echo $(which gcc)
echo $(which python)
echo $(which python3)
cluster-quota # nas quota
nvidia-smi --format=csv --query-gpu=name,driver_version,power.limit # gpu info
#- Warning! Please do not change your CUDA_VISIBLE_DEVICES
#- in `.bashrc`, `env.sh`, or your job script
echo "Use GPU ${CUDA_VISIBLE_DEVICES}" # which gpus
#- The CUDA_VISIBLE_DEVICES variable is assigned and specified by SLURM
#- Job step
# [EDIT HERE(TODO)]
sleep 2s
hostname
echo "python ./ptq.py -m ResNet18 -b 128 -j 4"
python ./ptq.py -m ResNet18 -b 128 -j 4
#- End
echo "Job end at $(date "+%Y-%m-%d %H:%M:%S")"
import torch
import torch.nn as nn

def ebit_list(quant_type, num_bits):
    if quant_type == 'FLOAT':
        e_bit_list = list(range(1, num_bits - 1))
    else:
        e_bit_list = [0]
    return e_bit_list

def numbit_list(quant_type):
    if quant_type == 'INT':
        num_bit_list = list(range(2, 17))
    elif quant_type == 'POT':
        num_bit_list = list(range(2, 9))
    else:
        num_bit_list = list(range(2, 9))
        # num_bit_list = [8]
    return num_bit_list

def build_bias_list(quant_type):
    if quant_type == 'POT':
        return build_pot_list(8)
    else:
        return build_float_list(16, 7)

def build_list(quant_type, num_bits, e_bits):
    if quant_type == 'POT':
        return build_pot_list(num_bits)
    else:
        return build_float_list(num_bits, e_bits)

def build_pot_list(num_bits):
    plist = [0.]
    for i in range(-2 ** (num_bits - 1) + 2, 1):
        # i goes up to 0, so the largest POT-quantized value is 1
        plist.append(2. ** i)
        plist.append(-2. ** i)
    plist = torch.Tensor(list(set(plist)))
    # plist = plist.mul(1.0 / torch.max(plist))
    return plist

def build_float_list(num_bits, e_bits):
    m_bits = num_bits - 1 - e_bits
    plist = [0.]
    # spacing between adjacent mantissa values
    dist_m = 2 ** (-m_bits)
    e = -2 ** (e_bits - 1) + 1
    for m in range(1, 2 ** m_bits):
        frac = m * dist_m   # mantissa part (no implicit leading 1)
        expo = 2 ** e       # exponent part
        flt = frac * expo
        plist.append(flt)
        plist.append(-flt)
    for e in range(-2 ** (e_bits - 1) + 2, 2 ** (e_bits - 1) + 1):
        expo = 2 ** e
        for m in range(0, 2 ** m_bits):
            frac = 1. + m * dist_m
            flt = frac * expo
            plist.append(flt)
            plist.append(-flt)
    plist = torch.Tensor(list(set(plist)))
    return plist
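The value grids can be inspected directly from this module; in the naming of the PTQ results above, POT_5 presumably corresponds to build_pot_list(5) and FLOAT_8_E3 to build_float_list(8, 3) (an illustrative check only):

```
pot = build_pot_list(5)        # POT_5 grid: 0 and ±2**i, largest magnitude 1.0
flt = build_float_list(8, 3)   # FLOAT_8_E3 grid
print(pot.numel(), pot.abs().max().item())   # ..., 1.0
print(flt.numel(), flt.abs().max().item())
```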
def fold_ratio(layer, par_ratio, flop_ratio):
    idx = -1
    for name in layer:
        idx = idx + 1
        if 'bn' in name:
            # merge each bn layer's share into the preceding conv layer
            par_ratio[idx - 1] += par_ratio[idx]
            flop_ratio[idx - 1] += flop_ratio[idx]
    return par_ratio, flop_ratio

def fold_model(model):
    idx = -1
    module_list = []
    for name, module in model.named_modules():
        idx += 1
        module_list.append(module)
        if 'bn' in name:
            module_list[idx - 1] = fold_bn(module_list[idx - 1], module)  # modified here
    return model

# def fold_model(model):
#     last_conv = None
#     last_bn = None
#     for name, module in model.named_modules():
#         if isinstance(module, nn.Conv2d):
#             # current module is a conv layer: fold any pending BN into the previous conv
#             if last_bn is not None:
#                 last_conv = fold_bn(last_conv, last_bn)
#                 last_bn = None
#             last_conv = module
#         elif isinstance(module, nn.BatchNorm2d):
#             # current module is a BN layer: fold it into the previous conv layer
#             last_bn = module
#             if last_conv is not None:
#                 last_conv = fold_bn(last_conv, last_bn)
#                 last_bn = None
#     # handle the last BN layer
#     if last_bn is not None:
#         last_conv = fold_bn(last_conv, last_bn)
#     return model
def fold_bn(conv, bn):
    # BN layer statistics
    mean = bn.running_mean
    var = bn.running_var
    eps = bn.eps
    std = torch.sqrt(var + eps)
    feat = bn.num_features
    # conv layer parameters (conv.bias may be None)
    weight = conv.weight.data
    bias = conv.bias.data if conv.bias is not None else None
    if bn.affine:
        gamma = bn.weight.data
        beta = bn.bias.data
        gamma_ = gamma / std
        weight = weight * gamma_.view(feat, 1, 1, 1)
        if bias is not None:
            bias = gamma_ * bias - gamma_ * mean + beta
        else:
            bias = beta - gamma_ * mean
    else:
        gamma_ = 1 / std
        weight = weight * gamma_.view(feat, 1, 1, 1)
        if bias is not None:
            bias = gamma_ * bias - gamma_ * mean
        else:
            bias = -gamma_ * mean
    # write the folded weight and bias back into the conv layer
    conv.weight.data = weight
    if conv.bias is not None:
        conv.bias.data = bias
    else:
        conv.bias = nn.Parameter(bias)
    return conv
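A minimal sanity check of fold_bn, run in the same module (a sketch: the conv is created with bias=True, and the BN running statistics are populated by a few forward passes in train mode before switching to eval): the folded conv should reproduce conv followed by bn up to floating-point error.

```
if __name__ == "__main__":
    torch.manual_seed(0)
    conv = nn.Conv2d(3, 8, 3, padding=1, bias=True)
    bn = nn.BatchNorm2d(8)
    for _ in range(5):                      # populate running_mean / running_var
        bn(conv(torch.randn(4, 3, 32, 32)))
    bn.eval()
    x = torch.randn(2, 3, 32, 32)
    ref = bn(conv(x))                       # reference: conv followed by bn
    folded = fold_bn(conv, bn)              # note: modifies conv in place
    print(torch.allclose(folded(x), ref, atol=1e-5))  # expected: True
```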