Commit 68add01f by Klin
parents a2ea6085 4c8bc7fe
import torch
import torch.nn as nn
import torch.nn.functional as F
import numpy as np
import tqdm
import utils
import normal
import dataset as DATA
from typing import TYPE_CHECKING, Callable, List, Optional, Tuple, Union
from sklearn import metrics
class MLP_BLACKBOX(nn.Module):
def __init__(self, dim_in):
super(MLP_BLACKBOX, self).__init__()
self.dim_in = dim_in
self.fc1 = nn.Linear(self.dim_in, 512)
self.fc2 = nn.Linear(512, 128)
self.fc3 = nn.Linear(128, 32)
self.fc4 = nn.Linear(32, 2)
def forward(self, x):
x = x.view(-1, self.dim_in)
x = F.relu(self.fc1(x))
x = F.relu(self.fc2(x))
x = F.relu(self.fc3(x))
x = F.softmax(self.fc4(x), dim=1)
return x
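
# Usage sketch (illustrative, not part of the original pipeline): the attack
# input is a loss trajectory of length args.epochs_distill plus the attacked
# model's final loss, hence dim_in = epochs_distill + 1 (see the torch.cat
# calls below). The helper name and the value 50 are assumptions.
def _demo_attack_model_shapes(epochs_distill=50):
    model = MLP_BLACKBOX(dim_in=epochs_distill + 1)
    dummy = torch.randn(4, epochs_distill + 1)
    out = model(dummy)  # (4, 2): per-sample P(non-member), P(member)
    assert out.shape == (4, 2)
    return out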
def train_mia_attack_model(args, epoch, model, attack_train_loader, optimizer, loss_fn, device):
model.train()
train_loss = 0
correct = 0
    for batch_idx, (model_loss_ori, model_trajectory, original_labels, predicted_labels, predicted_status, member_status) in enumerate(attack_train_loader):
        # concatenate the loss trajectory with the final loss to form the attack input
        attack_input = torch.cat((model_trajectory, model_loss_ori.unsqueeze(1)), 1)
        attack_input = attack_input.to(device)
        output = model(attack_input)
        # member_status is the in/out membership label (1 = member, 0 = non-member),
        # so it is used directly as the training target
member_status = member_status.to(device)
# cross entropy
loss = loss_fn(output, member_status)
optimizer.zero_grad()
loss.backward()
optimizer.step()
train_loss += loss.item()
        # argmax index, i.e. the predicted in/out class
        pred = output.max(1, keepdim=True)[1]
        # compare predictions against the in/out membership labels
correct += pred.eq(member_status.view_as(pred)).sum().item()
train_loss /= len(attack_train_loader.dataset)
accuracy = 100. * correct / len(attack_train_loader.dataset)
return train_loss, accuracy/100.
def test_mia_attack_model(args, epoch, model, attack_test_loader, loss_fn, max_auc, max_acc, device):
model.eval()
test_loss = 0
correct = 0
auc_ground_truth = None
auc_pred = None
with torch.no_grad():
        for batch_idx, (model_loss_ori, model_trajectory, original_labels, predicted_labels, predicted_status, member_status) in enumerate(attack_test_loader):
            attack_input = torch.cat((model_trajectory, model_loss_ori.unsqueeze(1)), 1)
            attack_input = attack_input.to(device)
            output = model(attack_input)
            member_status = member_status.to(device)
            test_loss += loss_fn(output, member_status).item()
            _, pred = output.max(1, keepdim=True)  # argmax index
            correct += pred.eq(member_status.view_as(pred)).sum().item()
            # output is a softmax probability vector; use P(member) as the AUC score
            auc_pred_current = output[:, -1]
auc_ground_truth = member_status.cpu().numpy() if batch_idx == 0 else np.concatenate((auc_ground_truth, member_status.cpu().numpy()), axis=0)
auc_pred = auc_pred_current.cpu().numpy() if batch_idx == 0 else np.concatenate((auc_pred, auc_pred_current.cpu().numpy()), axis=0)
test_loss /= len(attack_test_loader.dataset)
accuracy = 100. * correct / len(attack_test_loader.dataset)
fpr, tpr, thresholds = metrics.roc_curve(auc_ground_truth, auc_pred, pos_label=1)
auc = metrics.auc(fpr, tpr)
if auc > max_auc:
max_auc = auc
save_data = {
'fpr': fpr,
'tpr': tpr
}
np.save(f'./outputs/{args.data}_{args.model}_{args.model_distill}_trajectory_auc', save_data)
if accuracy > max_acc:
max_acc = accuracy
return test_loss, accuracy/100., auc, max_auc, max_acc
def check_and_transform_label_format(
labels: np.ndarray, nb_classes: Optional[int] = None, return_one_hot: bool = True
) -> np.ndarray:
"""
Check label format and transform to one-hot-encoded labels if necessary
:param labels: An array of integer labels of shape `(nb_samples,)`, `(nb_samples, 1)` or `(nb_samples, nb_classes)`.
:param nb_classes: The number of classes.
:param return_one_hot: True if returning one-hot encoded labels, False if returning index labels.
:return: Labels with shape `(nb_samples, nb_classes)` (one-hot) or `(nb_samples,)` (index).
"""
if labels is not None:
if len(labels.shape) == 2 and labels.shape[1] > 1:
if not return_one_hot:
labels = np.argmax(labels, axis=1)
elif len(labels.shape) == 2 and labels.shape[1] == 1 and nb_classes is not None and nb_classes > 2:
labels = np.squeeze(labels)
if return_one_hot:
labels = to_categorical(labels, nb_classes)
elif len(labels.shape) == 2 and labels.shape[1] == 1 and nb_classes is not None and nb_classes == 2:
pass
elif len(labels.shape) == 1:
if return_one_hot:
if nb_classes == 2:
labels = np.expand_dims(labels, axis=1)
else:
labels = to_categorical(labels, nb_classes)
else:
raise ValueError(
"Shape of labels not recognised."
"Please provide labels in shape (nb_samples,) or (nb_samples, nb_classes)"
)
return labels
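
# Minimal sketch of the accepted label shapes (illustrative; the helper name
# is an assumption, not part of the original code): index labels round-trip
# through one-hot encoding and back.
def _demo_check_and_transform_label_format():
    y_idx = np.array([0, 2, 1])
    one_hot = check_and_transform_label_format(y_idx, nb_classes=3)         # (3, 3)
    back = check_and_transform_label_format(one_hot, return_one_hot=False)  # (3,)
    assert one_hot.shape == (3, 3) and np.array_equal(back, y_idx)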
def to_categorical(labels: Union[np.ndarray, List[float]], nb_classes: Optional[int] = None) -> np.ndarray:
"""
Convert an array of labels to binary class matrix.
:param labels: An array of integer labels of shape `(nb_samples,)`.
:param nb_classes: The number of classes (possible labels).
:return: A binary matrix representation of `y` in the shape `(nb_samples, nb_classes)`.
    Example:
labels = [0, 1, 2, 0, 2, 1]
=>
array([[1., 0., 0.],
[0., 1., 0.],
[0., 0., 1.],
[1., 0., 0.],
[0., 0., 1.],
[0., 1., 0.]], dtype=float32)
"""
labels = np.array(labels, dtype=np.int32)
if nb_classes is None:
nb_classes = np.max(labels) + 1
categorical = np.zeros((labels.shape[0], nb_classes), dtype=np.float32)
categorical[np.arange(labels.shape[0]), np.squeeze(labels)] = 1
return categorical
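
# Quick check mirroring the docstring example above (illustrative; when
# nb_classes is None it is inferred as max(labels) + 1).
def _demo_to_categorical():
    y = to_categorical([0, 1, 2, 0, 2, 1])
    assert y.shape == (6, 3) and y[0, 0] == 1.0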
def build_trajectory_membership_dataset(args, ori_model_path, device='cpu'):
    # model_name is "<dataset>_<architecture>"
if args.model == 'resnet18':
model_name = '{}_resnet18'.format(args.data)
elif args.model == 'resnet50':
model_name = '{}_resnet50'.format(args.data)
elif args.model == 'resnet152':
model_name = '{}_resnet152'.format(args.data)
elif args.model == 'mobilenetv2':
model_name = '{}_mobilenetv2'.format(args.data)
if args.mode == 'shadow':
cnn_model, cnn_params = normal.load_model(args, ori_model_path+'/shadow', model_name, epoch=args.epochs)
elif args.mode == 'target':
cnn_model, cnn_params = normal.load_model(args, ori_model_path+'/target', model_name, epoch=args.epochs)
MODEL = cnn_model.to(device)
    # cnn_params['task'] records which dataset to use
dataset = utils.get_dataset(cnn_params['task'], mode=args.mode, aug=True, batch_size=384)
if args.mode == 'target':
print('load target_dataset ... ')
train_loader = dataset.aug_target_train_loader
test_loader = dataset.aug_target_test_loader
elif args.mode == 'shadow':
print('load shadow_dataset ... ')
train_loader = dataset.aug_shadow_train_loader
test_loader = dataset.aug_shadow_test_loader
    model_loss_ori = None
    model_trajectory = None
    original_labels = None
    predicted_labels = None
    predicted_status = None
    member_status = None
def normalization(data):
_range = np.max(data) - np.min(data)
return (data - np.min(data)) / _range
MODEL.eval()
for loader_idx, data_loader in enumerate([train_loader, test_loader]):
top1 = DATA.AverageMeter()
        # ori_idx has shape [batch_size]: each sample's index in the original dataset
for data_idx, (data, target, ori_idx) in enumerate(data_loader):
            # loss trajectory of the distill model w.r.t. the labels, one column per distill epoch
batch_trajectory = get_trajectory(data, target, args, ori_model_path, device)
data, target = data.to(device), target.to(device)
batch_logit_target = MODEL(data)
            # row-wise argmax (predicted label)
_, batch_predict_label = batch_logit_target.max(1)
batch_predicted_label = batch_predict_label.long().cpu().detach().numpy()
batch_original_label = target.long().cpu().detach().numpy()
            # the attacked model's final loss, appended later to the end of the loss trajectory
batch_loss_target = [F.cross_entropy(batch_logit_target_i.unsqueeze(0), target_i.unsqueeze(0)) for (batch_logit_target_i, target_i) in zip(batch_logit_target, target)]
batch_loss_target = np.array([batch_loss_target_i.cpu().detach().numpy() for batch_loss_target_i in batch_loss_target])
            # (batch_size,) array: whether the argmax prediction matches the ground-truth label
batch_predicted_status = (torch.argmax(batch_logit_target, dim=1) == target).float().cpu().detach().numpy()
            # (batch_size,) -> (batch_size, 1) for later concatenation
batch_predicted_status = np.expand_dims(batch_predicted_status, axis=1)
# in / out label for attack model
member = np.repeat(np.array(int(1 - loader_idx)), batch_trajectory.shape[0], 0)
batch_loss_ori = batch_loss_target
            # assign on the first batch, concatenate afterwards
model_loss_ori = batch_loss_ori if loader_idx == 0 and data_idx == 0 else np.concatenate((model_loss_ori, batch_loss_ori), axis=0)
model_trajectory = batch_trajectory if loader_idx == 0 and data_idx == 0 else np.concatenate((model_trajectory, batch_trajectory), axis=0)
original_labels = batch_original_label if loader_idx == 0 and data_idx == 0 else np.concatenate((original_labels, batch_original_label), axis=0)
predicted_labels = batch_predicted_label if loader_idx == 0 and data_idx == 0 else np.concatenate((predicted_labels, batch_predicted_label), axis=0)
predicted_status = batch_predicted_status if loader_idx == 0 and data_idx == 0 else np.concatenate((predicted_status, batch_predicted_status), axis=0)
member_status = member if loader_idx == 0 and data_idx == 0 else np.concatenate((member_status, member), axis=0)
print(f'------------Loading trajectory {args.mode} dataset successfully!---------')
data = {
'model_loss_ori':model_loss_ori,
'model_trajectory':model_trajectory,
'original_labels':original_labels,
'predicted_labels':predicted_labels,
'predicted_status':predicted_status,
'member_status':member_status,
'nb_classes':dataset.num_classes
}
    # shadow-model data is saved as trajectory_train_data, target-model data as trajectory_test_data
dataset_type = 'trajectory_train_data' if args.mode == 'shadow' else 'trajectory_test_data'
utils.create_path(ori_model_path + f'/{args.mode}/{model_name}')
np.save(ori_model_path + f'/{args.mode}/{model_name}/{dataset_type}', data)
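
# Sketch (illustrative; the helper name is an assumption): the dict saved
# above round-trips through np.load with allow_pickle=True, exactly as the
# attack code does below. model_trajectory is (N, epochs_distill),
# model_loss_ori is (N,), member_status is (N,) with 1 = member, 0 = non-member.
def _demo_load_trajectory_dataset(path):
    data = np.load(path, allow_pickle=True).item()
    return data['model_trajectory'], data['model_loss_ori'], data['member_status']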
def trajectory_black_box_membership_inference_attack(args, models_path, device='cpu'):
if args.model == 'resnet18':
model_name = '{}_resnet18'.format(args.data)
elif args.model == 'resnet50':
model_name = '{}_resnet50'.format(args.data)
elif args.model == 'resnet152':
model_name = '{}_resnet152'.format(args.data)
elif args.model == 'mobilenetv2':
model_name = '{}_mobilenetv2'.format(args.data)
print(f"MODEL NAME IS :{model_name}")
if args.model_distill == 'resnet18':
model_distill_name = '{}_resnet18'.format(args.data)
elif args.model_distill == 'resnet50':
model_distill_name = '{}_resnet50'.format(args.data)
elif args.model_distill == 'resnet152':
model_distill_name = '{}_resnet152'.format(args.data)
elif args.model_distill == 'mobilenetv2':
model_distill_name = '{}_mobilenetv2'.format(args.data)
print(f"MODEL DISTILL NAME IS :{model_distill_name}")
    print(f'------------------model: {model_name}-------------------')
save_path = models_path + '/attack/' + model_name
utils.create_path(save_path)
best_prec1 = 0.0
best_auc = 0.0
AttackModelTrainSet = np.load(models_path + f'/shadow/{model_name}/trajectory_train_data.npy', allow_pickle=True).item()
AttackModelTestSet = np.load(models_path + f'/target/{model_name}/trajectory_test_data.npy', allow_pickle=True).item()
train_set = torch.utils.data.TensorDataset(
torch.from_numpy(np.array(AttackModelTrainSet['model_loss_ori'], dtype='f')),
torch.from_numpy(np.array(AttackModelTrainSet['model_trajectory'], dtype='f')),
        # convert to one-hot encoding
torch.from_numpy(np.array(check_and_transform_label_format(AttackModelTrainSet['original_labels'], nb_classes=AttackModelTrainSet['nb_classes'], return_one_hot=True))).type(torch.float),
torch.from_numpy(np.array(check_and_transform_label_format(AttackModelTrainSet['predicted_labels'], nb_classes=AttackModelTrainSet['nb_classes'], return_one_hot=True))).type(torch.long),
# train/test i.e. in or out
torch.from_numpy(np.array(check_and_transform_label_format(AttackModelTrainSet['predicted_status'], nb_classes=2, return_one_hot=True)[:,:2])).type(torch.long),
torch.from_numpy(np.array(AttackModelTrainSet['member_status'])).type(torch.long),)
test_set = torch.utils.data.TensorDataset(
torch.from_numpy(np.array(AttackModelTestSet['model_loss_ori'], dtype='f')),
torch.from_numpy(np.array(AttackModelTestSet['model_trajectory'], dtype='f')),
torch.from_numpy(np.array(check_and_transform_label_format(AttackModelTestSet['original_labels'], nb_classes=AttackModelTestSet['nb_classes'], return_one_hot=True))).type(torch.float),
torch.from_numpy(np.array(check_and_transform_label_format(AttackModelTestSet['predicted_labels'], nb_classes=AttackModelTestSet['nb_classes'], return_one_hot=True))).type(torch.long),
# train/test i.e. in or out
torch.from_numpy(np.array(check_and_transform_label_format(AttackModelTestSet['predicted_status'], nb_classes=2, return_one_hot=True)[:,:2])).type(torch.long),
torch.from_numpy(np.array(AttackModelTestSet['member_status'])).type(torch.long),)
attack_train_loader = torch.utils.data.DataLoader(train_set, batch_size=128, shuffle=True)
attack_test_loader = torch.utils.data.DataLoader(test_set, batch_size=128, shuffle=True)
print(f'-------------------"Loss Trajectory"------------------')
# 训练Attack Model
attack_model = MLP_BLACKBOX(dim_in = args.epochs_distill + 1)
attack_optimizer = torch.optim.SGD(attack_model.parameters(), lr=0.01, momentum=0.9, weight_decay=0.0001)
attack_model = attack_model.to(device)
loss_fn = nn.CrossEntropyLoss()
max_auc = 0
max_acc = 0
for epoch in range(100):
train_loss, train_prec1 = train_mia_attack_model(args, epoch, attack_model, attack_train_loader, attack_optimizer, loss_fn, device)
val_loss, val_prec1, val_auc, max_auc, max_acc = test_mia_attack_model(args, epoch, attack_model, attack_test_loader, loss_fn, max_auc, max_acc, device)
is_best_prec1 = val_prec1 > best_prec1
is_best_auc = val_auc > best_auc
if is_best_prec1:
best_prec1 = val_prec1
if is_best_auc:
best_auc = val_auc
        if epoch % 10 == 0:
            print('epoch:{} \t train_loss:{:.4f} \t test_loss:{:.4f} \t train_prec1:{:.4f} \t val_prec1:{:.4f} \t val_auc:{:.4f}'
                  .format(epoch, train_loss, val_loss, train_prec1, val_prec1, val_auc))
print('Max AUC: ', max_auc)
print('Max ACC: ', max_acc/100)
    # could be changed to save only the best-AUC or best-accuracy checkpoint
torch.save(attack_model.state_dict(), save_path + '/' + 'trajectory' + '.pkl')
    # ROC data of the best AUC, saved inside test_mia_attack_model
data_auc = np.load(f'./outputs/{args.data}_{args.model}_{args.model_distill}_trajectory_auc.npy', allow_pickle=True).item()
for i in range(len(data_auc['fpr'])):
if data_auc['fpr'][i] > 0.001:
print('TPR at 0.1% FPR: {:.1%}'.format(data_auc['tpr'][i-1]))
break
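
# Hedged alternative (not the original code): instead of scanning for the
# first FPR above the threshold and reporting the previous point, the ROC
# curve can be linearly interpolated at the target FPR. Helper name is an
# assumption.
def _tpr_at_fpr(fpr, tpr, target_fpr=1e-3):
    # sklearn's roc_curve returns fpr in increasing order, as np.interp requires
    return float(np.interp(target_fpr, fpr, tpr))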
def get_trajectory(data, target, args, model_path, device='cpu'):
if args.model_distill == 'resnet18':
model_name = '{}_resnet18'.format(args.data)
elif args.model_distill == 'resnet50':
model_name = '{}_resnet50'.format(args.data)
elif args.model_distill == 'resnet152':
model_name = '{}_resnet152'.format(args.data)
elif args.model_distill == 'mobilenetv2':
model_name = '{}_mobilenetv2'.format(args.data)
print(f"MODEL NAME IS :{model_name}")
trajectory = None
    # (data.shape[0], 1) array filled with -1: a placeholder for predicted labels (currently unused)
    predicted_label = np.array([-1]).repeat(data.shape[0], 0).reshape(data.shape[0], 1)
for s in range(1):
trajectory_current = None
model_path_current = 'networks/{}'.format(s)
        # for each distill epoch: record the distilled model's loss on (data, target) as one point of the trajectory
for i in range(1, args.epochs_distill+1):
            # reproduce the loss by loading the distilled model checkpoint saved at epoch i
if args.mode == 'shadow':
cnn_model_target, cnn_params_target = normal.load_model(args, model_path_current+'/distill_shadow', model_name, epoch=i)
elif args.mode == 'target':
cnn_model_target, cnn_params_target = normal.load_model(args, model_path_current+'/distill_target', model_name, epoch=i)
MODEL_target = cnn_model_target.to(device)
            # batch of input tensors
            data = data.to(device)
            # ground-truth labels
            target = target.to(device)
            # forward pass through the distilled model
            logit_target = MODEL_target(data)
            # per-sample cross-entropy loss between the distilled model's outputs and the labels
loss = [F.cross_entropy(logit_target_i.unsqueeze(0), target_i.unsqueeze(0)) for (logit_target_i, target_i) in zip(logit_target, target)]
            # list -> numpy array of shape (batch_size, 1)
loss = np.array([loss_i.detach().cpu().numpy() for loss_i in loss]).reshape(-1, 1)
            # concatenate along columns: one column per distill epoch
trajectory_current = loss if i == 1 else np.concatenate((trajectory_current, loss), 1)
        # accumulate across model instances (a single instance here, since s ranges over range(1))
trajectory = trajectory_current if s == 0 else trajectory + trajectory_current
return trajectory
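
# Sketch (illustrative; the helper name is an assumption): mirrors how the
# attack input is assembled above: a trajectory of shape (N, E) plus the
# attacked model's final loss (N,) gives the (N, E + 1) tensor fed to
# MLP_BLACKBOX.
def _append_final_loss(trajectory, final_loss):
    return np.concatenate((trajectory, final_loss.reshape(-1, 1)), axis=1)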
import torch
import torch.nn as nn
import torch.nn.functional as F
import math
import numpy as np
import utils
from module import *
import module
from global_var import GlobalVariables
# ResNet for CIFAR-10-sized inputs
class ResNet(nn.Module):
    # TODO: the constructor interface still needs adjustment
    def __init__(self, args, params):
super(ResNet, self).__init__()
        self.input_size = int(params['input_size'])
        self.num_classes = int(params['num_classes'])
        self.num_blocks = params['num_blocks']
        self.block_type = params['block_type']
        self.augment_training = params['augment_training']
if self.block_type == 'basic':
self.block = BasicBlock
elif self.block_type == 'bottle':
self.block = Bottleneck
        self.inplanes = 16  # CIFAR-10 images are small, so start with fewer channels
        GlobalVariables.SELF_INPLANES = self.inplanes
        # stem
self.conv1 = nn.Conv2d(3, 16, kernel_size=3, stride=1, padding=1,
bias=False)
self.bn1 = nn.BatchNorm2d(16)
self.relu = nn.ReLU()
        # residual stages (4 stages)
self.layer1 = MakeLayer_ResNet(self.block, 16, self.num_blocks[0])
self.layer2 = MakeLayer_ResNet(self.block, 32, self.num_blocks[1], stride=2)
self.layer3 = MakeLayer_ResNet(self.block, 64, self.num_blocks[2], stride=2)
self.layer4 = MakeLayer_ResNet(self.block, 128, self.num_blocks[3], stride=2)
        # classifier
self.avgpool = nn.AdaptiveAvgPool2d((1, 1))
self.fc = nn.Linear(128 * self.block.expansion, self.num_classes)
self.initialize_weights()
        # training/testing entry points depend on the run mode
if 'distill' in args.mode:
self.train_func = utils.cnn_train_dis
else:
self.train_func = utils.cnn_train
self.test_func = utils.cnn_test
def forward(self, x):
        # stem
x = self.conv1(x)
x = self.bn1(x)
x = self.relu(x)
        # unlike the ImageNet variant there is no initial maxpool: CIFAR-10 images are already small
        # residual stages
x = self.layer1(x)
x = self.layer2(x)
x = self.layer3(x)
x = self.layer4(x)
        # classifier
        x = self.avgpool(x)  # output has shape (B, C, 1, 1)
x = x.view(x.size(0), -1)
x = self.fc(x)
        # no softmax here; downstream losses such as F.cross_entropy expect raw logits
return x
def quantize(self, quant_type, num_bits=8, e_bits=3):
self.qconvbnrelu1 = QConvBNReLU(quant_type,self.conv1,self.bn1,qi=True,qo=True,num_bits=num_bits,e_bits=e_bits)
self.layer1.quantize(quant_type=quant_type,num_bits=num_bits, e_bits=e_bits)
self.layer2.quantize(quant_type=quant_type,num_bits=num_bits, e_bits=e_bits)
self.layer3.quantize(quant_type=quant_type,num_bits=num_bits, e_bits=e_bits)
self.layer4.quantize(quant_type=quant_type,num_bits=num_bits, e_bits=e_bits)
self.qavgpool1 = QAdaptiveAvgPool2d(quant_type,qi=False,qo=True,num_bits=num_bits,e_bits=e_bits)
self.qfc1 = QLinear(quant_type, self.fc,qi=False,qo=True,num_bits=num_bits,e_bits=e_bits)
# self.qfc1 = QLinear(quant_type, self.fc,qi=True,qo=True,num_bits=num_bits,e_bits=e_bits)
def quantize_forward(self, x):
x = self.qconvbnrelu1(x)
x = self.layer1.quantize_forward(x)
x = self.layer2.quantize_forward(x)
x = self.layer3.quantize_forward(x)
x = self.layer4.quantize_forward(x)
x = self.qavgpool1(x)
x = x.view(x.size(0), -1)
x = self.qfc1(x)
        # no softmax here; raw logits are returned
return x
def freeze(self):
        self.qconvbnrelu1.freeze()  # the first layer owns its qi, so freeze() needs no external qi
qo = self.layer1.freeze(qinput = self.qconvbnrelu1.qo)
qo = self.layer2.freeze(qinput = qo)
qo = self.layer3.freeze(qinput = qo)
qo = self.layer4.freeze(qinput = qo)
self.qavgpool1.freeze(qi=qo)
self.qfc1.freeze(qi=self.qavgpool1.qo)
# self.qfc1.freeze()
def fakefreeze(self):
self.qconvbnrelu1.fakefreeze()
self.layer1.fakefreeze()
self.layer2.fakefreeze()
self.layer3.fakefreeze()
self.layer4.fakefreeze()
self.qfc1.fakefreeze()
def quantize_inference(self, x):
qx = self.qconvbnrelu1.qi.quantize_tensor(x)
qx = self.qconvbnrelu1.quantize_inference(qx)
qx = self.layer1.quantize_inference(qx)
qx = self.layer2.quantize_inference(qx)
qx = self.layer3.quantize_inference(qx)
qx = self.layer4.quantize_inference(qx)
qx = self.qavgpool1.quantize_inference(qx)
qx = qx.view(qx.size(0), -1)
qx = self.qfc1.quantize_inference(qx)
qx = self.qfc1.qo.dequantize_tensor(qx)
        # no softmax here; raw logits are returned
return qx
def initialize_weights(self):
for m in self.modules():
if isinstance(m, nn.Conv2d):
nn.init.kaiming_normal_(m.weight, mode='fan_out', nonlinearity='relu')
elif isinstance(m, nn.BatchNorm2d):
m.weight.data.fill_(1)
m.bias.data.zero_()
elif isinstance(m, nn.Linear):
m.weight.data.normal_(0, 0.01)
m.bias.data.zero_()
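
# Sketch of the post-training-quantization flow implied by the methods above
# (illustrative; the quant_type string and the loader item layout are
# assumptions): 1) attach quantized wrappers, 2) calibrate statistics with
# quantize_forward, 3) freeze scales/zero-points, 4) run integer inference.
def _demo_resnet_ptq_flow(model, calib_loader, quant_type='INT'):
    model.eval()
    model.quantize(quant_type=quant_type, num_bits=8, e_bits=3)
    with torch.no_grad():
        for data, *_ in calib_loader:
            model.quantize_forward(data)  # collect qi/qo statistics
        model.freeze()
        for data, *_ in calib_loader:
            out = model.quantize_inference(data)
    return out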
# BasicBlock
class BasicBlock(nn.Module):
expansion = 1
def __init__(self, inplanes, planes, stride=1, downsample=None):
super(BasicBlock, self).__init__()
        # first 3x3 conv
self.conv1 = nn.Conv2d(inplanes, planes, kernel_size=3, stride=stride,
padding=1, bias=False)
self.bn1 = nn.BatchNorm2d(planes)
        # second 3x3 conv
self.conv2 = nn.Conv2d(planes, planes, kernel_size=3, stride=1,
padding=1, bias=False)
self.bn2 = nn.BatchNorm2d(planes)
# shortcut
self.relu = nn.ReLU()
self.downsample = downsample
self.stride = stride
def forward(self, x):
identity = x
out = self.conv1(x)
out = self.bn1(out)
out = self.relu(out)
out = self.conv2(out)
out = self.bn2(out)
if self.downsample is not None:
identity = self.downsample(identity)
out += identity
out = self.relu(out)
return out
def quantize(self, quant_type ,num_bits=8, e_bits=3):
self.qconvbnrelu1 = QConvBNReLU(quant_type,self.conv1,self.bn1,qi=False,qo=True,num_bits=num_bits,e_bits=e_bits)
self.qconvbn1 = QConvBN(quant_type,self.conv2,self.bn2,qi=False,qo=True,num_bits=num_bits,e_bits=e_bits)
if self.downsample is not None:
self.qconvbn2 = QConvBN(quant_type,self.downsample[0],self.downsample[1],qi=False,qo=True,num_bits=num_bits,e_bits=e_bits)
self.qelementadd = QElementwiseAdd(quant_type,qi0=False, qi1=False, qo=True,num_bits=num_bits,e_bits=e_bits)
        self.qrelu1 = QReLU(quant_type, qi=False, num_bits=num_bits, e_bits=e_bits)  # qi is supplied at freeze time
def quantize_forward(self, x):
identity = x
out = self.qconvbnrelu1(x)
out = self.qconvbn1(out)
if self.downsample is not None:
identity = self.qconvbn2(identity)
        # residual add, implemented as a dedicated elementwise-add module
        # out = identity + out
out = self.qelementadd(out,identity)
out = self.qrelu1(out)
return out
def freeze(self, qinput):
        # qconvbnrelu1 could reuse the previous layer's qo, but passing it through
        # is awkward, so each block takes qinput explicitly; still needs careful checking
        self.qconvbnrelu1.freeze(qi=qinput)  # chained to the previous module's last qo
self.qconvbn1.freeze(qi = self.qconvbnrelu1.qo)
if self.downsample is not None:
            self.qconvbn2.freeze(qi=qinput)  # downsample branch
self.qelementadd.freeze(qi0 = self.qconvbn1.qo, qi1 = self.qconvbn2.qo)
else:
self.qelementadd.freeze(qi0 = self.qconvbn1.qo, qi1 = qinput)
self.qrelu1.freeze(qi = self.qelementadd.qo)
        return self.qrelu1.qi  # the qi collected after ReLU serves as this block's output qo
def fakefreeze(self):
        # counterpart of freeze(); still needs careful checking
        self.qconvbnrelu1.fakefreeze()
self.qconvbn1.fakefreeze()
if self.downsample is not None:
            self.qconvbn2.fakefreeze()  # downsample branch
def quantize_inference(self, x):
        # no quantize_tensor/dequantize_tensor here: this is an interior block, so inputs and outputs stay in the quantized domain
identity = x
out = self.qconvbnrelu1.quantize_inference(x)
out = self.qconvbn1.quantize_inference(out)
if self.downsample is not None:
identity = self.qconvbn2.quantize_inference(identity)
        # out = identity + out is handled by the elementwise-add module
out = self.qelementadd.quantize_inference(out,identity)
out = self.qrelu1.quantize_inference(out)
return out
# Bottleneck
class Bottleneck(nn.Module):
expansion = 4
def __init__(self, inplanes, planes, stride=1, downsample=None):
super(Bottleneck, self).__init__()
        # 1x1 conv
self.conv1 = nn.Conv2d(inplanes, planes, kernel_size=1, bias=False)
self.bn1 = nn.BatchNorm2d(planes)
        # 3x3 conv
self.conv2 = nn.Conv2d(planes, planes, kernel_size=3, stride=stride,
padding=1, bias=False)
self.bn2 = nn.BatchNorm2d(planes)
        # 1x1 expansion conv
self.conv3 = nn.Conv2d(planes, planes * self.expansion, kernel_size=1,
bias=False)
self.bn3 = nn.BatchNorm2d(planes * self.expansion)
# shortcut
self.relu = nn.ReLU()
self.downsample = downsample
self.stride = stride
def forward(self, x):
identity = x
out = self.conv1(x)
out = self.bn1(out)
out = self.relu(out)
out = self.conv2(out)
out = self.bn2(out)
out = self.relu(out)
out = self.conv3(out)
out = self.bn3(out)
if self.downsample is not None:
identity = self.downsample(x)
        out += identity  # the residual addition happens here
out = self.relu(out)
return out
def quantize(self, quant_type ,num_bits=8, e_bits=3):
self.qconvbnrelu1 = QConvBNReLU(quant_type,self.conv1,self.bn1,qi=False,qo=True,num_bits=num_bits,e_bits=e_bits)
self.qconvbnrelu2 = QConvBNReLU(quant_type,self.conv2,self.bn2,qi=False,qo=True,num_bits=num_bits,e_bits=e_bits)
self.qconvbn1 = QConvBN(quant_type,self.conv3,self.bn3,qi=False,qo=True,num_bits=num_bits,e_bits=e_bits)
if self.downsample is not None:
self.qconvbn2 = QConvBN(quant_type,self.downsample[0],self.downsample[1],qi=False,qo=True,num_bits=num_bits,e_bits=e_bits)
self.qelementadd = QElementwiseAdd(quant_type,qi0=False, qi1=False, qo=True,num_bits=num_bits,e_bits=e_bits)
        self.qrelu1 = QReLU(quant_type, qi=False, num_bits=num_bits, e_bits=e_bits)  # qi is supplied at freeze time
def quantize_forward(self, x):
identity = x
out = self.qconvbnrelu1(x)
out = self.qconvbnrelu2(out)
out = self.qconvbn1(out)
if self.downsample is not None:
identity = self.qconvbn2(identity)
        # residual add, implemented as a dedicated elementwise-add module
        # out = identity + out
out = self.qelementadd(out,identity)
out = self.qrelu1(out)
return out
def freeze(self, qinput):
        # qconvbnrelu1 could reuse the previous layer's qo, but passing it through
        # is awkward, so each block takes qinput explicitly; still needs careful checking
        self.qconvbnrelu1.freeze(qi=qinput)  # chained to the previous module's last qo
self.qconvbnrelu2.freeze(qi=self.qconvbnrelu1.qo)
self.qconvbn1.freeze(qi = self.qconvbnrelu2.qo)
if self.downsample is not None:
            self.qconvbn2.freeze(qi=qinput)  # downsample branch
self.qelementadd.freeze(qi0 = self.qconvbn1.qo, qi1 = self.qconvbn2.qo)
else:
self.qelementadd.freeze(qi0 = self.qconvbn1.qo, qi1 = qinput)
        self.qrelu1.freeze(qi=self.qelementadd.qo)  # qi comes from the elementwise-add output
        return self.qrelu1.qi  # the qi collected after ReLU serves as this block's output qo
def fakefreeze(self):
        # counterpart of freeze(); still needs careful checking
        self.qconvbnrelu1.fakefreeze()
self.qconvbnrelu2.fakefreeze()
self.qconvbn1.fakefreeze()
if self.downsample is not None:
            self.qconvbn2.fakefreeze()  # downsample branch
def quantize_inference(self, x):
        # no quantize_tensor/dequantize_tensor here: interior block, inputs and outputs stay in the quantized domain
identity = x
out = self.qconvbnrelu1.quantize_inference(x)
out = self.qconvbnrelu2.quantize_inference(out)
out = self.qconvbn1.quantize_inference(out)
if self.downsample is not None:
identity = self.qconvbn2.quantize_inference(identity)
        # out = identity + out is handled by the elementwise-add module
out = self.qelementadd.quantize_inference(out,identity)
out = self.qrelu1.quantize_inference(out)
return out
class MakeLayer_ResNet(nn.Module):
def __init__(self, block, planes, blocks, stride=1):
super(MakeLayer_ResNet, self).__init__()
# print('makelayer init:'+ str(GlobalVariables.SELF_INPLANES))
self.downsample = None
if stride != 1 or GlobalVariables.SELF_INPLANES != planes * block.expansion:
self.downsample = nn.Sequential(
nn.Conv2d(GlobalVariables.SELF_INPLANES, planes * block.expansion,kernel_size=1, stride=stride, bias=False),
nn.BatchNorm2d(planes * block.expansion)
)
self.blockdict = nn.ModuleDict()
self.blockdict['block1'] = block(inplanes=GlobalVariables.SELF_INPLANES, planes=planes, stride=stride, downsample=self.downsample)
GlobalVariables.SELF_INPLANES = planes * block.expansion
        for i in range(1, blocks):  # remaining blocks, kept in a ModuleDict so each is addressable by name
            self.blockdict['block' + str(i+1)] = block(inplanes=GlobalVariables.SELF_INPLANES, planes=planes)  # instantiate the block
def forward(self,x):
for _, layer in self.blockdict.items():
x = layer(x)
return x
def quantize(self, quant_type, num_bits=8, e_bits=3):
        # TODO: verify
for _, layer in self.blockdict.items():
            layer.quantize(quant_type=quant_type, num_bits=num_bits, e_bits=e_bits)  # each block carries its own quantize strategy
def quantize_forward(self, x):
for _, layer in self.blockdict.items():
            x = layer.quantize_forward(x)  # each block implements quantize_forward
return x
    def freeze(self, qinput):  # qinput is handed in from the parent model's freeze()
        # the first block takes qinput; each later block chains on the previous block's qo
        # still needs careful checking
cnt = 0
for _, layer in self.blockdict.items():
if cnt == 0:
qo = layer.freeze(qinput = qinput)
cnt = 1
else:
                qo = layer.freeze(qinput=qo)  # each block implements freeze
        return qo  # returned for the next layer
def fakefreeze(self):
for _, layer in self.blockdict.items():
layer.fakefreeze()
def quantize_inference(self, x):
        # no quantize_tensor/dequantize_tensor here: interior module, inputs and outputs stay in the quantized domain
for _, layer in self.blockdict.items():
            x = layer.quantize_inference(x)  # each block implements quantize_inference
return x
# ResNet-18 factory (the depth is determined by model_params)
def resnet18(args, model_params):
model = ResNet(args, model_params)
return model
# ResNet-50 factory
def resnet50(args, model_params):
model = ResNet(args, model_params)
return model
# ResNet-152 factory
def resnet152(args, model_params):
model = ResNet(args, model_params)
return model
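
# Sketch of the params dict the factories above expect, based on the keys
# read in ResNet.__init__ (the values are assumptions for a CIFAR-10
# ResNet-18 with BasicBlock and [2, 2, 2, 2] blocks; any args.mode without
# 'distill' selects the plain training functions).
def _demo_resnet18_params():
    from types import SimpleNamespace
    params = {
        'num_classes': 10,
        'input_size': 32,
        'augment_training': True,
        'num_blocks': [2, 2, 2, 2],
        'block_type': 'basic',
    }
    args = SimpleNamespace(mode='target')
    return resnet18(args, params)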
class MobileNetV2(nn.Module):
def __init__(self, args,params):
super(MobileNetV2, self).__init__()
self.num_classes = int(params['num_classes'])
self.augment_training = params['augment_training']
self.input_size = int(params['input_size'])
if 'distill' in args.mode:
self.train_func = utils.cnn_train_dis
else:
self.train_func = utils.cnn_train
self.test_func = utils.cnn_test
self.conv1 = nn.Conv2d(3, 32, 3, stride=1, padding=1)
self.bn1 = nn.BatchNorm2d(32)
self.relu1 = nn.ReLU6(inplace=True)
        # inverted-residual stages; t is the channel expansion factor
self.layer1 = MakeLayer_MobileNet(32, 16, 1, t=1, stride=1)
self.layer2 = MakeLayer_MobileNet(16, 24, 2, t=6, stride=2)
self.layer3 = MakeLayer_MobileNet(24, 32, 3, t=6, stride=2)
        # stage layout adapted to the CIFAR-10 input size
self.layer4 = MakeLayer_MobileNet(32, 96, 3, t=6, stride=1)
self.layer5 = MakeLayer_MobileNet(96, 160, 3, t=6, stride=2)
self.layer6 = MakeLayer_MobileNet(160, 320, 1, t=6, stride=1)
self.conv2 = nn.Conv2d(320, 1280, 1)
self.avg1 = nn.AdaptiveAvgPool2d(1)
self.fc = nn.Linear(1280, self.num_classes)
def forward(self, x):
x = self.conv1(x)
x = self.bn1(x)
x = self.relu1(x)
x = self.layer1(x)
x = self.layer2(x)
x = self.layer3(x)
x = self.layer4(x)
x = self.layer5(x)
x = self.layer6(x)
x = self.conv2(x)
x = self.avg1(x)
x = x.view(x.size(0), -1)
x = self.fc(x)
return x
def quantize(self, quant_type, num_bits=8, e_bits=3):
self.qconvbnrelu1 = QConvBNReLU6(quant_type,self.conv1,self.bn1,qi=True,qo=True,num_bits=num_bits,e_bits=e_bits)
self.layer1.quantize(quant_type=quant_type,num_bits=num_bits, e_bits=e_bits)
self.layer2.quantize(quant_type=quant_type,num_bits=num_bits, e_bits=e_bits)
self.layer3.quantize(quant_type=quant_type,num_bits=num_bits, e_bits=e_bits)
self.layer4.quantize(quant_type=quant_type,num_bits=num_bits, e_bits=e_bits)
self.layer5.quantize(quant_type=quant_type,num_bits=num_bits, e_bits=e_bits)
self.layer6.quantize(quant_type=quant_type,num_bits=num_bits, e_bits=e_bits)
self.qconv1 = QConv2d(quant_type, self.conv2, qi=False, qo=True, num_bits=num_bits, e_bits=e_bits)
self.qavgpool1 = QAdaptiveAvgPool2d(quant_type,qi=False,qo=True,num_bits=num_bits,e_bits=e_bits)
self.qfc1 = QLinear(quant_type, self.fc,qi=False,qo=True,num_bits=num_bits,e_bits=e_bits)
def quantize_forward(self, x):
x = self.qconvbnrelu1(x)
x = self.layer1.quantize_forward(x)
x = self.layer2.quantize_forward(x)
x = self.layer3.quantize_forward(x)
x = self.layer4.quantize_forward(x)
x = self.layer5.quantize_forward(x)
x = self.layer6.quantize_forward(x)
x = self.qconv1(x)
x = self.qavgpool1(x)
x = x.view(x.size(0), -1)
x = self.qfc1(x)
        # no softmax here; raw logits are returned
return x
def freeze(self):
        self.qconvbnrelu1.freeze()  # the first layer owns its qi, so freeze() needs no external qi
qo = self.layer1.freeze(qinput = self.qconvbnrelu1.qo)
qo = self.layer2.freeze(qinput = qo)
qo = self.layer3.freeze(qinput = qo)
qo = self.layer4.freeze(qinput = qo)
qo = self.layer5.freeze(qinput = qo)
qo = self.layer6.freeze(qinput = qo)
self.qconv1.freeze(qi = qo)
self.qavgpool1.freeze(qi=self.qconv1.qo)
self.qfc1.freeze(qi=self.qavgpool1.qo)
# self.qfc1.freeze()
def fakefreeze(self):
self.qconvbnrelu1.fakefreeze()
self.layer1.fakefreeze()
self.layer2.fakefreeze()
self.layer3.fakefreeze()
self.layer4.fakefreeze()
self.layer5.fakefreeze()
self.layer6.fakefreeze()
self.qconv1.fakefreeze()
self.qfc1.fakefreeze()
def quantize_inference(self, x):
qx = self.qconvbnrelu1.qi.quantize_tensor(x)
qx = self.qconvbnrelu1.quantize_inference(qx)
qx = self.layer1.quantize_inference(qx)
qx = self.layer2.quantize_inference(qx)
qx = self.layer3.quantize_inference(qx)
qx = self.layer4.quantize_inference(qx)
qx = self.layer5.quantize_inference(qx)
qx = self.layer6.quantize_inference(qx)
qx = self.qconv1.quantize_inference(qx)
qx = self.qavgpool1.quantize_inference(qx)
qx = qx.view(qx.size(0), -1)
qx = self.qfc1.quantize_inference(qx)
qx = self.qfc1.qo.dequantize_tensor(qx)
        # no softmax here; raw logits are returned
return qx
class InvertedResidual(nn.Module):
def __init__(self, in_channels, out_channels, stride, expand_ratio):
super(InvertedResidual, self).__init__()
hidden_dims = int(in_channels * expand_ratio)
self.identity_flag = stride == 1 and in_channels == out_channels
        # pointwise expansion conv
self.conv1 = nn.Conv2d(in_channels, hidden_dims, 1)
self.bn1 = nn.BatchNorm2d(hidden_dims)
self.relu1 = nn.ReLU6(inplace=True)
# Depthwise Convolution
self.conv2 = nn.Conv2d(hidden_dims, hidden_dims, 3, stride=stride, padding=1, groups=hidden_dims)
self.bn2 = nn.BatchNorm2d(hidden_dims)
self.relu2 = nn.ReLU6(inplace=True)
# Pointwise & Linear Convolution
self.conv3 = nn.Conv2d(hidden_dims, out_channels, 1)
self.bn3 = nn.BatchNorm2d(out_channels)
def forward(self, x):
identity = x
x = self.conv1(x)
x = self.bn1(x)
x = self.relu1(x)
x = self.conv2(x)
x = self.bn2(x)
x = self.relu2(x)
x = self.conv3(x)
x = self.bn3(x)
if self.identity_flag:
return identity + x
else:
return x
def quantize(self, quant_type ,num_bits=8, e_bits=3):
self.qconvbnrelu1 = QConvBNReLU6(quant_type,self.conv1,self.bn1,qi=False,qo=True,num_bits=num_bits,e_bits=e_bits)
self.qconvbnrelu2 = QConvBNReLU6(quant_type,self.conv2,self.bn2,qi=False,qo=True,num_bits=num_bits,e_bits=e_bits)
self.qconvbn1 = QConvBN(quant_type,self.conv3,self.bn3,qi=False,qo=True,num_bits=num_bits,e_bits=e_bits)
self.qelementadd = QElementwiseAdd(quant_type,qi0=False, qi1=False, qo=True,num_bits=num_bits,e_bits=e_bits)
def quantize_forward(self, x):
identity = x
out = self.qconvbnrelu1(x)
out = self.qconvbnrelu2(out)
out = self.qconvbn1(out)
if self.identity_flag:
out = self.qelementadd(out, identity)
return out
def freeze(self, qinput):
        # qconvbnrelu1 could reuse the previous layer's qo, but passing it through
        # is awkward, so each block takes qinput explicitly; still needs careful checking
        self.qconvbnrelu1.freeze(qi=qinput)  # chained to the previous module's last qo
self.qconvbnrelu2.freeze(qi=self.qconvbnrelu1.qo)
self.qconvbn1.freeze(qi = self.qconvbnrelu2.qo)
if self.identity_flag:
self.qelementadd.freeze(qi0 = self.qconvbn1.qo, qi1 = qinput)
return self.qelementadd.qo
else:
return self.qconvbn1.qo
def fakefreeze(self):
self.qconvbnrelu1.fakefreeze()
self.qconvbnrelu2.fakefreeze()
self.qconvbn1.fakefreeze()
def quantize_inference(self, x):
        # no quantize_tensor/dequantize_tensor here: interior block, inputs and outputs stay in the quantized domain
identity = x
out = self.qconvbnrelu1.quantize_inference(x)
out = self.qconvbnrelu2.quantize_inference(out)
out = self.qconvbn1.quantize_inference(out)
if self.identity_flag:
out = self.qelementadd.quantize_inference(out, identity)
return out
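
# Sketch (illustrative): the residual shortcut is only active when
# stride == 1 and the channel counts match, mirroring identity_flag above.
def _demo_inverted_residual_identity():
    block = InvertedResidual(32, 32, stride=1, expand_ratio=6)
    x = torch.randn(2, 32, 8, 8)
    assert block(x).shape == x.shape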
class MakeLayer_MobileNet(nn.Module):
def __init__(self, in_channels, out_channels, n_repeat, t, stride):
super(MakeLayer_MobileNet, self).__init__()
self.layers = nn.ModuleList()
for i in range(n_repeat):
if i == 0:
self.layers.append(InvertedResidual(in_channels, out_channels, stride, t))
else:
self.layers.append(InvertedResidual(in_channels, out_channels, 1, t))
in_channels = out_channels
def forward(self,x):
for layer in self.layers:
x = layer(x)
return x
    def quantize(self, quant_type, num_bits=8, e_bits=3):
        # TODO: verify
        for layer in self.layers:
            layer.quantize(quant_type=quant_type, num_bits=num_bits, e_bits=e_bits)  # each block carries its own quantize strategy
def quantize_forward(self, x):
for layer in self.layers:
            x = layer.quantize_forward(x)  # each block implements quantize_forward
return x
    def freeze(self, qinput):  # qinput is handed in from the parent model's freeze()
        # the first block takes qinput; each later block chains on the previous block's qo
        # still needs careful checking
cnt = 0
for layer in self.layers:
if cnt == 0:
qo = layer.freeze(qinput = qinput)
cnt = 1
else:
                qo = layer.freeze(qinput=qo)  # each block implements freeze
        return qo  # returned for the next layer
def fakefreeze(self):
for layer in self.layers:
layer.fakefreeze()
def quantize_inference(self, x):
        # no quantize_tensor/dequantize_tensor here: interior module, inputs and outputs stay in the quantized domain
for layer in self.layers:
            x = layer.quantize_inference(x)  # each block implements quantize_inference
return x
import torch
import os
from torchvision import datasets, transforms, utils
from torch.utils.data import sampler
from PIL import Image
from torch.utils.data import Subset, DataLoader, ConcatDataset
import torch.utils.data as data
from torch._utils import _accumulate
from torch import randperm
import numpy as np
import pandas as pd
def dataset_split(dataset, lengths):
if sum(lengths) != len(dataset):
raise ValueError("Sum of input lengths does not equal the length of the input dataset!")
indices = list(range(sum(lengths)))
np.random.seed(1)
np.random.shuffle(indices)
return [Subset(dataset, indices[offset - length:offset]) for offset, length in zip(_accumulate(lengths), lengths)]
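
# Sketch (illustrative; the helper name is an assumption): the fixed seed
# makes the shuffle deterministic, so the target/shadow/distill subsets are
# disjoint and reproducible across runs.
def _demo_dataset_split():
    pool = list(range(10))
    a, b = dataset_split(pool, [6, 4])
    assert len(a) == 6 and len(b) == 4 and set(a.indices).isdisjoint(b.indices)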
class GTSRB_ORI(data.Dataset):
base_folder = 'GTSRB'
def __init__(self, root_dir, train=False, transform=None):
self.root_dir = root_dir
self.sub_directory = 'trainingset' if train else 'testset'
self.csv_file_name = 'training.csv' if train else 'test.csv'
csv_file_path = os.path.join(
root_dir, self.base_folder, self.sub_directory, self.csv_file_name)
self.csv_data = pd.read_csv(csv_file_path)
self.transform = transform
def __len__(self):
return len(self.csv_data)
def __getitem__(self, idx):
img_path = os.path.join(self.root_dir, self.base_folder, self.sub_directory,
self.csv_data.iloc[idx, 0])
img = Image.open(img_path)
classId = self.csv_data.iloc[idx, 1]
if self.transform is not None:
img = self.transform(img)
return img, classId
class SUBGTSRB(data.Dataset):
def __init__(self, mode, aug, train):
self.img_size = 32
self.num_classes = 43
self.mean = [0.3403, 0.3121, 0.3214]
self.std = [0.2724, 0.2608, 0.2669]
normalize = transforms.Normalize(mean=self.mean, std=self.std)
self.augmented = transforms.Compose([transforms.Resize((32,32)), transforms.ToTensor(), normalize])
self.normalized = transforms.Compose([transforms.ToTensor(), normalize])
self.aug_trainset = GTSRB_ORI(root_dir='/home/c01yili/datasets/common_dataset', train=True, transform=self.augmented)
self.aug_testset = GTSRB_ORI(root_dir='/home/c01yili/datasets/common_dataset', train=False, transform=self.augmented)
self.trainset = GTSRB_ORI(root_dir='/home/c01yili/datasets/common_dataset', train=True, transform=self.normalized)
self.testset = GTSRB_ORI(root_dir='/home/c01yili/datasets/common_dataset', train=False, transform=self.normalized)
self.aug_dataset = ConcatDataset([self.aug_trainset, self.aug_testset])
self.dataset = ConcatDataset([self.trainset, self.testset])
self.aug_target_trainset, self.aug_target_testset, self.aug_shadow_trainset, self.aug_shadow_testset, self.aug_distill_trainset = dataset_split(self.aug_dataset, [1500, 1500, 1500, 1500, 45838])
self.aug_distill_testset = self.aug_shadow_testset
self.target_trainset, self.target_testset, self.shadow_trainset, self.shadow_testset, self.distill_trainset = dataset_split(self.dataset, [1500, 1500, 1500, 1500, 45838])
self.distill_testset = self.shadow_testset
if mode == 'target':
if aug:
if train:
self.dataset = self.aug_target_trainset
else:
self.dataset = self.aug_target_testset
else:
if train:
self.dataset = self.target_trainset
else:
self.dataset = self.target_testset
elif mode == 'shadow':
if aug:
if train:
self.dataset = self.aug_shadow_trainset
else:
self.dataset = self.aug_shadow_testset
else:
if train:
self.dataset = self.shadow_trainset
else:
self.dataset = self.shadow_testset
elif 'distill' in mode:
if aug:
if train:
self.dataset = self.aug_distill_trainset
else:
self.dataset = self.aug_distill_testset
else:
if train:
self.dataset = self.distill_trainset
else:
self.dataset = self.distill_testset
self.index = range(int(len(self.dataset)))
def __getitem__(self, idx):
return self.dataset[idx][0], self.dataset[idx][1], self.index[idx]
def __len__(self):
return len(self.index)
class GTSRB:
def __init__(self, mode, aug, batch_size=128):
self.batch_size = batch_size
self.img_size = 32
self.num_classes = 43
if aug:
if mode == 'target':
self.aug_target_trainset = SUBGTSRB(mode, aug, True)
self.aug_target_train_loader = torch.utils.data.DataLoader(self.aug_target_trainset, batch_size=batch_size, shuffle=True, num_workers=1)
self.aug_target_testset = SUBGTSRB(mode, aug, False)
self.aug_target_test_loader = torch.utils.data.DataLoader(self.aug_target_testset, batch_size=batch_size, shuffle=True, num_workers=1)
elif mode == 'shadow':
self.aug_shadow_trainset = SUBGTSRB(mode, aug, True)
self.aug_shadow_train_loader = torch.utils.data.DataLoader(self.aug_shadow_trainset, batch_size=batch_size, shuffle=True, num_workers=1)
self.aug_shadow_testset = SUBGTSRB(mode, aug, False)
self.aug_shadow_test_loader = torch.utils.data.DataLoader(self.aug_shadow_testset, batch_size=batch_size, shuffle=True, num_workers=1)
elif 'distill' in mode:
self.aug_distill_trainset = SUBGTSRB(mode, aug, True)
self.aug_distill_train_loader = torch.utils.data.DataLoader(self.aug_distill_trainset, batch_size=batch_size, shuffle=True, num_workers=1)
self.aug_distill_testset = SUBGTSRB(mode, aug, False)
self.aug_distill_test_loader = torch.utils.data.DataLoader(self.aug_distill_testset, batch_size=batch_size, shuffle=True, num_workers=1)
else:
if mode == 'target':
self.target_trainset = SUBGTSRB(mode, aug, True)
self.target_train_loader = torch.utils.data.DataLoader(self.target_trainset, batch_size=batch_size, shuffle=True, num_workers=1)
self.target_testset = SUBGTSRB(mode, aug, False)
self.target_test_loader = torch.utils.data.DataLoader(self.target_testset, batch_size=batch_size, shuffle=True, num_workers=1)
elif mode == 'shadow':
self.shadow_trainset = SUBGTSRB(mode, aug, True)
self.shadow_train_loader = torch.utils.data.DataLoader(self.shadow_trainset, batch_size=batch_size, shuffle=True, num_workers=1)
self.shadow_testset = SUBGTSRB(mode, aug, False)
self.shadow_test_loader = torch.utils.data.DataLoader(self.shadow_testset, batch_size=batch_size, shuffle=True, num_workers=1)
elif 'distill' in mode:
self.distill_trainset = SUBGTSRB(mode, aug, True)
self.distill_train_loader = torch.utils.data.DataLoader(self.distill_trainset, batch_size=batch_size, shuffle=True, num_workers=1)
self.distill_testset = SUBGTSRB(mode, aug, False)
self.distill_test_loader = torch.utils.data.DataLoader(self.distill_testset, batch_size=batch_size, shuffle=True, num_workers=1)
class SUBCINIC10(data.Dataset):
def __init__(self, mode, aug, train):
self.img_size = 32
self.num_classes = 10
self.mean = [0.47889522, 0.47227842, 0.43047404]
self.std = [0.24205776, 0.23828046, 0.25874835]
normalize = transforms.Normalize(mean=self.mean, std=self.std)
self.augmented = transforms.Compose([transforms.RandomHorizontalFlip(), transforms.RandomCrop(32, padding=4),transforms.ToTensor(), normalize])
self.normalized = transforms.Compose([transforms.ToTensor(), normalize])
        # whether the test/valid splits should also be augmented is an open question
self.aug_trainset = datasets.ImageFolder(root='/lustre/I/mazhihong/data/CINIC-10/train', transform=self.augmented)
self.aug_testset = datasets.ImageFolder(root='/lustre/I/mazhihong/data/CINIC-10/test', transform=self.augmented)
self.aug_validset = datasets.ImageFolder(root='/lustre/I/mazhihong/data/CINIC-10/valid', transform=self.augmented)
self.trainset = datasets.ImageFolder(root='/lustre/I/mazhihong/data/CINIC-10/train', transform=self.normalized)
self.testset = datasets.ImageFolder(root='/lustre/I/mazhihong/data/CINIC-10/test', transform=self.normalized)
self.validset = datasets.ImageFolder(root='/lustre/I/mazhihong/data/CINIC-10/valid', transform=self.normalized)
self.aug_dataset = ConcatDataset([self.aug_trainset, self.aug_testset, self.aug_validset])
self.dataset = ConcatDataset([self.trainset, self.testset, self.validset])
self.aug_target_trainset, self.aug_target_testset, self.aug_shadow_trainset, self.aug_shadow_testset, self.aug_distill_trainset, self.aug_distill_testset = dataset_split(self.aug_dataset, [10000, 10000, 10000, 10000, 220000, 10000])
self.target_trainset, self.target_testset, self.shadow_trainset, self.shadow_testset, self.distill_trainset, self.distill_testset = dataset_split(self.dataset, [10000, 10000, 10000, 10000, 220000, 10000])
if mode == 'target':
if aug:
if train:
self.dataset = self.aug_target_trainset
else:
self.dataset = self.aug_target_testset
else:
if train:
self.dataset = self.target_trainset
else:
self.dataset = self.target_testset
elif mode == 'shadow':
if aug:
if train:
self.dataset = self.aug_shadow_trainset
else:
self.dataset = self.aug_shadow_testset
else:
if train:
self.dataset = self.shadow_trainset
else:
self.dataset = self.shadow_testset
elif 'distill' in mode:
if aug:
if train:
self.dataset = self.aug_distill_trainset
else:
self.dataset = self.aug_distill_testset
else:
if train:
self.dataset = self.distill_trainset
else:
self.dataset = self.distill_testset
self.index = range(int(len(self.dataset)))
def __getitem__(self, idx):
return self.dataset[idx][0], self.dataset[idx][1], self.index[idx]
def __len__(self):
return len(self.index)
class CINIC10:
def __init__(self, mode, aug, batch_size=128, add_trigger=False):
self.batch_size = batch_size
self.img_size = 32
self.num_classes = 10
if aug:
if mode == 'target':
self.aug_target_trainset = SUBCINIC10(mode, aug, True)
self.aug_target_train_loader = torch.utils.data.DataLoader(self.aug_target_trainset, batch_size=batch_size, shuffle=True, num_workers=2)
self.aug_target_testset = SUBCINIC10(mode, aug, False)
self.aug_target_test_loader = torch.utils.data.DataLoader(self.aug_target_testset, batch_size=batch_size, shuffle=True, num_workers=2)
elif mode == 'shadow':
self.aug_shadow_trainset = SUBCINIC10(mode, aug, True)
self.aug_shadow_train_loader = torch.utils.data.DataLoader(self.aug_shadow_trainset, batch_size=batch_size, shuffle=True, num_workers=2)
self.aug_shadow_testset = SUBCINIC10(mode, aug, False)
self.aug_shadow_test_loader = torch.utils.data.DataLoader(self.aug_shadow_testset, batch_size=batch_size, shuffle=True, num_workers=2)
elif 'distill' in mode:
self.aug_distill_trainset = SUBCINIC10(mode, aug, True)
self.aug_distill_train_loader = torch.utils.data.DataLoader(self.aug_distill_trainset, batch_size=batch_size, shuffle=True, num_workers=2)
self.aug_distill_testset = SUBCINIC10(mode, aug, False)
self.aug_distill_test_loader = torch.utils.data.DataLoader(self.aug_distill_testset, batch_size=batch_size, shuffle=True, num_workers=2)
else:
if mode == 'target':
self.target_trainset = SUBCINIC10(mode, aug, True)
self.target_train_loader = torch.utils.data.DataLoader(self.target_trainset, batch_size=batch_size, shuffle=True, num_workers=2)
self.target_testset = SUBCINIC10(mode, aug, False)
self.target_test_loader = torch.utils.data.DataLoader(self.target_testset, batch_size=batch_size, shuffle=True, num_workers=2)
elif mode == 'shadow':
self.shadow_trainset = SUBCINIC10(mode, aug, True)
self.shadow_train_loader = torch.utils.data.DataLoader(self.shadow_trainset, batch_size=batch_size, shuffle=True, num_workers=2)
self.shadow_testset = SUBCINIC10(mode, aug, False)
self.shadow_test_loader = torch.utils.data.DataLoader(self.shadow_testset, batch_size=batch_size, shuffle=True, num_workers=2)
elif 'distill' in mode:
self.distill_trainset = SUBCINIC10(mode, aug, True)
self.distill_train_loader = torch.utils.data.DataLoader(self.distill_trainset, batch_size=batch_size, shuffle=True, num_workers=2)
self.distill_testset = SUBCINIC10(mode, aug, False)
self.distill_test_loader = torch.utils.data.DataLoader(self.distill_testset, batch_size=batch_size, shuffle=True, num_workers=2)
class SUBCIFAR10(data.Dataset):
def __init__(self, mode, aug, train):
self.img_size = 32
self.num_classes = 10
self.num_test = 10000
self.num_train = 50000
self.mean = [0.485, 0.456, 0.406]
self.std = [0.229, 0.224, 0.225]
normalize = transforms.Normalize(mean=self.mean, std=self.std)
self.augmented = transforms.Compose([transforms.RandomHorizontalFlip(), transforms.RandomCrop(32, padding=4),transforms.ToTensor(), normalize])
self.normalized = transforms.Compose([transforms.ToTensor(), normalize])
self.aug_trainset = datasets.CIFAR10(root='/lustre/I/mazhihong/data/CIFAR10', train=True, download=True, transform=self.augmented)
self.aug_testset = datasets.CIFAR10(root='/lustre/I/mazhihong/data/CIFAR10', train=False, download=True, transform=self.augmented)
self.trainset = datasets.CIFAR10(root='/lustre/I/mazhihong/data/CIFAR10', train=True, download=False, transform=self.normalized)
self.testset = datasets.CIFAR10(root='/lustre/I/mazhihong/data/CIFAR10', train=False, download=False, transform=self.normalized)
        # merge train and test: under MIA the original train/test split is meaningless, so the pool is re-split below
self.aug_dataset = ConcatDataset([self.aug_trainset, self.aug_testset])
self.dataset = ConcatDataset([self.trainset, self.testset])
        # re-split the merged pool
self.aug_target_trainset, self.aug_target_testset, self.aug_shadow_trainset, self.aug_shadow_testset, self.aug_distill_trainset = dataset_split(self.aug_dataset, [10000, 10000, 10000, 10000, 20000])
self.aug_distill_testset = self.aug_shadow_testset
self.target_trainset, self.target_testset, self.shadow_trainset, self.shadow_testset, self.distill_trainset = dataset_split(self.dataset, [10000, 10000, 10000, 10000, 20000])
self.distill_testset = self.shadow_testset
if mode == 'target':
if aug:
if train:
self.dataset = self.aug_target_trainset
else:
self.dataset = self.aug_target_testset
else:
if train:
self.dataset = self.target_trainset
else:
self.dataset = self.target_testset
elif mode == 'shadow':
if aug:
if train:
self.dataset = self.aug_shadow_trainset
else:
self.dataset = self.aug_shadow_testset
else:
if train:
self.dataset = self.shadow_trainset
else:
self.dataset = self.shadow_testset
elif 'distill' in mode:
if aug:
if train:
self.dataset = self.aug_distill_trainset
else:
self.dataset = self.aug_distill_testset
else:
if train:
self.dataset = self.distill_trainset
else:
self.dataset = self.distill_testset
self.index = range(int(len(self.dataset)))
def __getitem__(self, idx):
return self.dataset[idx][0], self.dataset[idx][1], self.index[idx]
def __len__(self):
return len(self.index)
class CIFAR10:
def __init__(self, mode, aug, batch_size=128, add_trigger=False):
self.batch_size = batch_size
self.img_size = 32
self.num_classes = 10
if aug:
if mode == 'target':
self.aug_target_trainset = SUBCIFAR10(mode, aug, True)
self.aug_target_train_loader = torch.utils.data.DataLoader(self.aug_target_trainset, batch_size=batch_size, shuffle=True, num_workers=2)
self.aug_target_testset = SUBCIFAR10(mode, aug, False)
self.aug_target_test_loader = torch.utils.data.DataLoader(self.aug_target_testset, batch_size=batch_size, shuffle=True, num_workers=2)
elif mode == 'shadow':
self.aug_shadow_trainset = SUBCIFAR10(mode, aug, True)
self.aug_shadow_train_loader = torch.utils.data.DataLoader(self.aug_shadow_trainset, batch_size=batch_size, shuffle=True, num_workers=2)
self.aug_shadow_testset = SUBCIFAR10(mode, aug, False)
self.aug_shadow_test_loader = torch.utils.data.DataLoader(self.aug_shadow_testset, batch_size=batch_size, shuffle=True, num_workers=2)
elif 'distill' in mode:
self.aug_distill_trainset = SUBCIFAR10(mode, aug, True)
self.aug_distill_train_loader = torch.utils.data.DataLoader(self.aug_distill_trainset, batch_size=batch_size, shuffle=True, num_workers=2)
self.aug_distill_testset = SUBCIFAR10(mode, aug, False)
self.aug_distill_test_loader = torch.utils.data.DataLoader(self.aug_distill_testset, batch_size=batch_size, shuffle=True, num_workers=2)
else:
if mode == 'target':
self.target_trainset = SUBCIFAR10(mode, aug, True)
self.target_train_loader = torch.utils.data.DataLoader(self.target_trainset, batch_size=batch_size, shuffle=True, num_workers=2)
self.target_testset = SUBCIFAR10(mode, aug, False)
self.target_test_loader = torch.utils.data.DataLoader(self.target_testset, batch_size=batch_size, shuffle=True, num_workers=2)
elif mode == 'shadow':
self.shadow_trainset = SUBCIFAR10(mode, aug, True)
self.shadow_train_loader = torch.utils.data.DataLoader(self.shadow_trainset, batch_size=batch_size, shuffle=True, num_workers=2)
self.shadow_testset = SUBCIFAR10(mode, aug, False)
self.shadow_test_loader = torch.utils.data.DataLoader(self.shadow_testset, batch_size=batch_size, shuffle=True, num_workers=2)
elif 'distill' in mode:
self.distill_trainset = SUBCIFAR10(mode, aug, True)
self.distill_train_loader = torch.utils.data.DataLoader(self.distill_trainset, batch_size=batch_size, shuffle=True, num_workers=2)
self.distill_testset = SUBCIFAR10(mode, aug, False)
self.distill_test_loader = torch.utils.data.DataLoader(self.distill_testset, batch_size=batch_size, shuffle=True, num_workers=2)
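
# Sketch (illustrative): how the MIA pipeline obtains member (train) and
# non-member (test) loaders for the target model; dataset paths and split
# sizes are hard-coded in SUBCIFAR10 above.
def _demo_cifar10_loaders():
    ds = CIFAR10(mode='target', aug=True, batch_size=128)
    return ds.aug_target_train_loader, ds.aug_target_test_loader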
class SUBCIFAR100(data.Dataset):
def __init__(self, mode, aug, train):
self.img_size = 32
self.num_classes = 100
self.num_test = 10000
self.num_train = 50000
self.mean=[0.507, 0.487, 0.441]
self.std=[0.267, 0.256, 0.276]
normalize = transforms.Normalize(mean=self.mean, std=self.std)
self.augmented = transforms.Compose([transforms.RandomHorizontalFlip(), transforms.RandomCrop(32, padding=4),transforms.ToTensor(), normalize])
self.normalized = transforms.Compose([transforms.ToTensor(), normalize])
self.aug_trainset = datasets.CIFAR100(root='/lustre/I/mazhihong/data/CIFAR100', train=True, download=True, transform=self.augmented)
self.aug_testset = datasets.CIFAR100(root='/lustre/I/mazhihong/data/CIFAR100', train=False, download=True, transform=self.augmented)
self.trainset = datasets.CIFAR100(root='/lustre/I/mazhihong/data/CIFAR100', train=True, download=True, transform=self.normalized)
self.testset = datasets.CIFAR100(root='/lustre/I/mazhihong/data/CIFAR100', train=False, download=True, transform=self.normalized)
self.aug_dataset = ConcatDataset([self.aug_trainset, self.aug_testset])
self.dataset = ConcatDataset([self.trainset, self.testset])
self.aug_target_trainset, self.aug_target_testset, self.aug_shadow_trainset, self.aug_shadow_testset, self.aug_distill_trainset = dataset_split(self.aug_dataset, [10000, 10000, 10000, 10000, 20000])
self.aug_distill_testset = self.aug_shadow_testset
self.target_trainset, self.target_testset, self.shadow_trainset, self.shadow_testset, self.distill_trainset = dataset_split(self.dataset, [10000, 10000, 10000, 10000, 20000])
self.distill_testset = self.shadow_testset
if mode == 'target':
if aug:
if train:
self.dataset = self.aug_target_trainset
else:
self.dataset = self.aug_target_testset
else:
if train:
self.dataset = self.target_trainset
else:
self.dataset = self.target_testset
elif mode == 'shadow':
if aug:
if train:
self.dataset = self.aug_shadow_trainset
else:
self.dataset = self.aug_shadow_testset
else:
if train:
self.dataset = self.shadow_trainset
else:
self.dataset = self.shadow_testset
elif 'distill' in mode:
if aug:
if train:
self.dataset = self.aug_distill_trainset
else:
self.dataset = self.aug_distill_testset
else:
if train:
self.dataset = self.distill_trainset
else:
self.dataset = self.distill_testset
self.index = range(int(len(self.dataset)))
def __getitem__(self, idx):
return self.dataset[idx][0], self.dataset[idx][1], self.index[idx]
def __len__(self):
return len(self.index)
class CIFAR100:
def __init__(self, mode, aug, batch_size=128):
self.batch_size = batch_size
self.img_size = 32
self.num_classes = 100
if aug:
if mode == 'target':
self.aug_target_trainset = SUBCIFAR100(mode, aug, True)
self.aug_target_train_loader = torch.utils.data.DataLoader(self.aug_target_trainset, batch_size=batch_size, shuffle=True, num_workers=1)
self.aug_target_testset = SUBCIFAR100(mode, aug, False)
self.aug_target_test_loader = torch.utils.data.DataLoader(self.aug_target_testset, batch_size=batch_size, shuffle=True, num_workers=1)
elif mode == 'shadow':
self.aug_shadow_trainset = SUBCIFAR100(mode, aug, True)
self.aug_shadow_train_loader = torch.utils.data.DataLoader(self.aug_shadow_trainset, batch_size=batch_size, shuffle=True, num_workers=1)
self.aug_shadow_testset = SUBCIFAR100(mode, aug, False)
self.aug_shadow_test_loader = torch.utils.data.DataLoader(self.aug_shadow_testset, batch_size=batch_size, shuffle=True, num_workers=1)
elif 'distill' in mode:
self.aug_distill_trainset = SUBCIFAR100(mode, aug, True)
self.aug_distill_train_loader = torch.utils.data.DataLoader(self.aug_distill_trainset, batch_size=batch_size, shuffle=True, num_workers=1)
self.aug_distill_testset = SUBCIFAR100(mode, aug, False)
self.aug_distill_test_loader = torch.utils.data.DataLoader(self.aug_distill_testset, batch_size=batch_size, shuffle=True, num_workers=1)
else:
if mode == 'target':
self.target_trainset = SUBCIFAR100(mode, aug, True)
self.target_train_loader = torch.utils.data.DataLoader(self.target_trainset, batch_size=batch_size, shuffle=True, num_workers=1)
self.target_testset = SUBCIFAR100(mode, aug, False)
self.target_test_loader = torch.utils.data.DataLoader(self.target_testset, batch_size=batch_size, shuffle=True, num_workers=1)
elif mode == 'shadow':
self.shadow_trainset = SUBCIFAR100(mode, aug, True)
self.shadow_train_loader = torch.utils.data.DataLoader(self.shadow_trainset, batch_size=batch_size, shuffle=True, num_workers=1)
self.shadow_testset = SUBCIFAR100(mode, aug, False)
self.shadow_test_loader = torch.utils.data.DataLoader(self.shadow_testset, batch_size=batch_size, shuffle=True, num_workers=1)
elif 'distill' in mode:
self.distill_trainset = SUBCIFAR100(mode, aug, True)
self.distill_train_loader = torch.utils.data.DataLoader(self.distill_trainset, batch_size=batch_size, shuffle=True, num_workers=1)
self.distill_testset = SUBCIFAR100(mode, aug, False)
self.distill_test_loader = torch.utils.data.DataLoader(self.distill_testset, batch_size=batch_size, shuffle=True, num_workers=1)
class AverageMeter(object):
"""Computes and stores the average and current value"""
def __init__(self):
self.reset()
def reset(self):
self.val = 0
self.avg = 0
self.sum = 0
self.count = 0
def update(self, val, n=1):
self.val = val
self.sum += val * n
self.count += n
self.avg = self.sum / self.count
def accuracy(output, target, topk=(1,)):
"""Computes the precision@k for the specified values of k"""
with torch.no_grad():
maxk = max(topk)
batch_size = target.size(0)
_, pred = output.topk(maxk, 1, True, True)
pred = pred.t()
correct = pred.eq(target.view(1, -1).expand_as(pred))
res = []
for k in topk:
correct_k = correct[:k].reshape(-1).float().sum(0, keepdim=True)
res.append(correct_k.mul_(100.0 / batch_size))
return res
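# Illustrative usage (not from the original file): for logits of shape (N, C)
# and integer labels of shape (N,), accuracy(logits, labels, topk=(1, 5))
# returns [top1, top5] as per-batch percentages.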
from torch.autograd import Function
class FakeQuantize(Function):
@staticmethod
def forward(ctx, x, qparam):
x = qparam.quantize_tensor(x)
x = qparam.dequantize_tensor(x)
return x
@staticmethod
def backward(ctx, grad_output):
return grad_output, None
class GlobalVariables:
SELF_INPLANES = 0
# -*- coding: utf-8 -*-
# Shares global variables across multiple modules
def _init():  # initialize the shared dict
global _global_dict
_global_dict = {}
def set_value(value,is_bias=False):
    # store a global value (index 0: bias grid, index 1: everything else)
if is_bias:
_global_dict[0] = value
else:
_global_dict[1] = value
def get_value(is_bias=False):  # bias gets its own precision, separate from the other variables
if is_bias:
return _global_dict[0]
else:
return _global_dict[1]
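# Typical usage (illustrative): call _init() once at startup, publish the
# quantization grids via set_value(grid) and set_value(bias_grid, is_bias=True),
# then read them back from any module with get_value(is_bias=...).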
import os
import argparse
import utils
import normal
import MIA
def train_networks(args):
device = utils.get_pytorch_device()
utils.create_path('./outputs')
if 'distill' in args.mode:
model_path_tar = 'networks/{}/{}'.format(0, args.mode.split('_')[-1])
utils.create_path(model_path_tar)
model_path_dis = 'networks/{}/{}'.format(args.seed, args.mode)
utils.create_path(model_path_dis)
else:
model_path_tar = 'networks/{}/{}'.format(args.seed, args.mode)
utils.create_path(model_path_tar)
model_path_dis = None
    utils.set_logger('outputs/train_models')
normal.train_models(args, model_path_tar, model_path_dis, device)
def membership_inference_attack(args):
print(f'--------------{args.mia_type}-------------')
device = utils.get_pytorch_device()
if args.mia_type == 'build-dataset':
models_path = 'networks/{}'.format(0)
MIA.build_trajectory_membership_dataset(args, models_path, device)
if args.mia_type == 'black-box':
trained_models_path = 'networks/{}'.format(args.seed)
MIA.trajectory_black_box_membership_inference_attack(args, trained_models_path, device)
if __name__ == '__main__':
parser = argparse.ArgumentParser(description='TrajectoryMIA')
parser.add_argument('--action', type=int, default=0, help=[0, 1])
parser.add_argument('--seed', type=int, default=0)
parser.add_argument('--mode', type=str, default='target', help=['target', 'shadow', 'distill_target', 'distill_shadow'])
parser.add_argument('--model', type=str, default='resnet18', help=['resnet18','resnet50','resnet152','mobilenetv2'])
parser.add_argument('--data', type=str, default='cifar10', help=['cinic10', 'cifar10', 'cifar100', 'gtsrb'])
parser.add_argument('--epochs', type=int, default=100)
parser.add_argument('--model_distill', type=str, default='resnet18',help=['resnet18','resnet50','resnet152','mobilenetv2'])
parser.add_argument('--epochs_distill', type=int, default=100)
parser.add_argument('--mia_type', type=str, help=['build-dataset', 'black-box'])
args = parser.parse_args()
utils.set_random_seeds(args.seed)
print('random seed:{}'.format(args.seed))
if args.action == 0:
train_networks(args)
elif args.action == 1:
membership_inference_attack(args)
import math
import numpy as np
import gol
import torch
import torch.nn as nn
import torch.nn.functional as F
from torch.autograd import Variable
from function import FakeQuantize
# Snap each element to the nearest value on the quantization grid
# def get_nearest_val(quant_type,x,is_bias=False):
# if quant_type=='INT':
# return x.round_()
# plist = gol.get_value(is_bias)
# # print('get')
# # print(plist)
# # x = x / 64
# shape = x.shape
# xhard = x.view(-1)
# plist = plist.type_as(x)
# # index of the nearest power-of-two value
# idx = (xhard.unsqueeze(0) - plist.unsqueeze(1)).abs().min(dim=0)[1]
# xhard = plist[idx].view(shape)
# xout = (xhard - x).detach() + x
# # xout = xout * 64
# return xout
def get_nearest_val(quant_type, x, is_bias=False, block_size=1000000):
if quant_type == 'INT':
return x.round_()
plist = gol.get_value(is_bias)
shape = x.shape
xhard = x.view(-1)
xout = torch.zeros_like(xhard)
plist = plist.type_as(x)
n_blocks = (x.numel() + block_size - 1) // block_size
for i in range(n_blocks):
start_idx = i * block_size
end_idx = min(start_idx + block_size, xhard.numel())
        xblock = xhard[start_idx:end_idx]
        plist_block = plist.unsqueeze(1)
        # index of the nearest grid value for each element in the block
        idx = (xblock.unsqueeze(0) - plist_block).abs().min(dim=0)[1]
xhard_block = plist[idx].view(xblock.shape)
xout[start_idx:end_idx] = (xhard_block - xblock).detach() + xblock
xout = xout.view(shape)
return xout
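# Illustrative example (not from the original file): with plist =
# torch.tensor([0.25, 0.5, 1.0]) (a POT grid), the value 0.6 snaps to 0.5, its
# nearest grid point. The expression (xhard_block - xblock).detach() + xblock
# is a straight-through estimator: the forward pass returns the snapped value
# while gradients flow through to the original x.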
# For symmetric signed quantization: the maximum of the quantized range
def get_qmax(quant_type,num_bits=None, e_bits=None):
if quant_type == 'INT':
qmax = 2. ** (num_bits - 1) - 1
elif quant_type == 'POT':
qmax = 1
    else:  # FLOAT
m_bits = num_bits - 1 - e_bits
dist_m = 2 ** (-m_bits)
e = 2 ** (e_bits - 1)
expo = 2 ** e
        m = 2 ** m_bits - 1
frac = 1. + m * dist_m
qmax = frac * expo
return qmax
# Signed quantization throughout, so the zero point is always 0
def calcScaleZeroPoint(min_val, max_val, qmax):
scale = torch.max(max_val.abs(),min_val.abs()) / qmax
zero_point = torch.tensor(0.)
return scale, zero_point
# Quantize the input; both input and output are tensors
def quantize_tensor(quant_type, x, scale, zero_point, qmax, is_bias=False):
    # the quantized range follows directly from the bit width
qmin = -qmax
q_x = zero_point + x / scale
q_x.clamp_(qmin, qmax)
q_x = get_nearest_val(quant_type, q_x, is_bias)
return q_x
# The bias uses a different precision; num_bits/e_bits depend on the quant type
def bias_qmax(quant_type):
if quant_type == 'INT':
return get_qmax(quant_type, 64)
elif quant_type == 'POT':
return get_qmax(quant_type)
else:
return get_qmax(quant_type, 16, 7)
# Dequantize back to FP32; no further clamping is needed
def dequantize_tensor(q_x, scale, zero_point):
return scale * (q_x - zero_point)
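# Illustrative round trip for symmetric INT8 (the numbers are worked out here,
# not taken from the original code):
#   qmax = get_qmax('INT', num_bits=8)                        # 127.0
#   scale, zp = calcScaleZeroPoint(torch.tensor(-0.5), torch.tensor(1.0), qmax)
#   q = quantize_tensor('INT', torch.tensor([0.3]), scale, zp, qmax)   # ~38.0
#   x = dequantize_tensor(q, scale, zp)                       # ~0.299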
class QParam(nn.Module):
def __init__(self,quant_type, num_bits=8, e_bits=3):
super(QParam, self).__init__()
self.quant_type = quant_type
self.num_bits = num_bits
self.e_bits = e_bits
self.qmax = get_qmax(quant_type, num_bits, e_bits)
scale = torch.tensor([], requires_grad=False)
zero_point = torch.tensor([], requires_grad=False)
min = torch.tensor([], requires_grad=False)
max = torch.tensor([], requires_grad=False)
        # registering them as buffers records them in the state_dict
self.register_buffer('scale', scale)
self.register_buffer('zero_point', zero_point)
self.register_buffer('min', min)
self.register_buffer('max', max)
    # Update the observed range and the quantization parameters
def update(self, tensor):
if self.max.nelement() == 0 or self.max.data < tensor.max().data:
self.max.data = tensor.max().data
self.max.clamp_(min=0)
if self.min.nelement() == 0 or self.min.data > tensor.min().data:
self.min.data = tensor.min().data
self.min.clamp_(max=0)
self.scale, self.zero_point = calcScaleZeroPoint(self.min, self.max, self.qmax)
def quantize_tensor(self, tensor):
return quantize_tensor(self.quant_type, tensor, self.scale, self.zero_point, self.qmax)
def dequantize_tensor(self, q_x):
return dequantize_tensor(q_x, self.scale, self.zero_point)
    # Ensures the buffers can be restored from a state_dict
def _load_from_state_dict(self, state_dict, prefix, local_metadata, strict, missing_keys, unexpected_keys,
error_msgs):
key_names = ['scale', 'zero_point', 'min', 'max']
for key in key_names:
value = getattr(self, key)
value.data = state_dict[prefix + key].data
state_dict.pop(prefix + key)
    # The returned string is what printing this object shows
def __str__(self):
info = 'scale: %.10f ' % self.scale
info += 'zp: %.6f ' % self.zero_point
info += 'min: %.6f ' % self.min
info += 'max: %.6f' % self.max
return info
# Parent class of the concrete quantized layers; qi/qo quantize the input/output
class QModule(nn.Module):
def __init__(self,quant_type, qi=True, qo=True, num_bits=8, e_bits=3):
super(QModule, self).__init__()
if qi:
self.qi = QParam(quant_type,num_bits, e_bits)
if qo:
self.qo = QParam(quant_type,num_bits, e_bits)
self.quant_type = quant_type
self.num_bits = num_bits
self.e_bits = e_bits
self.bias_qmax = bias_qmax(quant_type)
def freeze(self):
        pass
def fakefreeze(self):
pass
def quantize_inference(self, x):
raise NotImplementedError('quantize_inference should be implemented.')
"""
QModule 量化卷积
:quant_type: 量化类型
:conv_module: 卷积模块
:qi: 是否量化输入特征图
:qo: 是否量化输出特征图
:num_bits: 8位bit数
"""
class QConv2d(QModule):
def __init__(self, quant_type, conv_module, qi=True, qo=True, num_bits=8, e_bits=3):
super(QConv2d, self).__init__(quant_type, qi, qo, num_bits, e_bits)
self.conv_module = conv_module
self.qw = QParam(quant_type, num_bits,e_bits)
        self.register_buffer('M', torch.tensor([], requires_grad=False))  # register M as a buffer
    # freeze() fixes the truly-quantized weights and writes them back to the original full-precision layer, which eases divergence computation
def freeze(self, qi=None, qo=None):
if hasattr(self, 'qi') and qi is not None:
raise ValueError('qi has been provided in init function.')
if not hasattr(self, 'qi') and qi is None:
            raise ValueError('qi does not exist and should be provided.')
if hasattr(self, 'qo') and qo is not None:
raise ValueError('qo has been provided in init function.')
if not hasattr(self, 'qo') and qo is None:
            raise ValueError('qo does not exist and should be provided.')
        # Pooling/activation inputs need no extra min/max statistics; they share the previous layer's output range
if qi is not None:
self.qi = qi
if qo is not None:
self.qo = qo
        # Following https://zhuanlan.zhihu.com/p/156835141, this is the coefficient in Eq. (3)
self.M.data = (self.qw.scale * self.qi.scale / self.qo.scale).data
self.conv_module.weight.data = self.qw.quantize_tensor(self.conv_module.weight.data)
self.conv_module.weight.data = self.conv_module.weight.data - self.qw.zero_point
self.conv_module.bias.data = quantize_tensor(self.quant_type,
self.conv_module.bias.data, scale=self.qi.scale * self.qw.scale,
zero_point=0.,qmax=self.bias_qmax, is_bias=True)
def fakefreeze(self):
self.conv_module.weight.data = self.qw.dequantize_tensor(self.conv_module.weight.data)
self.conv_module.bias.data = dequantize_tensor(self.conv_module.bias.data,scale=self.qi.scale * self.qw.scale, zero_point=0.)
    def forward(self, x):  # forward pass; x is a floating-point tensor
if hasattr(self, 'qi'):
self.qi.update(x)
            x = FakeQuantize.apply(x, self.qi)  # fake-quantize the input tensor
        # update qw before the forward so the scale is correct when quantizing the weight
self.qw.update(self.conv_module.weight.data)
        # Note: this mainly collects the per-layer ranges of x and weight; the bias is not quantized here
# tmp_wgt = FakeQuantize.apply(self.conv_module.weight, self.qw)
# x = F.conv2d(x, tmp_wgt, self.conv_module.bias,
# stride=self.conv_module.stride,
# padding=self.conv_module.padding, dilation=self.conv_module.dilation,
# groups=self.conv_module.groups)
x = F.conv2d(x, FakeQuantize.apply(self.conv_module.weight, self.qw), self.conv_module.bias,
stride=self.conv_module.stride,
padding=self.conv_module.padding, dilation=self.conv_module.dilation,
groups=self.conv_module.groups)
if hasattr(self, 'qo'):
self.qo.update(x)
x = FakeQuantize.apply(x, self.qo)
return x
    # Uses q_a = M * (sum((q_w - Z_w) * (q_x - Z_x)) + q_b)
    def quantize_inference(self, x):  # the input here is the already-quantized q_x
x = x - self.qi.zero_point
x = self.conv_module(x)
x = self.M * x
# if self.quant_type is 'INT':
x = get_nearest_val(self.quant_type,x)
x = x + self.qo.zero_point
return x
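    # Why the single multiplier works (sketch, derived from the scales above):
    # convolving (q_x - Z_x) with the integer weights accumulates in units of
    # s_w * s_i; multiplying by M = s_w * s_i / s_o re-expresses the result in
    # the output scale s_o before Z_o is added back.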
class QLinear(QModule):
def __init__(self, quant_type, fc_module, qi=True, qo=True, num_bits=8, e_bits=3):
super(QLinear, self).__init__(quant_type, qi, qo, num_bits, e_bits)
self.fc_module = fc_module
self.qw = QParam(quant_type, num_bits, e_bits)
        self.register_buffer('M', torch.tensor([], requires_grad=False))  # register M as a buffer
def freeze(self, qi=None, qo=None):
if hasattr(self, 'qi') and qi is not None:
raise ValueError('qi has been provided in init function.')
if not hasattr(self, 'qi') and qi is None:
            raise ValueError('qi does not exist and should be provided.')
if hasattr(self, 'qo') and qo is not None:
raise ValueError('qo has been provided in init function.')
if not hasattr(self, 'qo') and qo is None:
            raise ValueError('qo does not exist and should be provided.')
if qi is not None:
self.qi = qi
if qo is not None:
self.qo = qo
self.M.data = (self.qw.scale * self.qi.scale / self.qo.scale).data
self.fc_module.weight.data = self.qw.quantize_tensor(self.fc_module.weight.data)
self.fc_module.weight.data = self.fc_module.weight.data - self.qw.zero_point
self.fc_module.bias.data = quantize_tensor(self.quant_type,
self.fc_module.bias.data, scale=self.qi.scale * self.qw.scale,
zero_point=0., qmax=self.bias_qmax, is_bias=True)
def fakefreeze(self):
self.fc_module.weight.data = self.qw.dequantize_tensor(self.fc_module.weight.data)
self.fc_module.bias.data = dequantize_tensor(self.fc_module.bias.data, scale=self.qi.scale * self.qw.scale, zero_point=0.)
def forward(self, x):
if hasattr(self, 'qi'):
self.qi.update(x)
x = FakeQuantize.apply(x, self.qi)
self.qw.update(self.fc_module.weight.data)
# tmp_wgt = FakeQuantize.apply(self.fc_module.weight, self.qw)
# x = F.linear(x, tmp_wgt, self.fc_module.bias)
x = F.linear(x, FakeQuantize.apply(self.fc_module.weight, self.qw), self.fc_module.bias)
if hasattr(self, 'qo'):
self.qo.update(x)
x = FakeQuantize.apply(x, self.qo)
return x
def quantize_inference(self, x):
x = x - self.qi.zero_point
x = self.fc_module(x)
x = self.M * x
# if self.quant_type is 'INT':
x = get_nearest_val(self.quant_type,x)
x = x + self.qo.zero_point
return x
class QReLU(QModule):
def __init__(self,quant_type, qi=False, qo=True, num_bits=8, e_bits=3):
super(QReLU, self).__init__(quant_type, qi, qo, num_bits, e_bits)
def freeze(self, qi=None):
if hasattr(self, 'qi') and qi is not None:
raise ValueError('qi has been provided in init function.')
if not hasattr(self, 'qi') and qi is None:
            raise ValueError('qi does not exist and should be provided.')
if qi is not None:
self.qi = qi
def forward(self, x):
if hasattr(self, 'qi'):
self.qi.update(x)
x = FakeQuantize.apply(x, self.qi)
x = F.relu(x)
return x
def quantize_inference(self, x):
x = x.clone()
# x[x < self.qi.zero_point] = self.qi.zero_point
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
a = self.qi.zero_point.float().to(device)
x[x < a] = a
return x
class QMaxPooling2d(QModule):
def __init__(self, quant_type, kernel_size=3, stride=1, padding=0, qi=False, qo=True, num_bits=8,e_bits=3):
super(QMaxPooling2d, self).__init__(quant_type, qi, qo, num_bits, e_bits)
self.kernel_size = kernel_size
self.stride = stride
self.padding = padding
def freeze(self, qi=None):
if hasattr(self, 'qi') and qi is not None:
raise ValueError('qi has been provided in init function.')
if not hasattr(self, 'qi') and qi is None:
            raise ValueError('qi does not exist and should be provided.')
if qi is not None:
self.qi = qi
def forward(self, x):
if hasattr(self, 'qi'):
self.qi.update(x)
x = FakeQuantize.apply(x, self.qi)
x = F.max_pool2d(x, self.kernel_size, self.stride, self.padding)
return x
def quantize_inference(self, x):
return F.max_pool2d(x, self.kernel_size, self.stride, self.padding)
class QConvBNReLU(QModule):
def __init__(self, quant_type, conv_module, bn_module, qi=True, qo=True, num_bits=8, e_bits=3):
super(QConvBNReLU, self).__init__(quant_type, qi, qo, num_bits, e_bits)
self.conv_module = conv_module
self.bn_module = bn_module
self.qw = QParam(quant_type, num_bits,e_bits)
        self.register_buffer('M', torch.tensor([], requires_grad=False))  # register M as a buffer
def fold_bn(self, mean, std):
if self.bn_module.affine:
gamma_ = self.bn_module.weight / std
weight = self.conv_module.weight * gamma_.view(self.conv_module.out_channels, 1, 1, 1)
if self.conv_module.bias is not None:
bias = gamma_ * self.conv_module.bias - gamma_ * mean + self.bn_module.bias
else:
bias = self.bn_module.bias - gamma_ * mean
else:
gamma_ = 1 / std
weight = self.conv_module.weight * gamma_
if self.conv_module.bias is not None:
bias = gamma_ * self.conv_module.bias - gamma_ * mean
else:
bias = -gamma_ * mean
return weight, bias
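    # Folding rationale (sketch): BN computes y = gamma * (conv(x) - mu) / sigma + beta.
    # With gamma_ = gamma / sigma this equals a single convolution with
    # weight' = gamma_ * W and bias' = gamma_ * (b - mu) + beta, which is what
    # fold_bn returns.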
def freeze(self, qi=None, qo=None):
if hasattr(self, 'qi') and qi is not None:
raise ValueError('qi has been provided in init function.')
if not hasattr(self, 'qi') and qi is None:
            raise ValueError('qi does not exist and should be provided.')
if hasattr(self, 'qo') and qo is not None:
raise ValueError('qo has been provided in init function.')
if not hasattr(self, 'qo') and qo is None:
            raise ValueError('qo does not exist and should be provided.')
if qi is not None:
self.qi = qi
if qo is not None:
self.qo = qo
self.M.data = (self.qw.scale * self.qi.scale / self.qo.scale).data
std = torch.sqrt(self.bn_module.running_var + self.bn_module.eps)
weight, bias = self.fold_bn(self.bn_module.running_mean, std)
self.conv_module.weight.data = self.qw.quantize_tensor(weight.data)
self.conv_module.weight.data = self.conv_module.weight.data - self.qw.zero_point
if self.conv_module.bias is None:
self.conv_module.bias = nn.Parameter(quantize_tensor(self.quant_type,
bias, scale=self.qi.scale * self.qw.scale,
zero_point=0., qmax=self.bias_qmax,is_bias=True))
else:
self.conv_module.bias.data = quantize_tensor(self.quant_type,
bias, scale=self.qi.scale * self.qw.scale,
zero_point=0., qmax=self.bias_qmax,is_bias=True)
def fakefreeze(self):
self.conv_module.weight.data = self.qw.dequantize_tensor(self.conv_module.weight.data)
self.conv_module.bias.data = dequantize_tensor(self.conv_module.bias.data,scale=self.qi.scale * self.qw.scale, zero_point=0.)
def forward(self, x):
if hasattr(self, 'qi'):
self.qi.update(x)
x = FakeQuantize.apply(x, self.qi)
if self.training:
y = F.conv2d(x, self.conv_module.weight, self.conv_module.bias,
stride=self.conv_module.stride,
padding=self.conv_module.padding,
dilation=self.conv_module.dilation,
groups=self.conv_module.groups)
y = y.permute(1, 0, 2, 3) # NCHW -> CNHW
y = y.contiguous().view(self.conv_module.out_channels, -1) # CNHW -> C,NHW
# mean = y.mean(1)
# var = y.var(1)
mean = y.mean(1).detach()
var = y.var(1).detach()
self.bn_module.running_mean = \
(1 - self.bn_module.momentum) * self.bn_module.running_mean + \
self.bn_module.momentum * mean
self.bn_module.running_var = \
(1 - self.bn_module.momentum) * self.bn_module.running_var + \
self.bn_module.momentum * var
else:
mean = Variable(self.bn_module.running_mean)
var = Variable(self.bn_module.running_var)
std = torch.sqrt(var + self.bn_module.eps)
weight, bias = self.fold_bn(mean, std)
self.qw.update(weight.data)
x = F.conv2d(x, FakeQuantize.apply(weight, self.qw), bias,
stride=self.conv_module.stride,
padding=self.conv_module.padding, dilation=self.conv_module.dilation,
groups=self.conv_module.groups)
x = F.relu(x)
if hasattr(self, 'qo'):
self.qo.update(x)
x = FakeQuantize.apply(x, self.qo)
return x
def quantize_inference(self, x):
x = x - self.qi.zero_point
x = self.conv_module(x)
x = self.M * x
# if self.quant_type is 'INT':
x = get_nearest_val(self.quant_type,x)
x = x + self.qo.zero_point
x.clamp_(min=0)
return x
class QConvBN(QModule):
def __init__(self, quant_type, conv_module, bn_module, qi=True, qo=True, num_bits=8, e_bits=3):
super(QConvBN, self).__init__(quant_type, qi, qo, num_bits, e_bits)
self.conv_module = conv_module
self.bn_module = bn_module
self.qw = QParam(quant_type, num_bits,e_bits)
        self.register_buffer('M', torch.tensor([], requires_grad=False))  # register M as a buffer
def fold_bn(self, mean, std):
if self.bn_module.affine:
gamma_ = self.bn_module.weight / std
weight = self.conv_module.weight * gamma_.view(self.conv_module.out_channels, 1, 1, 1)
if self.conv_module.bias is not None:
bias = gamma_ * self.conv_module.bias - gamma_ * mean + self.bn_module.bias
else:
bias = self.bn_module.bias - gamma_ * mean
else:
gamma_ = 1 / std
weight = self.conv_module.weight * gamma_
if self.conv_module.bias is not None:
bias = gamma_ * self.conv_module.bias - gamma_ * mean
else:
bias = -gamma_ * mean
return weight, bias
def freeze(self, qi=None, qo=None):
if hasattr(self, 'qi') and qi is not None:
raise ValueError('qi has been provided in init function.')
if not hasattr(self, 'qi') and qi is None:
            raise ValueError('qi does not exist and should be provided.')
if hasattr(self, 'qo') and qo is not None:
raise ValueError('qo has been provided in init function.')
if not hasattr(self, 'qo') and qo is None:
            raise ValueError('qo does not exist and should be provided.')
if qi is not None:
self.qi = qi
if qo is not None:
self.qo = qo
self.M.data = (self.qw.scale * self.qi.scale / self.qo.scale).data
std = torch.sqrt(self.bn_module.running_var + self.bn_module.eps)
weight, bias = self.fold_bn(self.bn_module.running_mean, std)
self.conv_module.weight.data = self.qw.quantize_tensor(weight.data)
self.conv_module.weight.data = self.conv_module.weight.data - self.qw.zero_point
if self.conv_module.bias is None:
self.conv_module.bias = nn.Parameter(quantize_tensor(self.quant_type,
bias, scale=self.qi.scale * self.qw.scale,
zero_point=0., qmax=self.bias_qmax,is_bias=True))
else:
self.conv_module.bias.data = quantize_tensor(self.quant_type,
bias, scale=self.qi.scale * self.qw.scale,
zero_point=0., qmax=self.bias_qmax,is_bias=True)
def fakefreeze(self):
self.conv_module.weight.data = self.qw.dequantize_tensor(self.conv_module.weight.data)
self.conv_module.bias.data = dequantize_tensor(self.conv_module.bias.data,scale=self.qi.scale * self.qw.scale, zero_point=0.)
def forward(self, x):
if hasattr(self, 'qi'):
self.qi.update(x)
x = FakeQuantize.apply(x, self.qi)
if self.training:
y = F.conv2d(x, self.conv_module.weight, self.conv_module.bias,
stride=self.conv_module.stride,
padding=self.conv_module.padding,
dilation=self.conv_module.dilation,
groups=self.conv_module.groups)
y = y.permute(1, 0, 2, 3) # NCHW -> CNHW
y = y.contiguous().view(self.conv_module.out_channels, -1) # CNHW -> C,NHW
# mean = y.mean(1)
# var = y.var(1)
mean = y.mean(1).detach()
var = y.var(1).detach()
self.bn_module.running_mean = \
(1 - self.bn_module.momentum) * self.bn_module.running_mean + \
self.bn_module.momentum * mean
self.bn_module.running_var = \
(1 - self.bn_module.momentum) * self.bn_module.running_var + \
self.bn_module.momentum * var
else:
mean = Variable(self.bn_module.running_mean)
var = Variable(self.bn_module.running_var)
std = torch.sqrt(var + self.bn_module.eps)
weight, bias = self.fold_bn(mean, std)
self.qw.update(weight.data)
x = F.conv2d(x, FakeQuantize.apply(weight, self.qw), bias,
stride=self.conv_module.stride,
padding=self.conv_module.padding, dilation=self.conv_module.dilation,
groups=self.conv_module.groups)
# x = F.relu(x)
if hasattr(self, 'qo'):
self.qo.update(x)
x = FakeQuantize.apply(x, self.qo)
return x
def quantize_inference(self, x):
x = x - self.qi.zero_point
x = self.conv_module(x)
x = self.M * x
# if self.quant_type is 'INT':
x = get_nearest_val(self.quant_type,x)
x = x + self.qo.zero_point
# x.clamp_(min=0)
return x
# TODO: revisit; this probably needs a qo
class QAdaptiveAvgPool2d(QModule):
def __init__(self, quant_type, qi=False, qo=True, num_bits=8, e_bits=3):
super(QAdaptiveAvgPool2d, self).__init__(quant_type,qi,qo,num_bits,e_bits)
        self.register_buffer('M', torch.tensor([], requires_grad=False))  # register M as a buffer
def freeze(self, qi=None, qo=None):
if hasattr(self, 'qi') and qi is not None:
raise ValueError('qi has been provided in init function.')
if not hasattr(self, 'qi') and qi is None:
            raise ValueError('qi does not exist and should be provided.')
if qi is not None:
self.qi = qi
if hasattr(self, 'qo') and qo is not None:
raise ValueError('qo has been provided in init function.')
if not hasattr(self, 'qo') and qo is None:
            raise ValueError('qo does not exist and should be provided.')
if qo is not None:
self.qo = qo
self.M.data = (self.qi.scale / self.qo.scale).data
def forward(self, x):
if hasattr(self, 'qi'):
self.qi.update(x)
            # as with QReLU: qi's scale was just updated, and x is put on the
            # quantization grid (usually the previous layer's qo is True, so x
            # already lies on the grid)
            x = FakeQuantize.apply(x, self.qi)
        x = F.adaptive_avg_pool2d(x, (1, 1))  # quantizing input and output is all this layer needs
if hasattr(self, 'qo'):
self.qo.update(x)
x = FakeQuantize.apply(x, self.qo)
return x
def quantize_inference(self, x):
        x = F.adaptive_avg_pool2d(x, (1, 1))  # quantizing input and output is all this layer needs
x = self.M * x
# if self.quant_type is 'INT':
x = get_nearest_val(self.quant_type,x)
return x
class QConvBNReLU6(QModule):
def __init__(self, quant_type, conv_module, bn_module, qi=True, qo=True, num_bits=8, e_bits=3):
super(QConvBNReLU6, self).__init__(quant_type, qi, qo, num_bits, e_bits)
self.conv_module = conv_module
self.bn_module = bn_module
self.qw = QParam(quant_type, num_bits,e_bits)
        self.register_buffer('M', torch.tensor([], requires_grad=False))  # register M as a buffer
def fold_bn(self, mean, std):
if self.bn_module.affine:
gamma_ = self.bn_module.weight / std
weight = self.conv_module.weight * gamma_.view(self.conv_module.out_channels, 1, 1, 1)
if self.conv_module.bias is not None:
bias = gamma_ * self.conv_module.bias - gamma_ * mean + self.bn_module.bias
else:
bias = self.bn_module.bias - gamma_ * mean
else:
gamma_ = 1 / std
weight = self.conv_module.weight * gamma_
if self.conv_module.bias is not None:
bias = gamma_ * self.conv_module.bias - gamma_ * mean
else:
bias = -gamma_ * mean
return weight, bias
def freeze(self, qi=None, qo=None):
if hasattr(self, 'qi') and qi is not None:
raise ValueError('qi has been provided in init function.')
if not hasattr(self, 'qi') and qi is None:
            raise ValueError('qi does not exist and should be provided.')
if hasattr(self, 'qo') and qo is not None:
raise ValueError('qo has been provided in init function.')
if not hasattr(self, 'qo') and qo is None:
            raise ValueError('qo does not exist and should be provided.')
if qi is not None:
self.qi = qi
if qo is not None:
self.qo = qo
self.M.data = (self.qw.scale * self.qi.scale / self.qo.scale).data
std = torch.sqrt(self.bn_module.running_var + self.bn_module.eps)
weight, bias = self.fold_bn(self.bn_module.running_mean, std)
self.conv_module.weight.data = self.qw.quantize_tensor(weight.data)
self.conv_module.weight.data = self.conv_module.weight.data - self.qw.zero_point
self.conv_module.bias.data = quantize_tensor(self.quant_type,
bias, scale=self.qi.scale * self.qw.scale,
zero_point=0., qmax=self.bias_qmax,is_bias=True)
def fakefreeze(self):
self.conv_module.weight.data = self.qw.dequantize_tensor(self.conv_module.weight.data)
self.conv_module.bias.data = dequantize_tensor(self.conv_module.bias.data,scale=self.qi.scale * self.qw.scale, zero_point=0.)
def forward(self, x):
if hasattr(self, 'qi'):
self.qi.update(x)
x = FakeQuantize.apply(x, self.qi)
if self.training:
y = F.conv2d(x, self.conv_module.weight, self.conv_module.bias,
stride=self.conv_module.stride,
padding=self.conv_module.padding,
dilation=self.conv_module.dilation,
groups=self.conv_module.groups)
y = y.permute(1, 0, 2, 3) # NCHW -> CNHW
y = y.contiguous().view(self.conv_module.out_channels, -1) # CNHW -> C,NHW
# mean = y.mean(1)
# var = y.var(1)
mean = y.mean(1).detach()
var = y.var(1).detach()
self.bn_module.running_mean = \
(1 - self.bn_module.momentum) * self.bn_module.running_mean + \
self.bn_module.momentum * mean
self.bn_module.running_var = \
(1 - self.bn_module.momentum) * self.bn_module.running_var + \
self.bn_module.momentum * var
else:
mean = Variable(self.bn_module.running_mean)
var = Variable(self.bn_module.running_var)
std = torch.sqrt(var + self.bn_module.eps)
weight, bias = self.fold_bn(mean, std)
self.qw.update(weight.data)
x = F.conv2d(x, FakeQuantize.apply(weight, self.qw), bias,
stride=self.conv_module.stride,
padding=self.conv_module.padding, dilation=self.conv_module.dilation,
groups=self.conv_module.groups)
x = F.relu6(x)
if hasattr(self, 'qo'):
self.qo.update(x)
x = FakeQuantize.apply(x, self.qo)
return x
def quantize_inference(self, x):
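        # quantize the ReLU6 cap (6) into qo's range so the clamp below
        # operates on quantized values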
a = torch.tensor(6)
a = self.qo.quantize_tensor(a)
x = x - self.qi.zero_point
x = self.conv_module(x)
x = self.M * x
# if self.quant_type is not 'POT':
# x = get_nearest_val(self.quant_type,x)
x = get_nearest_val(self.quant_type,x)
        x = x + self.qo.zero_point  # now in qo's range
x.clamp_(min=0, max=a.item())
return x
class QModule_2(nn.Module):
def __init__(self,quant_type, qi0=True, qi1=True, qo=True, num_bits=8, e_bits=3):
super(QModule_2, self).__init__()
        if qi0:
            self.qi0 = QParam(quant_type, num_bits, e_bits)  # qi0 is fully configured here by num_bits and quant_type
        if qi1:
            self.qi1 = QParam(quant_type, num_bits, e_bits)  # likewise for qi1
        if qo:
            self.qo = QParam(quant_type, num_bits, e_bits)  # likewise for qo
self.quant_type = quant_type
self.num_bits = num_bits
self.e_bits = e_bits
self.bias_qmax = bias_qmax(quant_type)
def freeze(self):
pass
def fakefreeze(self):
pass
def quantize_inference(self, x):
raise NotImplementedError('quantize_inference should be implemented.')
class QElementwiseAdd(QModule_2):
def __init__(self, quant_type, qi0=True, qi1=True, qo=True, num_bits=8, e_bits=3):
super(QElementwiseAdd, self).__init__(quant_type, qi0, qi1, qo, num_bits, e_bits)
        self.register_buffer('M0', torch.tensor([], requires_grad=False))  # register M0 as a buffer
        self.register_buffer('M1', torch.tensor([], requires_grad=False))  # register M1 as a buffer
def freeze(self, qi0=None, qi1=None ,qo=None):
        if hasattr(self, 'qi0') and qi0 is not None:
            raise ValueError('qi0 has been provided in init function.')
        if not hasattr(self, 'qi0') and qi0 is None:
            raise ValueError('qi0 does not exist and should be provided.')
        if hasattr(self, 'qi1') and qi1 is not None:
            raise ValueError('qi1 has been provided in init function.')
        if not hasattr(self, 'qi1') and qi1 is None:
            raise ValueError('qi1 does not exist and should be provided.')
        if hasattr(self, 'qo') and qo is not None:
            raise ValueError('qo has been provided in init function.')
        if not hasattr(self, 'qo') and qo is None:
            raise ValueError('qo does not exist and should be provided.')
        # Pooling/activation inputs need no extra min/max statistics; they share the previous layer's output range
if qi0 is not None:
self.qi0 = qi0
if qi1 is not None:
self.qi1 = qi1
if qo is not None:
self.qo = qo
        # Following https://zhuanlan.zhihu.com/p/156835141, these are the coefficients in Eq. (3)
self.M0.data = self.qi0.scale / self.qo.scale
self.M1.data = self.qi1.scale / self.qi0.scale
# self.M0.data = self.qi0.scale / self.qo.scale
# self.M1.data = self.qi1.scale / self.qo.scale
    def forward(self, x0, x1):  # forward pass; x0/x1 are floating-point tensors
if hasattr(self, 'qi0'):
self.qi0.update(x0)
            x0 = FakeQuantize.apply(x0, self.qi0)  # fake-quantize the first input
if hasattr(self, 'qi1'):
self.qi1.update(x1)
            x1 = FakeQuantize.apply(x1, self.qi1)  # fake-quantize the second input
x = x0 + x1
if hasattr(self, 'qo'):
self.qo.update(x)
x = FakeQuantize.apply(x, self.qo)
return x
    def quantize_inference(self, x0, x1):  # the inputs here are already-quantized values
x0 = x0 - self.qi0.zero_point
x1 = x1 - self.qi1.zero_point
x = self.M0 * (x0 + x1*self.M1)
# if self.quant_type is 'INT':
x = get_nearest_val(self.quant_type,x)
x = x + self.qo.zero_point
return x
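    # Rescaling sketch (derivation, not from the original comments): requiring
    # s_o * (q_o - Z_o) = s0 * (q0 - Z0) + s1 * (q1 - Z1) gives
    # q_o = (s0 / s_o) * [(q0 - Z0) + (s1 / s0) * (q1 - Z1)] + Z_o,
    # i.e. M0 = s0 / s_o and M1 = s1 / s0, exactly as set in freeze().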
# new modules for full-precision model - fold bn
# inference will need matching adaptation as well
class ConvBNReLU(nn.Module):
def __init__(self,conv_module, bn_module):
super(ConvBNReLU, self).__init__()
self.conv_module = conv_module
self.bn_module = bn_module
def fold_bn(self, mean, std):
if self.bn_module.affine:
gamma_ = self.bn_module.weight / std
weight = self.conv_module.weight * gamma_.view(self.conv_module.out_channels, 1, 1, 1)
if self.conv_module.bias is not None:
bias = gamma_ * self.conv_module.bias - gamma_ * mean + self.bn_module.bias
else:
bias = self.bn_module.bias - gamma_ * mean
else:
gamma_ = 1 / std
weight = self.conv_module.weight * gamma_
if self.conv_module.bias is not None:
bias = gamma_ * self.conv_module.bias - gamma_ * mean
else:
bias = -gamma_ * mean
return weight, bias
def freeze(self):
std = torch.sqrt(self.bn_module.running_var + self.bn_module.eps)
weight, bias = self.fold_bn(self.bn_module.running_mean, std)
self.conv_module.weight.data = weight.data
if self.conv_module.bias is None:
self.conv_module.bias = nn.Parameter(bias)
else:
self.conv_module.bias.data = bias
def fakefreeze(self):
pass
def forward(self, x):
if self.training:
y = F.conv2d(x, self.conv_module.weight, self.conv_module.bias,
stride=self.conv_module.stride,
padding=self.conv_module.padding,
dilation=self.conv_module.dilation,
groups=self.conv_module.groups)
y = y.permute(1, 0, 2, 3) # NCHW -> CNHW
y = y.contiguous().view(self.conv_module.out_channels, -1) # CNHW -> C,NHW
# mean = y.mean(1)
# var = y.var(1)
mean = y.mean(1).detach()
var = y.var(1).detach()
self.bn_module.running_mean = \
(1 - self.bn_module.momentum) * self.bn_module.running_mean + \
self.bn_module.momentum * mean
self.bn_module.running_var = \
(1 - self.bn_module.momentum) * self.bn_module.running_var + \
self.bn_module.momentum * var
else:
mean = Variable(self.bn_module.running_mean)
var = Variable(self.bn_module.running_var)
std = torch.sqrt(var + self.bn_module.eps)
weight, bias = self.fold_bn(mean, std)
x = F.conv2d(x, weight, bias,
stride=self.conv_module.stride,
padding=self.conv_module.padding, dilation=self.conv_module.dilation,
groups=self.conv_module.groups)
x = F.relu(x)
return x
def quantize_inference(self, x):
x = self.conv_module(x)
x.clamp_(min=0)
return x
class ConvBN(nn.Module):
def __init__(self,conv_module, bn_module):
super(ConvBN, self).__init__()
self.conv_module = conv_module
self.bn_module = bn_module
def fold_bn(self, mean, std):
if self.bn_module.affine:
gamma_ = self.bn_module.weight / std
weight = self.conv_module.weight * gamma_.view(self.conv_module.out_channels, 1, 1, 1)
if self.conv_module.bias is not None:
bias = gamma_ * self.conv_module.bias - gamma_ * mean + self.bn_module.bias
else:
bias = self.bn_module.bias - gamma_ * mean
else:
gamma_ = 1 / std
weight = self.conv_module.weight * gamma_
if self.conv_module.bias is not None:
bias = gamma_ * self.conv_module.bias - gamma_ * mean
else:
bias = -gamma_ * mean
return weight, bias
def freeze(self):
std = torch.sqrt(self.bn_module.running_var + self.bn_module.eps)
weight, bias = self.fold_bn(self.bn_module.running_mean, std)
self.conv_module.weight.data = weight.data
if self.conv_module.bias is None:
self.conv_module.bias = nn.Parameter(bias)
else:
self.conv_module.bias.data = bias
def fakefreeze(self):
pass
def forward(self, x):
if self.training:
y = F.conv2d(x, self.conv_module.weight, self.conv_module.bias,
stride=self.conv_module.stride,
padding=self.conv_module.padding,
dilation=self.conv_module.dilation,
groups=self.conv_module.groups)
y = y.permute(1, 0, 2, 3) # NCHW -> CNHW
y = y.contiguous().view(self.conv_module.out_channels, -1) # CNHW -> C,NHW
# mean = y.mean(1)
# var = y.var(1)
mean = y.mean(1).detach()
var = y.var(1).detach()
self.bn_module.running_mean = \
(1 - self.bn_module.momentum) * self.bn_module.running_mean + \
self.bn_module.momentum * mean
self.bn_module.running_var = \
(1 - self.bn_module.momentum) * self.bn_module.running_var + \
self.bn_module.momentum * var
else:
mean = Variable(self.bn_module.running_mean)
var = Variable(self.bn_module.running_var)
std = torch.sqrt(var + self.bn_module.eps)
weight, bias = self.fold_bn(mean, std)
x = F.conv2d(x, weight, bias,
stride=self.conv_module.stride,
padding=self.conv_module.padding, dilation=self.conv_module.dilation,
groups=self.conv_module.groups)
return x
def quantize_inference(self, x):
x = self.conv_module(x)
return x
class ConvBNReLU6(nn.Module):
def __init__(self,conv_module, bn_module):
super(ConvBNReLU6, self).__init__()
self.conv_module = conv_module
self.bn_module = bn_module
def fold_bn(self, mean, std):
if self.bn_module.affine:
gamma_ = self.bn_module.weight / std
weight = self.conv_module.weight * gamma_.view(self.conv_module.out_channels, 1, 1, 1)
if self.conv_module.bias is not None:
bias = gamma_ * self.conv_module.bias - gamma_ * mean + self.bn_module.bias
else:
bias = self.bn_module.bias - gamma_ * mean
else:
gamma_ = 1 / std
weight = self.conv_module.weight * gamma_
if self.conv_module.bias is not None:
bias = gamma_ * self.conv_module.bias - gamma_ * mean
else:
bias = -gamma_ * mean
return weight, bias
def freeze(self):
std = torch.sqrt(self.bn_module.running_var + self.bn_module.eps)
weight, bias = self.fold_bn(self.bn_module.running_mean, std)
self.conv_module.weight.data = weight.data
if self.conv_module.bias is None:
self.conv_module.bias = nn.Parameter(bias)
else:
self.conv_module.bias.data = bias
def fakefreeze(self):
pass
def forward(self, x):
if self.training:
y = F.conv2d(x, self.conv_module.weight, self.conv_module.bias,
stride=self.conv_module.stride,
padding=self.conv_module.padding,
dilation=self.conv_module.dilation,
groups=self.conv_module.groups)
y = y.permute(1, 0, 2, 3) # NCHW -> CNHW
y = y.contiguous().view(self.conv_module.out_channels, -1) # CNHW -> C,NHW
# mean = y.mean(1)
# var = y.var(1)
mean = y.mean(1).detach()
var = y.var(1).detach()
self.bn_module.running_mean = \
(1 - self.bn_module.momentum) * self.bn_module.running_mean + \
self.bn_module.momentum * mean
self.bn_module.running_var = \
(1 - self.bn_module.momentum) * self.bn_module.running_var + \
self.bn_module.momentum * var
else:
mean = Variable(self.bn_module.running_mean)
var = Variable(self.bn_module.running_var)
std = torch.sqrt(var + self.bn_module.eps)
weight, bias = self.fold_bn(mean, std)
x = F.conv2d(x, weight, bias,
stride=self.conv_module.stride,
padding=self.conv_module.padding, dilation=self.conv_module.dilation,
groups=self.conv_module.groups)
x = F.relu6(x)
return x
def quantize_inference(self, x):
x = self.conv_module(x)
x.clamp_(min=0,max=6)
return x
import os
import torch
import time
import random
import numpy as np
import pickle
import utils
from architectures import *
# Training differs depending on whether this is a distillation run.
# The models and their params configs were created and saved earlier; load both here, then train.
def train(args, model_path_tar, untrained_model_tar, model_path_dis = None, untrained_model_dis = None, device='cpu'):
print('Training models...')
    # distillation training
if 'distill' in args.mode:
        # load the untrained distillation model (the untrained weights are saved at creation time)
trained_model, model_params = load_model(args, model_path_dis, untrained_model_dis, epoch=0)
        # load the already-trained target model
trained_model_tar, model_params_tar = load_model(args, model_path_tar, untrained_model_tar, epoch=args.epochs)
    # normal (non-distillation) training
else:
        # load the untrained target model (the untrained weights are saved at creation time)
trained_model, model_params = load_model(args, model_path_tar, untrained_model_tar, epoch=0)
print(model_params)
    # get the pre-split dataset with data augmentation applied
dataset = utils.get_dataset(model_params['task'], args.mode, aug=True)
    # hyperparameter setup
    # TODO: tune the learning schedule and evaluate the improvement
learning_rate = model_params['learning_rate']
momentum = model_params['momentum']
weight_decay = model_params['weight_decay']
num_epochs = model_params['epochs']
model_params['optimizer'] = 'SGD'
optimization_params = (learning_rate, weight_decay, momentum)
optimizer, scheduler = utils.get_full_optimizer(trained_model, optimization_params, args)
if 'distill' in args.mode:
trained_model_name = untrained_model_dis
else:
trained_model_name = untrained_model_tar
print('Training: {}...'.format(trained_model_name))
trained_model.to(device)
    # the actual training
    # metrics is appended to continuously, recording per-step data
if 'distill' in args.mode:
metrics = trained_model.train_func(args, trained_model_tar, trained_model, dataset, num_epochs, optimizer, scheduler, model_params, model_path_dis, trained_model_name, device=device)
else:
metrics = trained_model.train_func(args, trained_model, dataset, num_epochs, optimizer, scheduler, model_params, model_path_tar, trained_model_name, device=device)
    # record the results
model_params['train_top1_acc'] = metrics['train_top1_acc']
model_params['test_top1_acc'] = metrics['test_top1_acc']
model_params['train_top5_acc'] = metrics['train_top5_acc']
model_params['test_top5_acc'] = metrics['test_top5_acc']
model_params['epoch_times'] = metrics['epoch_times']
model_params['lrs'] = metrics['lrs']
total_training_time = sum(model_params['epoch_times'])
model_params['total_time'] = total_training_time
print('Training took {} seconds...'.format(total_training_time))
    # save the trained weights and model_params
if 'distill' in args.mode:
save_model(trained_model, model_params, model_path_dis, trained_model_name, epoch=num_epochs)
else:
save_model(trained_model, model_params, model_path_tar, trained_model_name, epoch=num_epochs)
# Configure model info, then create and train the models
def train_models(args, model_path_tar, model_path_dis, device='cpu'):
# if args.model == 'vgg':
# cnn_tar = create_vgg16bn(model_path_tar, args)
# elif args.model == 'mobilenet':
# cnn_tar = create_mobile(model_path_tar, args)
    # returns the model name; instantiates the model and saves the untrained weights and model_params
if args.model == 'resnet18':
cnn_tar = create_resnet18(model_path_tar, args)
elif args.model == 'resnet50':
cnn_tar = create_resnet50(model_path_tar, args)
elif args.model == 'resnet152':
cnn_tar = create_resnet152(model_path_tar, args)
elif args.model == 'mobilenetv2':
cnn_tar = create_mobilenetv2(model_path_tar, args)
# elif args.model == 'resnet':
# cnn_tar = create_resnet56(model_path_tar, args)
# elif args.model == 'wideresnet':
# cnn_tar = create_wideresnet32_4(model_path_tar, args)
if 'distill' in args.mode:
# if args.model == 'vgg':
# cnn_dis = create_vgg16bn(model_path_dis, args)
# elif args.model == 'mobilenet':
# cnn_dis = create_mobile(model_path_dis, args)
# elif args.model == 'resnet':
# cnn_dis = create_resnet56(model_path_dis, args)
# elif args.model == 'wideresnet':
# cnn_dis = create_wideresnet32_4(model_path_dis, args)
if args.model == 'resnet18':
cnn_dis = create_resnet18(model_path_dis, args)
elif args.model == 'resnet50':
cnn_dis = create_resnet50(model_path_dis, args)
elif args.model == 'resnet152':
cnn_dis = create_resnet152(model_path_dis, args)
elif args.model == 'mobilenetv2':
cnn_dis = create_mobilenetv2(model_path_dis, args)
        # load the untrained models and model_params, then start training
train(args, model_path_tar, cnn_tar, model_path_dis, cnn_dis, device = device)
else:
train(args, model_path_tar, cnn_tar, device=device)
# Restore the model's weights
def load_model(args, model_path, model_name, epoch=0):
model_params = load_params(model_path, model_name, epoch)
architecture = 'empty' if 'architecture' not in model_params else model_params['architecture']
network_type = model_params['network_type']
# if 'vgg' in network_type:
# model = VGG(args, model_params)
# elif 'mobilenet' in network_type:
# model = MobileNet(args, model_params)
# elif 'resnet56' in network_type:
# model = ResNet(args, model_params)
# elif 'wideresnet' in network_type:
# model = WideResNet(args,model_params)
if 'resnet18' in network_type:
model = resnet18(args, model_params)
elif 'resnet50' in network_type:
model = resnet50(args, model_params)
elif 'resnet152' in network_type:
model = resnet152(args, model_params)
elif 'mobilenetv2' in network_type:
model = MobileNetV2(args, model_params)
    # model_name is the key that distinguishes the save paths
network_path = model_path + '/' + model_name
if epoch == 0:
load_path = network_path + '/untrained'
elif epoch == -1:
load_path = network_path + '/last'
else:
load_path = network_path + '/' + str(epoch)
if torch.cuda.is_available():
model.load_state_dict(torch.load(load_path), strict=False)
else:
model.load_state_dict(torch.load(load_path, map_location=torch.device('cpu')), strict=False)
return model, model_params
# Restore the model_params record dict
def load_params(models_path, model_name, epoch=0):
params_path = models_path + '/' + model_name
if epoch == 0:
params_path = params_path + '/parameters_untrained'
elif epoch == -1:
params_path = params_path + '/parameters_last'
else:
params_path = params_path + f'/parameters_{epoch}'
with open(params_path, 'rb') as f:
model_params = pickle.load(f)
return model_params
def create_vgg16bn(model_path, args):
print('Creating VGG16BN untrained {} models...'.format(args.data))
model_params = get_data_params(args.data)
model_params['fc_layers'] = [512, 512]
model_params['conv_channels'] = [64, 64, 128, 128, 256, 256, 256, 512, 512, 512, 512, 512, 512]
model_name = '{}_vgg16bn'.format(args.data)
model_params['network_type'] = 'vgg16'
model_params['max_pool_sizes'] = [1, 2, 1, 2, 1, 1, 2, 1, 1, 2, 1, 1, 2]
model_params['conv_batch_norm'] = True
model_params['init_weights'] = True
model_params['augment_training'] = True
get_lr_params(model_params, args)
model_name = save_networks(args, model_name, model_params, model_path)
return model_name
# def create_mobile(model_path, args):
# print('Creating MobileNet untrained {} models...'.format(args.data))
# model_params = get_data_params(args.data)
# model_name = '{}_mobilenet'.format(args.data)
# model_params['network_type'] = 'mobilenet'
# model_params['cfg'] = [64, (128,2), 128, (256,2), 256, (512,2), 512, 512, 512, 512, 512, (1024,2), 1024]
# model_params['augment_training'] = True
# model_params['init_weights'] = True
# get_lr_params(model_params, args)
# model_name = save_networks(args, model_name, model_params, model_path)
# return model_name
def create_resnet56(models_path, args):
print('Creating resnet56 untrained {} models...'.format(args.data))
model_params = get_data_params(args.data)
#
model_params['block_type'] = 'basic'
#
model_params['num_blocks'] = [9,9,9]
model_name = '{}_resnet56'.format(args.data)
model_params['network_type'] = 'resnet56'
model_params['augment_training'] = True
model_params['init_weights'] = True
get_lr_params(model_params, args)
model_name = save_networks(args, model_name, model_params, models_path)
return model_name
# Set model info, instantiate the model, and save the untrained weights.
# model_name is embedded in the save path as an identifier.
def create_resnet18(models_path, args):
print('Creating resnet18 untrained {} models...'.format(args.data))
model_params = get_data_params(args.data)
#
model_params['block_type'] = 'basic'
#
model_params['num_blocks'] = [2, 2, 2, 2]
model_name = '{}_resnet18'.format(args.data)
    # used to decide which model class to load
model_params['network_type'] = 'resnet18'
model_params['augment_training'] = True
model_params['init_weights'] = True
get_lr_params(model_params, args)
model_name = save_networks(args, model_name, model_params, models_path)
return model_name
def create_resnet50(models_path, args):
print('Creating resnet50 untrained {} models...'.format(args.data))
model_params = get_data_params(args.data)
#
model_params['block_type'] = 'bottle'
#
model_params['num_blocks'] = [3, 4, 6, 3]
model_name = '{}_resnet50'.format(args.data)
model_params['network_type'] = 'resnet50'
model_params['augment_training'] = True
model_params['init_weights'] = True
get_lr_params(model_params, args)
model_name = save_networks(args, model_name, model_params, models_path)
return model_name
def create_resnet152(models_path, args):
print('Creating resnet152 untrained {} models...'.format(args.data))
model_params = get_data_params(args.data)
#
model_params['block_type'] = 'bottle'
#
model_params['num_blocks'] = [3, 8, 36, 3]
model_name = '{}_resnet152'.format(args.data)
model_params['network_type'] = 'resnet152'
model_params['augment_training'] = True
model_params['init_weights'] = True
get_lr_params(model_params, args)
model_name = save_networks(args, model_name, model_params, models_path)
return model_name
def create_mobilenetv2(model_path, args):
print('Creating MobileNetV2 untrained {} models...'.format(args.data))
model_params = get_data_params(args.data)
model_name = '{}_mobilenetv2'.format(args.data)
model_params['network_type'] = 'mobilenetv2'
model_params['augment_training'] = True
model_params['init_weights'] = True
get_lr_params(model_params, args)
model_name = save_networks(args, model_name, model_params, model_path)
return model_name
def create_wideresnet32_4(models_path, args):
print('Creating wideresnet32_4 untrained {} models...'.format(args.data))
model_params = get_data_params(args.data)
model_params['block_type'] = 'bottle'
model_params['num_blocks'] = [5,5,5]
model_params['widen_factor'] = 4
model_params['dropout_rate'] = 0.3
model_name = '{}_wideresnet'.format(args.data)
model_params['network_type'] = 'wideresnet'
model_params['augment_training'] = True
model_params['init_weights'] = True
get_lr_params(model_params, args)
model_name = save_networks(args, model_name, model_params, models_path)
return model_name
# Instantiate the model and save it via save_model
def save_networks(args, model_name, model_params, model_path):
print('Saving CNN...')
model_params['base_model'] = model_name
network_type = model_params['network_type']
# if 'vgg' in network_type:
# model = VGG(args, model_params)
# elif 'mobilenet' in network_type:
# model = MobileNet(args, model_params)
# elif 'resnet56' in network_type:
# model = ResNet(args, model_params)
# elif 'wideresnet' in network_type:
# model = WideResNet(args, model_params)
if 'resnet18' in network_type:
model = resnet18(args, model_params)
elif 'resnet50' in network_type:
model = resnet50(args, model_params)
elif 'resnet152' in network_type:
model = resnet152(args, model_params)
elif 'mobilenetv2' in network_type:
model = MobileNetV2(args, model_params)
# 存储model权值参数
save_model(model, model_params, model_path, model_name, epoch=0)
return model_name
# Save the model weights and model_params, keyed by epoch
def save_model(model, model_params, model_path, model_name, epoch=-1):
if not os.path.exists(model_path):
os.makedirs(model_path)
network_path = model_path + '/' + model_name
if not os.path.exists(network_path):
os.makedirs(network_path)
if epoch == 0:
path = network_path + '/untrained'
params_path = network_path + '/parameters_untrained'
torch.save(model.state_dict(), path)
elif epoch == -1:
path = network_path + '/last'
params_path = network_path + '/parameters_last'
torch.save(model.state_dict(), path)
else:
path = network_path + '/' + str(epoch)
params_path = network_path + '/parameters_'+str(epoch)
torch.save(model.state_dict(), path)
if model_params is not None:
with open(params_path, 'wb') as f:
pickle.dump(model_params, f, pickle.HIGHEST_PROTOCOL)
# Configure the dataset and return its settings (input_size, num_classes, etc.).
# params['task'] records which dataset is used.
def get_data_params(data):
if data == 'cinic10':
return cinic10_params()
elif data == 'gtsrb':
return gtsrb_params()
elif data == 'cifar10':
return cifar10_params()
elif data == 'cifar100':
return cifar100_params()
def gtsrb_params():
model_params = {}
model_params['task'] = 'gtsrb'
model_params['input_size'] = 32
model_params['num_classes'] = 43
return model_params
def cinic10_params():
model_params = {}
model_params['task'] = 'cinic10'
model_params['input_size'] = 32
model_params['num_classes'] = 10
return model_params
def cifar10_params():
model_params = {}
model_params['task'] = 'cifar10'
model_params['input_size'] = 32
model_params['num_classes'] = 10
return model_params
def cifar100_params():
model_params = {}
model_params['task'] = 'cifar100'
model_params['input_size'] = 32
model_params['num_classes'] = 100
return model_params
# Set the optimizer and lr_scheduler hyperparameters and record them in model_params
def get_lr_params(model_params, args):
model_params['momentum'] = 0.9
network_type = model_params['network_type']
if 'vgg' in network_type or 'wideresnet' in network_type:
model_params['weight_decay'] = 0.0005
else:
model_params['weight_decay'] = 0.0001
model_params['learning_rate'] = 0.1
model_params['epochs'] = args.epochs
model_params['scheduler'] = f'CosineAnnealingLR_{args.epochs}'
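    # Illustrative outcome (worked out from the code above) for resnet18 with
    # --epochs 100: momentum 0.9, weight_decay 0.0001, learning_rate 0.1,
    # scheduler 'CosineAnnealingLR_100'.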
import numpy as np
import matplotlib.pyplot as plt
from sklearn import metrics
import argparse
if __name__ == '__main__':
    # plot the best ROC data saved by test_mia_attack_model
parser = argparse.ArgumentParser(description='PLOT_TrajectoryMIA')
parser.add_argument('--model', type=str, default='resnet18', help=['resnet18','resnet50','resnet152','mobilenetv2'])
parser.add_argument('--data', type=str, default='cifar10', help=['cinic10', 'cifar10', 'cifar100', 'gtsrb'])
parser.add_argument('--model_distill', type=str, default='resnet18',help=['resnet18','resnet50','resnet152','mobilenetv2'])
args = parser.parse_args()
data_auc = np.load(f'./outputs/{args.data}_{args.model}_{args.model_distill}_trajectory_auc.npy', allow_pickle=True).item()
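    # fpr from roc_curve is sorted ascending, so the last index with
    # fpr <= 0.001 gives the TPR at a 0.1% false-positive rate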
for i in range(len(data_auc['fpr'])):
if data_auc['fpr'][i] > 0.001:
print('TPR at 0.1% FPR: {:.1%}'.format(data_auc['tpr'][i-1]))
break
plt.plot(data_auc['fpr'], data_auc['tpr'], color='darkorange', lw=2, label='ROC curve')
plt.plot([0, 1], [0, 1], color='navy', lw=2, linestyle='--')
plt.xlim([0.0, 1.0])
plt.ylim([0.0, 1.05])
plt.xlabel('False Positive Rate')
plt.ylabel('True Positive Rate')
plt.title('Receiver operating characteristic example')
plt.legend(loc="lower right")
# plt.show()
plt.savefig(f'./img/{args.data}_{args.model}_{args.model_distill}_roc_curve.png')
## Loss Trajectory MIA
#### Update 2023.5.28
1\. Approach
The idea comes from the CCS 2022 paper ``Membership Inference Attacks by Exploiting Loss Trajectory`` https://arxiv.org/abs/2208.14933
The paper first analyzes why the attack performance of common MIA methods is unsatisfactory, and then presents a new method named TrajectoryMIA.
Traditional MIA methods use only the model's output information (output vector or loss). Because a model always overfits to some degree, the output or loss it produces for training-set versus non-training-set inputs shows a fairly clear difference (e.g., small loss on training samples, large loss on non-members).
However, traditional methods cannot distinguish inputs that are not in the training set yet still receive a small loss. The authors found that for such small-loss data, members and non-members differ in how fast and how far the loss converges during training. As shown in the figure, they have different loss trajectories (Loss Trajectory).
![p1](fig\p1.png)
If a small-loss sample is not in the training set, it is usually a relatively easy image whose training loss drops quickly, so in mid-training its loss trajectory lies below that of samples in the training set. By capturing this difference in loss trajectories, a more effective MIA can be mounted.
How is the loss trajectory obtained in practice? For the target model we can only observe its outputs; we have no access to its training process, let alone its loss trajectory. Knowledge distillation solves this: save the weights of distill_target_model at every epoch, then load each per-epoch snapshot and compute its loss against the labels. This yields the loss trajectory and, from it, the attack model's test data; a minimal sketch follows.
我们仍然通过Shadow Model的方式来构造Attack Model的训练集数据,Target Model与Shadow Model我暂时使用了相同的结构,Shadow Model的数据集的构造也采用了与Naive MIA相似的方式,都是切分了数据集。为了与distill Target Model的Loss Trajectory对齐,我们在训练完Shadow Model后,通过distill Shadow Model的方式获得其Loss Trajectory,然后再训练Attack Model.
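A minimal sketch of the trajectory extraction described above, assuming one checkpoint file per distillation epoch under `checkpoint_dir`; the file naming and the `build_model` helper are assumptions here, not the repo's exact API:
```python
import torch
import torch.nn.functional as F

def loss_trajectory(checkpoint_dir, epochs, build_model, loader, device):
    """Per-sample loss at every distillation epoch -> tensor (num_samples, epochs)."""
    per_epoch_losses = []
    model = build_model().to(device)
    for epoch in range(1, epochs + 1):
        state = torch.load(f'{checkpoint_dir}/{epoch}', map_location=device)
        model.load_state_dict(state)
        model.eval()
        losses = []
        with torch.no_grad():
            for x, y in loader:
                x, y = x.to(device), y.to(device)
                # per-sample cross-entropy: no reduction over the batch
                losses.append(F.cross_entropy(model(x), y, reduction='none'))
        per_epoch_losses.append(torch.cat(losses))
    return torch.stack(per_epoch_losses, dim=1)  # one row of losses per sample
```
Each row (the trajectory), concatenated with a final loss value, then forms one input sample for the attack model.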
2\. Code files
architectures.py: the model architectures
dataset.py: builds the datasets used by the Target Model and Shadow Model
normal.py: config parameters; building, training, saving, and loading models
utils.py: optimizer/lr_scheduler setup and the concrete training and testing loops
plot.py: plots the Attack Model's ROC curve (AUC)
3\. Results
- TrajectoryMIA was run on CIFAR10, CIFAR100, and CINIC10 against ResNet18/50/152 and MobileNetV2, and achieved a clearly effective attack in every combination.
- Detailed numbers:
Notes:
In this experimental stage, the Target Model, Shadow Model, Distill Target Model, and Distill Shadow Model were trained without early stopping, in order to obtain more heavily overfitted models that make the Attack Model's effect more pronounced and to verify that the training pipeline works; later runs will use early stopping and try to raise each model's test accuracy.
The accuracies of the models trained on CIFAR10 are all below earlier training runs, mainly because the dataset was split for the Shadow and Distill Models, leaving less training data, and some hyperparameters were changed without careful re-tuning yet. Applying data augmentation to the test dataset, which raises the difficulty of testing, may also contribute.
The models trained on CIFAR100 reach rather low accuracy (only about 30% for the ResNets and about 40% for MobileNetV2) with a large gap between train and test accuracy, i.e., clear overfitting. The likely cause is that after splitting CIFAR100 the training data are quite insufficient, while CIFAR100 is the harder task.
CINIC10 contains 270,000 images, more than CIFAR10 and CIFAR100, but part of it is downsampled from ImageNet and is harder than CIFAR10; the resulting top-1 accuracy is below CIFAR10 but above CIFAR100. Compared with the Target and Shadow Models in the naive MIA, the TrajectoryMIA Target and Shadow Models are clearly not fully trained; the hyperparameters still need further tuning.
The blocks below list the training and testing results of the Target Model, Shadow Model, Distill Target Model, Distill Shadow Model, and Attack Model for each model/dataset combination.
* CIFAR10 + ResNet18
Target Model:
```
load aug_target_dataset ...
Epoch: 100/100
Cur lr: 2.4671981713420017e-05
Top1 Test accuracy: 78.6500015258789
Top5 Test accuracy: 98.57999420166016
Top1 Train accuracy: 99.18999481201172
Top5 Train accuracy: 100.0
```
Shadow Model:
```
load aug_shadow_dataset ...
Epoch: 100/100
Cur lr: 2.4671981713420017e-05
Top1 Test accuracy: 78.79999542236328
Top5 Test accuracy: 98.48999786376953
Top1 Train accuracy: 99.22000122070312
Top5 Train accuracy: 100.0
```
Distill Target Model:
```
load aug_distill_target_dataset ...
Epoch: 100/100
Cur lr: 2.4671981713420017e-05
Top1 Test accuracy: 79.02999877929688
Top5 Test accuracy: 98.97999572753906
Top1 Train accuracy: 80.11000061035156
Top5 Train accuracy: 98.66999816894531
```
Distill Shadow Model:
```
load aug_distill_shadow_dataset ...
Epoch: 100/100
Cur lr: 2.4671981713420017e-05
Top1 Test accuracy: 79.3499984741211
Top5 Test accuracy: 98.79999542236328
Top1 Train accuracy: 80.02999877929688
Top5 Train accuracy: 98.69999694824219
```
Attack Model:
```
epoch:90 train_loss:0.0045 test_loss:0.0050 train_prec1:0.7016 test_prec1:0.6216 val_prec1:0.6216 val_auc:0.6929
Max AUC: 0.70787291
Max ACC: 0.63575
TPR at 0.1% FPR: 1.2%
```
* CIFAR10 + ResNet50
Target Model:
```
load aug_target_dataset ...
Epoch: 100/100
Cur lr: 2.4671981713420017e-05
Top1 Test accuracy: 68.25999450683594
Top5 Test accuracy: 97.3699951171875
Top1 Train accuracy: 87.27999877929688
Top5 Train accuracy: 99.80999755859375
```
Shadow Model:
```
load aug_shadow_dataset ...
Epoch: 100/100
Cur lr: 2.4671981713420017e-05
Top1 Test accuracy: 66.6199951171875
Top5 Test accuracy: 96.89999389648438
Top1 Train accuracy: 94.79000091552734
Top5 Train accuracy: 99.97000122070312
```
Distill Target Model:
```
load aug_distill_target_dataset ...
Epoch: 100/100
Cur lr: 2.4671981713420017e-05
Top1 Test accuracy: 70.20999908447266
Top5 Test accuracy: 97.30999755859375
Top1 Train accuracy: 70.08499908447266
Top5 Train accuracy: 97.46499633789062
```
Distill Shadow Model:
```
load aug_distill_shadow_dataset ...
Epoch: 100/100
Cur lr: 2.4671981713420017e-05
Top1 Test accuracy: 69.4000015258789
Top5 Test accuracy: 97.07999420166016
Top1 Train accuracy: 69.16500091552734
Top5 Train accuracy: 97.3699951171875
```
Attack Model:
```
val_prec1:0.6158 val_auc:0.6675
Max AUC: 0.67598043
Max ACC: 0.6204500000000001
TPR at 0.1% FPR: 1.2%
```
* CIFAR10 + ResNet152
Target Model:
```
load aug_target_dataset ...
Epoch: 100/100
Cur lr: 2.4671981713420017e-05
Top1 Test accuracy: 66.77999877929688
Top5 Test accuracy: 96.55999755859375
Top1 Train accuracy: 97.04000091552734
Top5 Train accuracy: 99.95999908447266
```
Shadow Model:
```
load aug_shadow_dataset ...
Epoch: 100/100
Cur lr: 2.4671981713420017e-05
Top1 Test accuracy: 67.58999633789062
Top5 Test accuracy: 96.5
Top1 Train accuracy: 94.2699966430664
Top5 Train accuracy: 99.88999938964844
```
Distill Target Model:
```
load aug_distill_target_dataset ...
Epoch: 100/100
Cur lr: 2.4671981713420017e-05
Top1 Test accuracy: 68.80999755859375
Top5 Test accuracy: 97.30999755859375
Top1 Train accuracy: 69.83499908447266
Top5 Train accuracy: 97.42499542236328
```
Distill Shadow Model:
```
load aug_distill_shadow_dataset ...
Epoch: 100/100
Cur lr: 2.4671981713420017e-05
Top1 Test accuracy: 69.98999786376953
Top5 Test accuracy: 97.31999969482422
Top1 Train accuracy: 70.15499877929688
Top5 Train accuracy: 97.31999969482422
```
Attack Model:
```
epoch:90 train_loss:0.0046 test_loss:0.0048 train_prec1:0.7022 test_prec1:0.6547 val_prec1:0.6547 val_auc:0.7353
Max AUC: 0.7494320150000001
Max ACC: 0.6719499999999999
TPR at 0.1% FPR: 2.2%
```
* CIFAR10 + MobileNetV2
Target Model:
```
load aug_target_dataset ...
Epoch: 100/100
Cur lr: 2.4671981713420017e-05
Top1 Test accuracy: 79.66999816894531
Top5 Test accuracy: 98.55999755859375
Top1 Train accuracy: 99.81999969482422
Top5 Train accuracy: 100.0
```
Shadow Model:
```
load aug_shadow_dataset ...
Epoch: 100/100
Cur lr: 2.4671981713420017e-05
Top1 Test accuracy: 79.40999603271484
Top5 Test accuracy: 98.50999450683594
Top1 Train accuracy: 99.70999908447266
Top5 Train accuracy: 100.0
```
Distill Target Model:
```
load aug_distill_target_dataset ...
Epoch: 100/100
Cur lr: 2.4671981713420017e-05
Top1 Test accuracy: 80.32999420166016
Top5 Test accuracy: 98.93999481201172
Top1 Train accuracy: 81.10499572753906
Top5 Train accuracy: 98.83499908447266
```
Distill Shadow Model:
```
load aug_distill_shadow_dataset ...
Epoch: 100/100
Cur lr: 2.4671981713420017e-05
Top1 Test accuracy: 80.68999481201172
Top5 Test accuracy: 98.79999542236328
Top1 Train accuracy: 80.93000030517578
Top5 Train accuracy: 98.80500030517578
```
Attack Model:
```
epoch:90 train_loss:0.0044 test_loss:0.0051 train_prec1:0.7212 test_prec1:0.6013 val_prec1:0.6013 val_auc:0.6869
Max AUC: 0.70338137
Max ACC: 0.63095
TPR at 0.1% FPR: 2.6%
```
* CIFAR100 + ResNet18
Target Model:
```
load aug_target_dataset ...
Epoch: 100/100
Cur lr: 2.4671981713420017e-05
Top1 Test accuracy: 38.79999923706055
Top5 Test accuracy: 68.69999694824219
Top1 Train accuracy: 97.58999633789062
Top5 Train accuracy: 99.93999481201172
```
Shadow Model:
```
load aug_shadow_dataset ...
Epoch: 100/100
Cur lr: 2.4671981713420017e-05
Top1 Test accuracy: 38.22999954223633
Top5 Test accuracy: 68.5199966430664
Top1 Train accuracy: 97.37999725341797
Top5 Train accuracy: 100.0
```
Distill Target Model:
```
load aug_distill_target_dataset ...
Epoch: 100/100
Cur lr: 2.4671981713420017e-05
Top1 Test accuracy: 38.77000045776367
Top5 Test accuracy: 70.6199951171875
Top1 Train accuracy: 40.91999816894531
Top5 Train accuracy: 71.22999572753906
```
Distill Shadow Model:
```
load aug_distill_shadow_dataset ...
Epoch: 100/100
Cur lr: 2.4671981713420017e-05
Top1 Test accuracy: 40.11000061035156
Top5 Test accuracy: 70.83000183105469
Top1 Train accuracy: 42.06999969482422
Top5 Train accuracy: 72.04000091552734
```
Attack Model:
```
epoch:90 train_loss:0.0036 test_loss:0.0038 train_prec1:0.8490 test_prec1:0.8244 val_prec1:0.8244 val_auc:0.9037
Max AUC: 0.9115403099999998
Max ACC: 0.8295999999999999
TPR at 0.1% FPR: 0.0%
```
* CIFAR100 + ResNet50
Target Model:
```
load aug_target_dataset ...
Epoch: 100/100
Cur lr: 2.467198171342e-07
Top1 Test accuracy: 31.079999923706055
Top5 Test accuracy: 59.44999694824219
Top1 Train accuracy: 97.79999542236328
Top5 Train accuracy: 99.94999694824219
```
Shadow Model:
```
load aug_shadow_dataset ...
Epoch: 100/100
Cur lr: 2.467198171342e-07
Top1 Test accuracy: 29.779998779296875
Top5 Test accuracy: 57.64999771118164
Top1 Train accuracy: 98.31999969482422
Top5 Train accuracy: 99.95999908447266
```
Distill Target Model:
```
load aug_distill_target_dataset ...
Epoch: 100/100
Cur lr: 2.467198171342e-07
Top1 Test accuracy: 32.04999923706055
Top5 Test accuracy: 61.43000030517578
Top1 Train accuracy: 33.18499755859375
Top5 Train accuracy: 62.18499755859375
```
Distill Shadow Model:
```
load aug_distill_shadow_dataset ...
Epoch: 100/100
Cur lr: 2.467198171342e-07
Top1 Test accuracy: 31.809999465942383
Top5 Test accuracy: 61.03999710083008
Top1 Train accuracy: 33.06999969482422
Top5 Train accuracy: 62.20499801635742
```
Attack Model:
```
Max AUC: 0.9372197300000003
Max ACC: 0.8634499999999999
TPR at 0.1% FPR: 0.0%
```
* CIFAR100 + ResNet152
Target Model:
```
load aug_target_dataset ...
Epoch: 100/100
Cur lr: 2.4671981713420017e-05
Top1 Test accuracy: 30.67999839782715
Top5 Test accuracy: 57.959999084472656
Top1 Train accuracy: 96.80999755859375
Top5 Train accuracy: 99.86000061035156
```
Shadow Model:
```
load aug_shadow_dataset ...
Epoch: 100/100
Cur lr: 2.4671981713420017e-05
Top1 Test accuracy: 27.939998626708984
Top5 Test accuracy: 56.05999755859375
Top1 Train accuracy: 95.86000061035156
Top5 Train accuracy: 99.75999450683594
```
Distill Target Model:
```
load aug_distill_target_dataset ...
Epoch: 100/100
Cur lr: 2.4671981713420017e-05
Top1 Test accuracy: 30.529998779296875
Top5 Test accuracy: 60.55999755859375
Top1 Train accuracy: 32.154998779296875
Top5 Train accuracy: 61.53999710083008
```
Distill Shadow Model:
```
load aug_distill_shadow_dataset ...
Epoch: 100/100
Cur lr: 2.4671981713420017e-05
Top1 Test accuracy: 29.170000076293945
Top5 Test accuracy: 58.34000015258789
Top1 Train accuracy: 30.14499855041504
Top5 Train accuracy: 59.11499786376953
```
Attack Model:
```
val_prec1:0.8617 val_auc:0.9357
Max AUC: 0.94010921
Max ACC: 0.8649
TPR at 0.1% FPR: 7.1%
```
* CIFAR100 + MobileNetV2
Target Model:
```
load aug_target_dataset ...
Epoch: 100/100
Cur lr: 2.4671981713420017e-05
Top1 Test accuracy: 41.38999938964844
Top5 Test accuracy: 69.93999481201172
Top1 Train accuracy: 99.91999816894531
Top5 Train accuracy: 100.0
```
Shadow Model:
```
load aug_shadow_dataset ...
Epoch: 100/100
Cur lr: 2.4671981713420017e-05
Top1 Test accuracy: 41.82999801635742
Top5 Test accuracy: 70.54000091552734
Top1 Train accuracy: 99.93000030517578
Top5 Train accuracy: 100.0
```
Distill Target Model:
```
load aug_distill_target_dataset ...
Epoch: 100/100
Cur lr: 2.4671981713420017e-05
Top1 Test accuracy: 42.09000015258789
Top5 Test accuracy: 71.91999816894531
Top1 Train accuracy: 43.28999710083008
Top5 Train accuracy: 72.50499725341797
```
Distill Shadow Model:
```
load aug_distill_shadow_dataset ...
Epoch: 100/100
Cur lr: 2.4671981713420017e-05
Top1 Test accuracy: 42.45000076293945
Top5 Test accuracy: 72.43000030517578
Top1 Train accuracy: 44.01499938964844
Top5 Train accuracy: 73.53499603271484
```
Attack Model:
```
Max AUC: 0.9335802350000002
Max ACC: 0.8553499999999999
TPR at 0.1% FPR: 0.0%
```
* CINIC10 + ResNet18:
Target Model:
```
load aug_target_dataset ...
Epoch: 100/100
Cur lr: 2.4671981713420017e-05
Top1 Test accuracy: 58.48999786376953
Top5 Test accuracy: 94.68000030517578
Top1 Train accuracy: 96.1199951171875
Top5 Train accuracy: 99.98999786376953
```
Shadow Model:
```
load aug_shadow_dataset ...
Epoch: 100/100
Cur lr: 2.4671981713420017e-05
Top1 Test accuracy: 58.68000030517578
Top5 Test accuracy: 94.6199951171875
Top1 Train accuracy: 96.38999938964844
Top5 Train accuracy: 100.0
```
Distill Target Model:
```
load aug_distill_target_dataset ...
Epoch: 100/100
Cur lr: 2.4671981713420017e-05
Top1 Test accuracy: 60.56999969482422
Top5 Test accuracy: 95.50999450683594
Top1 Train accuracy: 61.137725830078125
Top5 Train accuracy: 95.41818237304688
```
Distill Shadow Model:
```
load aug_distill_shadow_dataset ...
Epoch: 100/100
Cur lr: 2.4671981713420017e-05
Top1 Test accuracy: 60.15999984741211
Top5 Test accuracy: 95.23999786376953
Top1 Train accuracy: 61.085453033447266
Top5 Train accuracy: 95.53772735595703
```
Attack Model:
```
Max AUC: 0.8098187400000001
Max ACC: 0.7252500000000001
TPR at 0.1% FPR: 3.8%
```
* CINIC10 + ResNet50:
Target Model:
```
load aug_target_dataset ...
Epoch: 100/100
Cur lr: 2.4671981713420017e-05
Top1 Test accuracy: 51.7599983215332
Top5 Test accuracy: 93.8499984741211
Top1 Train accuracy: 84.88999938964844
Top5 Train accuracy: 99.52999877929688
```
Shadow Model:
```
load aug_shadow_dataset ...
Epoch: 100/100
Cur lr: 2.4671981713420017e-05
Top1 Test accuracy: 48.77000045776367
Top5 Test accuracy: 92.0
Top1 Train accuracy: 80.72000122070312
Top5 Train accuracy: 99.29000091552734
```
Distill Target Model:
```
load aug_distill_target_dataset ...
Epoch: 100/100
Cur lr: 2.4671981713420017e-05
Top1 Test accuracy: 55.72999954223633
Top5 Test accuracy: 93.97999572753906
Top1 Train accuracy: 55.32772445678711
Top5 Train accuracy: 94.23681640625
```
Distill Shadow Model:
```
load aug_distill_shadow_dataset ...
Epoch: 100/100
Cur lr: 2.4671981713420017e-05
Top1 Test accuracy: 51.459999084472656
Top5 Test accuracy: 93.04999542236328
Top1 Train accuracy: 51.17409133911133
Top5 Train accuracy: 93.13136291503906
```
Attack Model:
```
Max AUC: 0.764546
Max ACC: 0.69
TPR at 0.1% FPR: 1.7%
```
* CINIC10 + ResNet152:
Target Model:
```
load aug_target_dataset ...
Epoch: 100/100
Cur lr: 2.4671981713420017e-05
Top1 Test accuracy: 47.75
Top5 Test accuracy: 91.29999542236328
Top1 Train accuracy: 89.69999694824219
Top5 Train accuracy: 99.81999969482422
```
Shadow Model:
```
load aug_shadow_dataset ...
Epoch: 100/100
Cur lr: 2.4671981713420017e-05
Top1 Test accuracy: 46.22999954223633
Top5 Test accuracy: 90.55999755859375
Top1 Train accuracy: 84.0199966430664
Top5 Train accuracy: 99.18000030517578
```
Distill Target Model:
```
load aug_distill_target_dataset ...
Epoch: 100/100
Cur lr: 2.4671981713420017e-05
Top1 Test accuracy: 51.09000015258789
Top5 Test accuracy: 92.58999633789062
Top1 Train accuracy: 50.53999710083008
Top5 Train accuracy: 92.56181335449219
```
Distill Shadow Model:
```
load aug_distill_shadow_dataset ...
Epoch: 100/100
Cur lr: 2.4671981713420017e-05
Top1 Test accuracy: 48.93000030517578
Top5 Test accuracy: 91.62999725341797
Top1 Train accuracy: 48.51545333862305
Top5 Train accuracy: 91.79590606689453
```
Attack Model:
```
epoch:90 train_loss:0.0042 test_loss:0.0044 train_prec1:0.7580 test_prec1:0.7336 val_prec1:0.7336 val_auc:0.8117
Max AUC: 0.81900534
Max ACC: 0.7394
TPR at 0.1% FPR: 3.0%
```
* CINIC10 + MobileNetV2:
Target Model:
```
load aug_target_dataset ...
Epoch: 100/100
Cur lr: 2.4671981713420017e-05
Top1 Test accuracy: 58.619998931884766
Top5 Test accuracy: 94.02999877929688
Top1 Train accuracy: 99.19999694824219
Top5 Train accuracy: 100.0
```
Shadow Model:
```
load aug_shadow_dataset ...
Epoch: 100/100
Cur lr: 2.4671981713420017e-05
Top1 Test accuracy: 60.779998779296875
Top5 Test accuracy: 94.16999816894531
Top1 Train accuracy: 99.31999969482422
Top5 Train accuracy: 100.0
```
Distill Target Model:
```
load aug_distill_target_dataset ...
Epoch: 100/100
Cur lr: 2.4671981713420017e-05
Top1 Test accuracy: 60.55999755859375
Top5 Test accuracy: 95.18000030517578
Top1 Train accuracy: 60.62772750854492
Top5 Train accuracy: 95.2677230834961
```
Distill Shadow Model:
```
load aug_distill_shadow_dataset ...
Epoch: 100/100
Cur lr: 2.4671981713420017e-05
Top1 Test accuracy: 61.23999786376953
Top5 Test accuracy: 95.41999816894531
Top1 Train accuracy: 61.27545166015625
Top5 Train accuracy: 95.35317993164062
```
Attack Model:
```
Max AUC: 0.843258295
Max ACC: 0.75125
TPR at 0.1% FPR: 0.0%
```
4\. Open questions and directions for improvement
Q1: How should quantization be taken into account?
A1: It mainly depends on the attack scenario we assume.
If the attacker does not know whether the target model is full precision or quantized, they would use a full-precision Shadow Model, Distill Target Model, and Distill Shadow Model. Since distillation only requires the Target Model's outputs, a quantized Target Model is already covered by this MIA.
If the attacker knows that the Target Model is quantized, the Shadow Model can also be quantized for alignment, which may yield a better attack (a minimal sketch follows).
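A minimal sketch of that alignment, assuming PyTorch post-training dynamic quantization, which only covers `nn.Linear` layers; it is a stand-in for whatever scheme the quantized target actually uses:
```python
import torch
import torch.nn as nn

def quantize_shadow_model(shadow_model):
    # Post-training dynamic quantization: nn.Linear weights -> int8.
    # A stand-in only; if the target uses static quantization or QAT,
    # the shadow model should mirror that scheme instead.
    return torch.quantization.quantize_dynamic(
        shadow_model.cpu(), {nn.Linear}, dtype=torch.qint8)
```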
Q2: How can this be turned into a predictor?
A2: One option is to treat the loss trajectory as a property: compute the similarity between the loss trajectories of in and out data (member_status can serve as the in/out label) and predict from it. Pairs of the Distill Target Model's loss trajectory and the attack accuracy (AUC, etc.) can form the data points, optionally supplemented with the Distill Shadow Model's (see the sketch after this answer).
The expected effect is that the lower the similarity, the higher the attack success rate.
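A minimal sketch of the proposed similarity measure, assuming `trajectories` is shaped `(num_samples, num_epochs)` and `member_status` holds the 0/1 in/out labels; cosine similarity between the mean in/out trajectories is one plausible choice, not the only one:
```python
import torch
import torch.nn.functional as F

def in_out_trajectory_similarity(trajectories, member_status):
    # Mean trajectory of members vs. non-members, compared by cosine similarity.
    member = member_status.bool()
    mean_in = trajectories[member].mean(dim=0)
    mean_out = trajectories[~member].mean(dim=0)
    return F.cosine_similarity(mean_in, mean_out, dim=0).item()
```
Each (similarity, attack AUC) pair from one model/dataset combination would then form a data point for the predictor.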
#!/bin/bash
#- Job parameters
# (TODO)
# Please modify job name
#SBATCH -J Tra_152 # The job name
#SBATCH -o ./info/ret-%j.out # Write the standard output to file named 'ret-<job_number>.out'
#SBATCH -e ./info/ret-%j.err # Write the standard error to file named 'ret-<job_number>.err'
#- Resources
# (TODO)
# Please modify your requirements
#SBATCH -p nv-gpu # Submit to 'nv-gpu' Partition
#SBATCH -t 0-12:00:00 # Run for a maximum time of 0 days, 12 hours, 00 mins, 00 secs
#SBATCH --nodes=1 # Request N nodes
#SBATCH --gres=gpu:1 # Request M GPU per node
#SBATCH --gres-flags=enforce-binding # CPU-GPU Affinity
#SBATCH --qos=gpu-normal # Request QOS Type
###
### The system will alloc 8 or 16 cores per gpu by default.
### If you need more or less, use following:
### #SBATCH --cpus-per-task=K # Request K cores
###
###
### Without specifying the constraint, any available nodes that meet the requirement will be allocated
### You can specify the characteristics of the compute nodes, and even the names of the compute nodes
###
### #SBATCH --nodelist=gpu-v00 # Request a specific list of hosts
### #SBATCH --constraint="Volta|RTX8000" # Request GPU Type: Volta(V100 or V100S) or RTX8000
###
#- Log information
echo "Job start at $(date "+%Y-%m-%d %H:%M:%S")"
echo "Job run at:"
echo "$(hostnamectl)"
#- Load environments
source /tools/module_env.sh
source ~/pyt1.5/bin/activate
module list # list modules loaded
##- Tools
module load cluster-tools/v1.0
module load slurm-tools/v1.0
module load cmake/3.15.7
module load git/2.17.1
module load vim/8.1.2424
##- language
module load python3/3.6.8
##- CUDA
module load cuda-cudnn/11.1-8.1.1
##- virtualenv
# source xxxxx/activate
echo $(module list) # list modules loaded
echo $(which gcc)
echo $(which python)
echo $(which python3)
cluster-quota # nas quota
nvidia-smi --format=csv --query-gpu=name,driver_version,power.limit # gpu info
#- Warning! Please do not change your CUDA_VISIBLE_DEVICES
#- in `.bashrc`, `env.sh`, or your job script
echo "Use GPU ${CUDA_VISIBLE_DEVICES}" # which gpus
#- The CUDA_VISIBLE_DEVICES variable is assigned and specified by SLURM
#- Job step
# [EDIT HERE(TODO)]
sleep 2s
hostname
echo "python main.py --mode target --model resnet152 --data cifar10"
python main.py --mode target --model resnet152 --data cifar10
echo "python main.py --mode shadow --model resnet152 --data cifar10"
python main.py --mode shadow --model resnet152 --data cifar10
echo "python main.py --mode distill_target --model resnet152 --data cifar10"
python main.py --mode distill_target --model resnet152 --data cifar10
echo "python main.py --mode distill_shadow --model resnet152 --data cifar10"
python main.py --mode distill_shadow --model resnet152 --data cifar10
echo "python main.py --action 1 --mode shadow --mia_type build-dataset --model resnet152 --model_distill resnet152 --data cifar10"
python main.py --action 1 --mode shadow --mia_type build-dataset --model resnet152 --model_distill resnet152 --data cifar10
echo "python main.py --action 1 --mode target --mia_type build-dataset --model resnet152 --model_distill resnet152 --data cifar10"
python main.py --action 1 --mode target --mia_type build-dataset --model resnet152 --model_distill resnet152 --data cifar10
echo "python main.py --action 1 --mia_type black-box --model resnet152 --model_distill resnet152 --data cifar10"
python main.py --action 1 --mia_type black-box --model resnet152 --model_distill resnet152 --data cifar10
#- End
echo "Job end at $(date "+%Y-%m-%d %H:%M:%S")"
#!/bin/bash
#- Job parameters
# (TODO)
# Please modify job name
#SBATCH -J Tra_152_10 # The job name
#SBATCH -o ./info/ret-%j.out # Write the standard output to file named 'ret-<job_number>.out'
#SBATCH -e ./info/ret-%j.err # Write the standard error to file named 'ret-<job_number>.err'
#- Resources
# (TODO)
# Please modify your requirements
#SBATCH -p nv-gpu # Submit to 'nv-gpu' Partition
#SBATCH -t 0-12:00:00 # Run for a maximum time of 0 days, 12 hours, 00 mins, 00 secs
#SBATCH --nodes=1 # Request N nodes
#SBATCH --gres=gpu:1 # Request M GPU per node
#SBATCH --gres-flags=enforce-binding # CPU-GPU Affinity
#SBATCH --qos=gpu-normal # Request QOS Type
###
### The system will alloc 8 or 16 cores per gpu by default.
### If you need more or less, use following:
### #SBATCH --cpus-per-task=K # Request K cores
###
###
### Without specifying the constraint, any available nodes that meet the requirement will be allocated
### You can specify the characteristics of the compute nodes, and even the names of the compute nodes
###
### #SBATCH --nodelist=gpu-v00 # Request a specific list of hosts
### #SBATCH --constraint="Volta|RTX8000" # Request GPU Type: Volta(V100 or V100S) or RTX8000
###
#- Log information
echo "Job start at $(date "+%Y-%m-%d %H:%M:%S")"
echo "Job run at:"
echo "$(hostnamectl)"
#- Load environments
source /tools/module_env.sh
source ~/pyt1.5/bin/activate
module list # list modules loaded
##- Tools
module load cluster-tools/v1.0
module load slurm-tools/v1.0
module load cmake/3.15.7
module load git/2.17.1
module load vim/8.1.2424
##- language
module load python3/3.6.8
##- CUDA
module load cuda-cudnn/11.1-8.1.1
##- virtualenv
# source xxxxx/activate
echo $(module list) # list modules loaded
echo $(which gcc)
echo $(which python)
echo $(which python3)
cluster-quota # nas quota
nvidia-smi --format=csv --query-gpu=name,driver_version,power.limit # gpu info
#- Warning! Please do not change your CUDA_VISIBLE_DEVICES
#- in `.bashrc`, `env.sh`, or your job script
echo "Use GPU ${CUDA_VISIBLE_DEVICES}" # which gpus
#- The CUDA_VISIBLE_DEVICES variable is assigned and specified by SLURM
#- Job step
# [EDIT HERE(TODO)]
sleep 2s
hostname
echo "python main.py --mode target --model resnet152 --data cifar100"
python main.py --mode target --model resnet152 --data cifar100
echo "python main.py --mode shadow --model resnet152 --data cifar100"
python main.py --mode shadow --model resnet152 --data cifar100
echo "python main.py --mode distill_target --model resnet152 --data cifar100"
python main.py --mode distill_target --model resnet152 --data cifar100
echo "python main.py --mode distill_shadow --model resnet152 --data cifar100"
python main.py --mode distill_shadow --model resnet152 --data cifar100
echo "python main.py --action 1 --mode shadow --mia_type build-dataset --model resnet152 --model_distill resnet152 --data cifar100"
python main.py --action 1 --mode shadow --mia_type build-dataset --model resnet152 --model_distill resnet152 --data cifar100
echo "python main.py --action 1 --mode target --mia_type build-dataset --model resnet152 --model_distill resnet152 --data cifar100"
python main.py --action 1 --mode target --mia_type build-dataset --model resnet152 --model_distill resnet152 --data cifar100
echo "python main.py --action 1 --mia_type black-box --model resnet152 --model_distill resnet152 --data cifar100"
python main.py --action 1 --mia_type black-box --model resnet152 --model_distill resnet152 --data cifar100
#- End
echo "Job end at $(date "+%Y-%m-%d %H:%M:%S")"
#!/bin/bash
#- Job parameters
# (TODO)
# Please modify job name
#SBATCH -J Tra_CIN_10 # The job name
#SBATCH -o ./info/ret-%j.out # Write the standard output to file named 'ret-<job_number>.out'
#SBATCH -e ./info/ret-%j.err # Write the standard error to file named 'ret-<job_number>.err'
#- Resources
# (TODO)
# Please modify your requirements
#SBATCH -p nv-gpu # Submit to 'nv-gpu' Partition
#SBATCH -t 3-00:00:00 # Run for a maximum time of 3 days, 00 hours, 00 mins, 00 secs
#SBATCH --nodes=1 # Request N nodes
#SBATCH --gres=gpu:1 # Request M GPU per node
#SBATCH --gres-flags=enforce-binding # CPU-GPU Affinity
#SBATCH --qos=gpu-long # Request QOS Type
###
### The system will alloc 8 or 16 cores per gpu by default.
### If you need more or less, use following:
### #SBATCH --cpus-per-task=K # Request K cores
###
###
### Without specifying the constraint, any available nodes that meet the requirement will be allocated
### You can specify the characteristics of the compute nodes, and even the names of the compute nodes
###
### #SBATCH --nodelist=gpu-v00 # Request a specific list of hosts
### #SBATCH --constraint="Volta|RTX8000" # Request GPU Type: Volta(V100 or V100S) or RTX8000
###
#- Log information
echo "Job start at $(date "+%Y-%m-%d %H:%M:%S")"
echo "Job run at:"
echo "$(hostnamectl)"
#- Load environments
source /tools/module_env.sh
source ~/pyt1.5/bin/activate
module list # list modules loaded
##- Tools
module load cluster-tools/v1.0
module load slurm-tools/v1.0
module load cmake/3.15.7
module load git/2.17.1
module load vim/8.1.2424
##- language
module load python3/3.6.8
##- CUDA
module load cuda-cudnn/11.1-8.1.1
##- virtualenv
# source xxxxx/activate
echo $(module list) # list modules loaded
echo $(which gcc)
echo $(which python)
echo $(which python3)
cluster-quota # nas quota
nvidia-smi --format=csv --query-gpu=name,driver_version,power.limit # gpu info
#- Warning! Please do not change your CUDA_VISIBLE_DEVICES
#- in `.bashrc`, `env.sh`, or your job script
echo "Use GPU ${CUDA_VISIBLE_DEVICES}" # which gpus
#- The CUDA_VISIBLE_DEVICES variable is assigned and specified by SLURM
#- Job step
# [EDIT HERE(TODO)]
sleep 2s
hostname
echo "python main.py --mode target --model resnet152 --data cinic10"
python main.py --mode target --model resnet152 --data cinic10
echo "python main.py --mode shadow --model resnet152 --data cinic10"
python main.py --mode shadow --model resnet152 --data cinic10
echo "python main.py --mode distill_target --model resnet152 --data cinic10"
python main.py --mode distill_target --model resnet152 --data cinic10
echo "python main.py --mode distill_shadow --model resnet152 --data cinic10"
python main.py --mode distill_shadow --model resnet152 --data cinic10
echo "python main.py --action 1 --mode shadow --mia_type build-dataset --model resnet152 --model_distill resnet152 --data cinic10"
python main.py --action 1 --mode shadow --mia_type build-dataset --model resnet152 --model_distill resnet152 --data cinic10
echo "python main.py --action 1 --mode target --mia_type build-dataset --model resnet152 --model_distill resnet152 --data cinic10"
python main.py --action 1 --mode target --mia_type build-dataset --model resnet152 --model_distill resnet152 --data cinic10
echo "python main.py --action 1 --mia_type black-box --model resnet152 --model_distill resnet152 --data cinic10"
python main.py --action 1 --mia_type black-box --model resnet152 --model_distill resnet152 --data cinic10
#- End
echo "Job end at $(date "+%Y-%m-%d %H:%M:%S")"
#!/bin/bash
#- Job parameters
# (TODO)
# Please modify job name
#SBATCH -J Tra_10_18 # The job name
#SBATCH -o ./info/ret-%j.out # Write the standard output to file named 'ret-<job_number>.out'
#SBATCH -e ./info/ret-%j.err # Write the standard error to file named 'ret-<job_number>.err'
#- Resources
# (TODO)
# Please modify your requirements
#SBATCH -p nv-gpu # Submit to 'nv-gpu' Partition
#SBATCH -t 1-00:00:00 # Run for a maximum time of 1 day, 00 hours, 00 mins, 00 secs
#SBATCH --nodes=1 # Request N nodes
#SBATCH --gres=gpu:1 # Request M GPU per node
#SBATCH --gres-flags=enforce-binding # CPU-GPU Affinity
#SBATCH --qos=gpu-normal # Request QOS Type
###
### The system will alloc 8 or 16 cores per gpu by default.
### If you need more or less, use following:
### #SBATCH --cpus-per-task=K # Request K cores
###
###
### Without specifying the constraint, any available nodes that meet the requirement will be allocated
### You can specify the characteristics of the compute nodes, and even the names of the compute nodes
###
### #SBATCH --nodelist=gpu-v00 # Request a specific list of hosts
### #SBATCH --constraint="Volta|RTX8000" # Request GPU Type: Volta(V100 or V100S) or RTX8000
###
#- Log information
echo "Job start at $(date "+%Y-%m-%d %H:%M:%S")"
echo "Job run at:"
echo "$(hostnamectl)"
#- Load environments
source /tools/module_env.sh
source ~/pyt1.5/bin/activate
module list # list modules loaded
##- Tools
module load cluster-tools/v1.0
module load slurm-tools/v1.0
module load cmake/3.15.7
module load git/2.17.1
module load vim/8.1.2424
##- language
module load python3/3.6.8
##- CUDA
module load cuda-cudnn/11.1-8.1.1
##- virtualenv
# source xxxxx/activate
echo $(module list) # list modules loaded
echo $(which gcc)
echo $(which python)
echo $(which python3)
cluster-quota # nas quota
nvidia-smi --format=csv --query-gpu=name,driver_version,power.limit # gpu info
#- Warning! Please do not change your CUDA_VISIBLE_DEVICES
#- in `.bashrc`, `env.sh`, or your job script
echo "Use GPU ${CUDA_VISIBLE_DEVICES}" # which gpus
#- The CUDA_VISIBLE_DEVICES variable is assigned and specified by SLURM
#- Job step
# [EDIT HERE(TODO)]
sleep 2s
hostname
echo "python main.py --mode target --model resnet18 --data cifar10"
python main.py --mode target --model resnet18 --data cifar10
echo "python main.py --mode shadow --model resnet18 --data cifar10"
python main.py --mode shadow --model resnet18 --data cifar10
echo "python main.py --mode distill_target --model resnet18 --data cifar10"
python main.py --mode distill_target --model resnet18 --data cifar10
echo "python main.py --mode distill_shadow --model resnet18 --data cifar10"
python main.py --mode distill_shadow --model resnet18 --data cifar10
echo "python main.py --action 1 --mode shadow --mia_type build-dataset --model resnet18 --model_distill resnet18 --data cifar10"
python main.py --action 1 --mode shadow --mia_type build-dataset --model resnet18 --model_distill resnet18 --data cifar10
echo "python main.py --action 1 --mode target --mia_type build-dataset --model resnet18 --model_distill resnet18 --data cifar10"
python main.py --action 1 --mode target --mia_type build-dataset --model resnet18 --model_distill resnet18 --data cifar10
echo "python main.py --action 1 --mia_type black-box --model resnet18 --model_distill resnet18 --data cifar10"
python main.py --action 1 --mia_type black-box --model resnet18 --model_distill resnet18 --data cifar10
#- End
echo "Job end at $(date "+%Y-%m-%d %H:%M:%S")"
#!/bin/bash
#- Job parameters
# (TODO)
# Please modify job name
#SBATCH -J Tra_C100_18 # The job name
#SBATCH -o ./info/ret-%j.out # Write the standard output to file named 'ret-<job_number>.out'
#SBATCH -e ./info/ret-%j.err # Write the standard error to file named 'ret-<job_number>.err'
#- Resources
# (TODO)
# Please modify your requirements
#SBATCH -p nv-gpu # Submit to 'nv-gpu' Partition
#SBATCH -t 1-00:00:00 # Run for a maximum time of 1 day, 00 hours, 00 mins, 00 secs
#SBATCH --nodes=1 # Request N nodes
#SBATCH --gres=gpu:1 # Request M GPU per node
#SBATCH --gres-flags=enforce-binding # CPU-GPU Affinity
#SBATCH --qos=gpu-normal # Request QOS Type
###
### The system will alloc 8 or 16 cores per gpu by default.
### If you need more or less, use following:
### #SBATCH --cpus-per-task=K # Request K cores
###
###
### Without specifying the constraint, any available nodes that meet the requirement will be allocated
### You can specify the characteristics of the compute nodes, and even the names of the compute nodes
###
### #SBATCH --nodelist=gpu-v00 # Request a specific list of hosts
### #SBATCH --constraint="Volta|RTX8000" # Request GPU Type: Volta(V100 or V100S) or RTX8000
###
#- Log information
echo "Job start at $(date "+%Y-%m-%d %H:%M:%S")"
echo "Job run at:"
echo "$(hostnamectl)"
#- Load environments
source /tools/module_env.sh
source ~/pyt1.5/bin/activate
module list # list modules loaded
##- Tools
module load cluster-tools/v1.0
module load slurm-tools/v1.0
module load cmake/3.15.7
module load git/2.17.1
module load vim/8.1.2424
##- language
module load python3/3.6.8
##- CUDA
module load cuda-cudnn/11.1-8.1.1
##- virtualenv
# source xxxxx/activate
echo $(module list) # list modules loaded
echo $(which gcc)
echo $(which python)
echo $(which python3)
cluster-quota # nas quota
nvidia-smi --format=csv --query-gpu=name,driver_version,power.limit # gpu info
#- Warning! Please do not change your CUDA_VISIBLE_DEVICES
#- in `.bashrc`, `env.sh`, or your job script
echo "Use GPU ${CUDA_VISIBLE_DEVICES}" # which gpus
#- The CUDA_VISIBLE_DEVICES variable is assigned and specified by SLURM
#- Job step
# [EDIT HERE(TODO)]
sleep 2s
hostname
echo "python main.py --mode target --model resnet18 --data cifar100"
python main.py --mode target --model resnet18 --data cifar100
echo "python main.py --mode shadow --model resnet18 --data cifar100"
python main.py --mode shadow --model resnet18 --data cifar100
echo "python main.py --mode distill_target --model resnet18 --data cifar100"
python main.py --mode distill_target --model resnet18 --data cifar100
echo "python main.py --mode distill_shadow --model resnet18 --data cifar100"
python main.py --mode distill_shadow --model resnet18 --data cifar100
echo "python main.py --action 1 --mode shadow --mia_type build-dataset --model resnet18 --model_distill resnet18 --data cifar100"
python main.py --action 1 --mode shadow --mia_type build-dataset --model resnet18 --model_distill resnet18 --data cifar100
echo "python main.py --action 1 --mode target --mia_type build-dataset --model resnet18 --model_distill resnet18 --data cifar100"
python main.py --action 1 --mode target --mia_type build-dataset --model resnet18 --model_distill resnet18 --data cifar100
echo "python main.py --action 1 --mia_type black-box --model resnet18 --model_distill resnet18 --data cifar100"
python main.py --action 1 --mia_type black-box --model resnet18 --model_distill resnet18 --data cifar100
#- End
echo "Job end at $(date "+%Y-%m-%d %H:%M:%S")"
#!/bin/bash
#- Job parameters
# (TODO)
# Please modify job name
#SBATCH -J Tra_CIN_18 # The job name
#SBATCH -o ./info/ret-%j.out # Write the standard output to file named 'ret-<job_number>.out'
#SBATCH -e ./info/ret-%j.err # Write the standard error to file named 'ret-<job_number>.err'
#- Resources
# (TODO)
# Please modify your requirements
#SBATCH -p nv-gpu # Submit to 'nv-gpu' Partition
#SBATCH -t 1-12:00:00 # Run for a maximum time of 1 day, 12 hours, 00 mins, 00 secs
#SBATCH --nodes=1 # Request N nodes
#SBATCH --gres=gpu:1 # Request M GPU per node
#SBATCH --gres-flags=enforce-binding # CPU-GPU Affinity
#SBATCH --qos=gpu-long # Request QOS Type
###
### The system will alloc 8 or 16 cores per gpu by default.
### If you need more or less, use following:
### #SBATCH --cpus-per-task=K # Request K cores
###
###
### Without specifying the constraint, any available nodes that meet the requirement will be allocated
### You can specify the characteristics of the compute nodes, and even the names of the compute nodes
###
### #SBATCH --nodelist=gpu-v00 # Request a specific list of hosts
### #SBATCH --constraint="Volta|RTX8000" # Request GPU Type: Volta(V100 or V100S) or RTX8000
###
#- Log information
echo "Job start at $(date "+%Y-%m-%d %H:%M:%S")"
echo "Job run at:"
echo "$(hostnamectl)"
#- Load environments
source /tools/module_env.sh
source ~/pyt1.5/bin/activate
module list # list modules loaded
##- Tools
module load cluster-tools/v1.0
module load slurm-tools/v1.0
module load cmake/3.15.7
module load git/2.17.1
module load vim/8.1.2424
##- language
module load python3/3.6.8
##- CUDA
module load cuda-cudnn/11.1-8.1.1
##- virtualenv
# source xxxxx/activate
echo $(module list) # list modules loaded
echo $(which gcc)
echo $(which python)
echo $(which python3)
cluster-quota # nas quota
nvidia-smi --format=csv --query-gpu=name,driver_version,power.limit # gpu info
#- Warning! Please do not change your CUDA_VISIBLE_DEVICES
#- in `.bashrc`, `env.sh`, or your job script
echo "Use GPU ${CUDA_VISIBLE_DEVICES}" # which gpus
#- The CUDA_VISIBLE_DEVICES variable is assigned and specified by SLURM
#- Job step
# [EDIT HERE(TODO)]
sleep 2s
hostname
echo "python main.py --mode target --model resnet18 --data cinic10"
python main.py --mode target --model resnet18 --data cinic10
echo "python main.py --mode shadow --model resnet18 --data cinic10"
python main.py --mode shadow --model resnet18 --data cinic10
echo "python main.py --mode distill_target --model resnet18 --data cinic10"
python main.py --mode distill_target --model resnet18 --data cinic10
echo "python main.py --mode distill_shadow --model resnet18 --data cinic10"
python main.py --mode distill_shadow --model resnet18 --data cinic10
echo "python main.py --action 1 --mode shadow --mia_type build-dataset --model resnet18 --model_distill resnet18 --data cinic10"
python main.py --action 1 --mode shadow --mia_type build-dataset --model resnet18 --model_distill resnet18 --data cinic10
echo "python main.py --action 1 --mode target --mia_type build-dataset --model resnet18 --model_distill resnet18 --data cinic10"
python main.py --action 1 --mode target --mia_type build-dataset --model resnet18 --model_distill resnet18 --data cinic10
echo "python main.py --action 1 --mia_type black-box --model resnet18 --model_distill resnet18 --data cinic10"
python main.py --action 1 --mia_type black-box --model resnet18 --model_distill resnet18 --data cinic10
#- End
echo "Job end at $(date "+%Y-%m-%d %H:%M:%S")"
#!/bin/bash
#- Job parameters
# (TODO)
# Please modify job name
#SBATCH -J Tra_50 # The job name
#SBATCH -o ./info/ret-%j.out # Write the standard output to file named 'ret-<job_number>.out'
#SBATCH -e ./info/ret-%j.err # Write the standard error to file named 'ret-<job_number>.err'
#- Resources
# (TODO)
# Please modify your requirements
#SBATCH -p nv-gpu # Submit to 'nv-gpu' Partition
#SBATCH -t 0-12:00:00 # Run for a maximum time of 0 days, 12 hours, 00 mins, 00 secs
#SBATCH --nodes=1 # Request N nodes
#SBATCH --gres=gpu:1 # Request M GPU per node
#SBATCH --gres-flags=enforce-binding # CPU-GPU Affinity
#SBATCH --qos=gpu-normal # Request QOS Type
###
### The system will alloc 8 or 16 cores per gpu by default.
### If you need more or less, use following:
### #SBATCH --cpus-per-task=K # Request K cores
###
###
### Without specifying the constraint, any available nodes that meet the requirement will be allocated
### You can specify the characteristics of the compute nodes, and even the names of the compute nodes
###
### #SBATCH --nodelist=gpu-v00 # Request a specific list of hosts
### #SBATCH --constraint="Volta|RTX8000" # Request GPU Type: Volta(V100 or V100S) or RTX8000
###
#- Log information
echo "Job start at $(date "+%Y-%m-%d %H:%M:%S")"
echo "Job run at:"
echo "$(hostnamectl)"
#- Load environments
source /tools/module_env.sh
source ~/pyt1.5/bin/activate
module list # list modules loaded
##- Tools
module load cluster-tools/v1.0
module load slurm-tools/v1.0
module load cmake/3.15.7
module load git/2.17.1
module load vim/8.1.2424
##- language
module load python3/3.6.8
##- CUDA
module load cuda-cudnn/11.1-8.1.1
##- virtualenv
# source xxxxx/activate
echo $(module list) # list modules loaded
echo $(which gcc)
echo $(which python)
echo $(which python3)
cluster-quota # nas quota
nvidia-smi --format=csv --query-gpu=name,driver_version,power.limit # gpu info
#- Warning! Please do not change your CUDA_VISIBLE_DEVICES
#- in `.bashrc`, `env.sh`, or your job script
echo "Use GPU ${CUDA_VISIBLE_DEVICES}" # which gpus
#- The CUDA_VISIBLE_DEVICES variable is assigned and specified by SLURM
#- Job step
# [EDIT HERE(TODO)]
sleep 2s
hostname
echo "python main.py --mode target --model resnet50 "
python main.py --mode target --model resnet50
echo "python main.py --mode shadow --model resnet50 "
python main.py --mode shadow --model resnet50
echo "python main.py --mode distill_target --model resnet50 "
python main.py --mode distill_target --model resnet50
echo "python main.py --mode distill_shadow --model resnet50 "
python main.py --mode distill_shadow --model resnet50
echo "python main.py --action 1 --mode shadow --mia_type build-dataset --model resnet50 --model_distill resnet50 "
python main.py --action 1 --mode shadow --mia_type build-dataset --model resnet50 --model_distill resnet50
echo "python main.py --action 1 --mode target --mia_type build-dataset --model resnet50 --model_distill resnet50 "
python main.py --action 1 --mode target --mia_type build-dataset --model resnet50 --model_distill resnet50
echo "python main.py --action 1 --mia_type black-box --model resnet50 --model_distill resnet50 "
python main.py --action 1 --mia_type black-box --model resnet50 --model_distill resnet50
#- End
echo "Job end at $(date "+%Y-%m-%d %H:%M:%S")"
#!/bin/bash
#- Job parameters
# (TODO)
# Please modify job name
#SBATCH -J Tra_50_10 # The job name
#SBATCH -o ./info/ret-%j.out # Write the standard output to file named 'ret-<job_number>.out'
#SBATCH -e ./info/ret-%j.err # Write the standard error to file named 'ret-<job_number>.err'
#- Resources
# (TODO)
# Please modify your requirements
#SBATCH -p nv-gpu # Submit to 'nv-gpu' Partition
#SBATCH -t 0-12:00:00 # Run for a maximum time of 0 days, 12 hours, 00 mins, 00 secs
#SBATCH --nodes=1 # Request N nodes
#SBATCH --gres=gpu:1 # Request M GPU per node
#SBATCH --gres-flags=enforce-binding # CPU-GPU Affinity
#SBATCH --qos=gpu-normal # Request QOS Type
###
### The system will alloc 8 or 16 cores per gpu by default.
### If you need more or less, use following:
### #SBATCH --cpus-per-task=K # Request K cores
###
###
### Without specifying the constraint, any available nodes that meet the requirement will be allocated
### You can specify the characteristics of the compute nodes, and even the names of the compute nodes
###
### #SBATCH --nodelist=gpu-v00 # Request a specific list of hosts
### #SBATCH --constraint="Volta|RTX8000" # Request GPU Type: Volta(V100 or V100S) or RTX8000
###
#- Log information
echo "Job start at $(date "+%Y-%m-%d %H:%M:%S")"
echo "Job run at:"
echo "$(hostnamectl)"
#- Load environments
source /tools/module_env.sh
source ~/pyt1.5/bin/activate
module list # list modules loaded
##- Tools
module load cluster-tools/v1.0
module load slurm-tools/v1.0
module load cmake/3.15.7
module load git/2.17.1
module load vim/8.1.2424
##- language
module load python3/3.6.8
##- CUDA
module load cuda-cudnn/11.1-8.1.1
##- virtualenv
# source xxxxx/activate
echo $(module list) # list modules loaded
echo $(which gcc)
echo $(which python)
echo $(which python3)
cluster-quota # nas quota
nvidia-smi --format=csv --query-gpu=name,driver_version,power.limit # gpu info
#- Warning! Please do not change your CUDA_VISIBLE_DEVICES
#- in `.bashrc`, `env.sh`, or your job script
echo "Use GPU ${CUDA_VISIBLE_DEVICES}" # which gpus
#- The CUDA_VISIBLE_DEVICES variable is assigned and specified by SLURM
#- Job step
# [EDIT HERE(TODO)]
sleep 2s
hostname
echo "python main.py --mode target --model resnet50 --data cifar100"
python main.py --mode target --model resnet50 --data cifar100
echo "python main.py --mode shadow --model resnet50 --data cifar100"
python main.py --mode shadow --model resnet50 --data cifar100
echo "python main.py --mode distill_target --model resnet50 --data cifar100"
python main.py --mode distill_target --model resnet50 --data cifar100
echo "python main.py --mode distill_shadow --model resnet50 --data cifar100"
python main.py --mode distill_shadow --model resnet50 --data cifar100
echo "python main.py --action 1 --mode shadow --mia_type build-dataset --model resnet50 --model_distill resnet50 --data cifar100"
python main.py --action 1 --mode shadow --mia_type build-dataset --model resnet50 --model_distill resnet50 --data cifar100
echo "python main.py --action 1 --mode target --mia_type build-dataset --model resnet50 --model_distill resnet50 --data cifar100"
python main.py --action 1 --mode target --mia_type build-dataset --model resnet50 --model_distill resnet50 --data cifar100
echo "python main.py --action 1 --mia_type black-box --model resnet50 --model_distill resnet50 --data cifar100"
python main.py --action 1 --mia_type black-box --model resnet50 --model_distill resnet50 --data cifar100
#- End
echo "Job end at $(date "+%Y-%m-%d %H:%M:%S")"
#!/bin/bash
#- Job parameters
# (TODO)
# Please modify job name
#SBATCH -J Tra_CIN_50 # The job name
#SBATCH -o ./info/ret-%j.out # Write the standard output to file named 'ret-<job_number>.out'
#SBATCH -e ./info/ret-%j.err # Write the standard error to file named 'ret-<job_number>.err'
#- Resources
# (TODO)
# Please modify your requirements
#SBATCH -p nv-gpu # Submit to 'nv-gpu' Partition
#SBATCH -t 1-12:00:00 # Run for a maximum time of 1 day, 12 hours, 00 mins, 00 secs
#SBATCH --nodes=1 # Request N nodes
#SBATCH --gres=gpu:1 # Request M GPU per node
#SBATCH --gres-flags=enforce-binding # CPU-GPU Affinity
#SBATCH --qos=gpu-long # Request QOS Type
###
### The system will alloc 8 or 16 cores per gpu by default.
### If you need more or less, use following:
### #SBATCH --cpus-per-task=K # Request K cores
###
###
### Without specifying the constraint, any available nodes that meet the requirement will be allocated
### You can specify the characteristics of the compute nodes, and even the names of the compute nodes
###
### #SBATCH --nodelist=gpu-v00 # Request a specific list of hosts
### #SBATCH --constraint="Volta|RTX8000" # Request GPU Type: Volta(V100 or V100S) or RTX8000
###
#- Log information
echo "Job start at $(date "+%Y-%m-%d %H:%M:%S")"
echo "Job run at:"
echo "$(hostnamectl)"
#- Load environments
source /tools/module_env.sh
source ~/pyt1.5/bin/activate
module list # list modules loaded
##- Tools
module load cluster-tools/v1.0
module load slurm-tools/v1.0
module load cmake/3.15.7
module load git/2.17.1
module load vim/8.1.2424
##- language
module load python3/3.6.8
##- CUDA
module load cuda-cudnn/11.1-8.1.1
##- virtualenv
# source xxxxx/activate
echo $(module list) # list modules loaded
echo $(which gcc)
echo $(which python)
echo $(which python3)
cluster-quota # nas quota
nvidia-smi --format=csv --query-gpu=name,driver_version,power.limit # gpu info
#- Warning! Please do not change your CUDA_VISIBLE_DEVICES
#- in `.bashrc`, `env.sh`, or your job script
echo "Use GPU ${CUDA_VISIBLE_DEVICES}" # which gpus
#- The CUDA_VISIBLE_DEVICES variable is assigned and specified by SLURM
#- Job step
# [EDIT HERE(TODO)]
sleep 2s
hostname
echo "python main.py --mode target --model resnet50 --data cinic10"
python main.py --mode target --model resnet50 --data cinic10
echo "python main.py --mode shadow --model resnet50 --data cinic10"
python main.py --mode shadow --model resnet50 --data cinic10
echo "python main.py --mode distill_target --model resnet50 --data cinic10"
python main.py --mode distill_target --model resnet50 --data cinic10
echo "python main.py --mode distill_shadow --model resnet50 --data cinic10"
python main.py --mode distill_shadow --model resnet50 --data cinic10
echo "python main.py --action 1 --mode shadow --mia_type build-dataset --model resnet50 --model_distill resnet50 --data cinic10"
python main.py --action 1 --mode shadow --mia_type build-dataset --model resnet50 --model_distill resnet50 --data cinic10
echo "python main.py --action 1 --mode target --mia_type build-dataset --model resnet50 --model_distill resnet50 --data cinic10"
python main.py --action 1 --mode target --mia_type build-dataset --model resnet50 --model_distill resnet50 --data cinic10
echo "python main.py --action 1 --mia_type black-box --model resnet50 --model_distill resnet50 --data cinic10"
python main.py --action 1 --mia_type black-box --model resnet50 --model_distill resnet50 --data cinic10
#- End
echo "Job end at $(date "+%Y-%m-%d %H:%M:%S")"
#!/bin/bash
#- Job parameters
# (TODO)
# Please modify job name
#SBATCH -J Tra_Mobile # The job name
#SBATCH -o ./info/ret-%j.out # Write the standard output to file named 'ret-<job_number>.out'
#SBATCH -e ./info/ret-%j.err # Write the standard error to file named 'ret-<job_number>.err'
#- Resources
# (TODO)
# Please modify your requirements
#SBATCH -p nv-gpu # Submit to 'nv-gpu' Partition
#SBATCH -t 0-12:00:00 # Run for a maximum time of 0 days, 12 hours, 00 mins, 00 secs
#SBATCH --nodes=1 # Request N nodes
#SBATCH --gres=gpu:1 # Request M GPU per node
#SBATCH --gres-flags=enforce-binding # CPU-GPU Affinity
#SBATCH --qos=gpu-normal # Request QOS Type
###
### The system will alloc 8 or 16 cores per gpu by default.
### If you need more or less, use following:
### #SBATCH --cpus-per-task=K # Request K cores
###
###
### Without specifying the constraint, any available nodes that meet the requirement will be allocated
### You can specify the characteristics of the compute nodes, and even the names of the compute nodes
###
### #SBATCH --nodelist=gpu-v00 # Request a specific list of hosts
### #SBATCH --constraint="Volta|RTX8000" # Request GPU Type: Volta(V100 or V100S) or RTX8000
###
#- Log information
echo "Job start at $(date "+%Y-%m-%d %H:%M:%S")"
echo "Job run at:"
echo "$(hostnamectl)"
#- Load environments
source /tools/module_env.sh
source ~/pyt1.5/bin/activate
module list # list modules loaded
##- Tools
module load cluster-tools/v1.0
module load slurm-tools/v1.0
module load cmake/3.15.7
module load git/2.17.1
module load vim/8.1.2424
##- language
module load python3/3.6.8
##- CUDA
module load cuda-cudnn/11.1-8.1.1
##- virtualenv
# source xxxxx/activate
echo $(module list) # list modules loaded
echo $(which gcc)
echo $(which python)
echo $(which python3)
cluster-quota # nas quota
nvidia-smi --format=csv --query-gpu=name,driver_version,power.limit # gpu info
#- Warning! Please do not change your CUDA_VISIBLE_DEVICES
#- in `.bashrc`, `env.sh`, or your job script
echo "Use GPU ${CUDA_VISIBLE_DEVICES}" # which gpus
#- The CUDA_VISIBLE_DEVICES variable is assigned and specified by SLURM
#- Job step
# [EDIT HERE(TODO)]
sleep 2s
hostname
echo "python main.py --mode target --model mobilenetv2"
python main.py --mode target --model mobilenetv2
echo "python main.py --mode shadow --model mobilenetv2"
python main.py --mode shadow --model mobilenetv2
echo "python main.py --mode distill_target --model mobilenetv2"
python main.py --mode distill_target --model mobilenetv2
echo "python main.py --mode distill_shadow --model mobilenetv2"
python main.py --mode distill_shadow --model mobilenetv2
echo "python main.py --action 1 --mode shadow --mia_type build-dataset --model mobilenetv2 --model_distill mobilenetv2 "
python main.py --action 1 --mode shadow --mia_type build-dataset --model mobilenetv2 --model_distill mobilenetv2
echo "python main.py --action 1 --mode target --mia_type build-dataset --model mobilenetv2 --model_distill mobilenetv2"
python main.py --action 1 --mode target --mia_type build-dataset --model mobilenetv2 --model_distill mobilenetv2
echo "python main.py --action 1 --mia_type black-box --model mobilenetv2 --model_distill mobilenetv2"
python main.py --action 1 --mia_type black-box --model mobilenetv2 --model_distill mobilenetv2
#- End
echo "Job end at $(date "+%Y-%m-%d %H:%M:%S")"
#!/bin/bash
#- Job parameters
# (TODO)
# Please modify job name
#SBATCH -J Tra_100_Mobile # The job name
#SBATCH -o ./info/ret-%j.out # Write the standard output to file named 'ret-<job_number>.out'
#SBATCH -e ./info/ret-%j.err # Write the standard error to file named 'ret-<job_number>.err'
#- Resources
# (TODO)
# Please modify your requirements
#SBATCH -p nv-gpu # Submit to 'nv-gpu' Partition
#SBATCH -t 0-12:00:00 # Run for a maximum time of 0 days, 12 hours, 00 mins, 00 secs
#SBATCH --nodes=1 # Request N nodes
#SBATCH --gres=gpu:1 # Request M GPU per node
#SBATCH --gres-flags=enforce-binding # CPU-GPU Affinity
#SBATCH --qos=gpu-normal # Request QOS Type
###
### The system will alloc 8 or 16 cores per gpu by default.
### If you need more or less, use following:
### #SBATCH --cpus-per-task=K # Request K cores
###
###
### Without specifying the constraint, any available nodes that meet the requirement will be allocated
### You can specify the characteristics of the compute nodes, and even the names of the compute nodes
###
### #SBATCH --nodelist=gpu-v00 # Request a specific list of hosts
### #SBATCH --constraint="Volta|RTX8000" # Request GPU Type: Volta(V100 or V100S) or RTX8000
###
#- Log information
echo "Job start at $(date "+%Y-%m-%d %H:%M:%S")"
echo "Job run at:"
echo "$(hostnamectl)"
#- Load environments
source /tools/module_env.sh
source ~/pyt1.5/bin/activate
module list # list modules loaded
##- Tools
module load cluster-tools/v1.0
module load slurm-tools/v1.0
module load cmake/3.15.7
module load git/2.17.1
module load vim/8.1.2424
##- language
module load python3/3.6.8
##- CUDA
module load cuda-cudnn/11.1-8.1.1
##- virtualenv
# source xxxxx/activate
echo $(module list) # list modules loaded
echo $(which gcc)
echo $(which python)
echo $(which python3)
cluster-quota # nas quota
nvidia-smi --format=csv --query-gpu=name,driver_version,power.limit # gpu info
#- Warning! Please do not change your CUDA_VISIBLE_DEVICES
#- in `.bashrc`, `env.sh`, or your job script
echo "Use GPU ${CUDA_VISIBLE_DEVICES}" # which gpus
#- The CUDA_VISIBLE_DEVICES variable is assigned and specified by SLURM
#- Job step
# [EDIT HERE(TODO)]
sleep 2s
hostname
echo "python main.py --mode target --model mobilenetv2 --data cifar100"
python main.py --mode target --model mobilenetv2 --data cifar100
echo "python main.py --mode shadow --model mobilenetv2 --data cifar100"
python main.py --mode shadow --model mobilenetv2 --data cifar100
echo "python main.py --mode distill_target --model mobilenetv2 --data cifar100"
python main.py --mode distill_target --model mobilenetv2 --data cifar100
echo "python main.py --mode distill_shadow --model mobilenetv2 --data cifar100"
python main.py --mode distill_shadow --model mobilenetv2 --data cifar100
echo "python main.py --action 1 --mode shadow --mia_type build-dataset --model mobilenetv2 --data cifar100 --model_distill mobilenetv2"
python main.py --action 1 --mode shadow --mia_type build-dataset --model mobilenetv2 --model_distill mobilenetv2 --data cifar100
echo "python main.py --action 1 --mode target --mia_type build-dataset --model mobilenetv2 --data cifar100 --model_distill mobilenetv2"
python main.py --action 1 --mode target --mia_type build-dataset --model mobilenetv2 --model_distill mobilenetv2 --data cifar100
echo "python main.py --action 1 --mia_type black-box --model mobilenetv2 --model_distill mobilenetv2 --data cifar100"
python main.py --action 1 --mia_type black-box --model mobilenetv2 --model_distill mobilenetv2 --data cifar100
#- End
echo "Job end at $(date "+%Y-%m-%d %H:%M:%S")"
#!/bin/bash
#- Job parameters
# (TODO)
# Please modify job name
#SBATCH -J Tra_CIN_Mobile # The job name
#SBATCH -o ./info/ret-%j.out # Write the standard output to file named 'ret-<job_number>.out'
#SBATCH -e ./info/ret-%j.err # Write the standard error to file named 'ret-<job_number>.err'
#- Resources
# (TODO)
# Please modify your requirements
#SBATCH -p nv-gpu # Submit to 'nv-gpu' Partition
#SBATCH -t 3-00:00:00 # Run for a maximum time of 3 days, 00 hours, 00 mins, 00 secs
#SBATCH --nodes=1 # Request N nodes
#SBATCH --gres=gpu:1 # Request M GPU per node
#SBATCH --gres-flags=enforce-binding # CPU-GPU Affinity
#SBATCH --qos=gpu-long # Request QOS Type
###
### The system will alloc 8 or 16 cores per gpu by default.
### If you need more or less, use following:
### #SBATCH --cpus-per-task=K # Request K cores
###
###
### Without specifying the constraint, any available nodes that meet the requirement will be allocated
### You can specify the characteristics of the compute nodes, and even the names of the compute nodes
###
### #SBATCH --nodelist=gpu-v00 # Request a specific list of hosts
### #SBATCH --constraint="Volta|RTX8000" # Request GPU Type: Volta(V100 or V100S) or RTX8000
###
#- Log information
echo "Job start at $(date "+%Y-%m-%d %H:%M:%S")"
echo "Job run at:"
echo "$(hostnamectl)"
#- Load environments
source /tools/module_env.sh
source ~/pyt1.5/bin/activate
module list # list modules loaded
##- Tools
module load cluster-tools/v1.0
module load slurm-tools/v1.0
module load cmake/3.15.7
module load git/2.17.1
module load vim/8.1.2424
##- language
module load python3/3.6.8
##- CUDA
module load cuda-cudnn/11.1-8.1.1
##- virtualenv
# source xxxxx/activate
echo $(module list) # list modules loaded
echo $(which gcc)
echo $(which python)
echo $(which python3)
cluster-quota # nas quota
nvidia-smi --format=csv --query-gpu=name,driver_version,power.limit # gpu info
#- Warning! Please do not change your CUDA_VISIBLE_DEVICES
#- in `.bashrc`, `env.sh`, or your job script
echo "Use GPU ${CUDA_VISIBLE_DEVICES}" # which gpus
#- The CUDA_VISIBLE_DEVICES variable is assigned and specified by SLURM
#- Job step
# [EDIT HERE(TODO)]
sleep 2s
hostname
echo "python main.py --mode target --model mobilenetv2 --data cinic10"
python main.py --mode target --model mobilenetv2 --data cinic10
echo "python main.py --mode shadow --model mobilenetv2 --data cinic10"
python main.py --mode shadow --model mobilenetv2 --data cinic10
echo "python main.py --mode distill_target --model mobilenetv2 --data cinic10"
python main.py --mode distill_target --model mobilenetv2 --data cinic10
echo "python main.py --mode distill_shadow --model mobilenetv2 --data cinic10"
python main.py --mode distill_shadow --model mobilenetv2 --data cinic10
echo "python main.py --action 1 --mode shadow --mia_type build-dataset --model mobilenetv2 --data cinic10 --model_distill mobilenetv2"
python main.py --action 1 --mode shadow --mia_type build-dataset --model mobilenetv2 --model_distill mobilenetv2 --data cinic10
echo "python main.py --action 1 --mode target --mia_type build-dataset --model mobilenetv2 --data cinic10 --model_distill mobilenetv2"
python main.py --action 1 --mode target --mia_type build-dataset --model mobilenetv2 --model_distill mobilenetv2 --data cinic10
echo "python main.py --action 1 --mia_type black-box --model mobilenetv2 --model_distill mobilenetv2 --data cinic10"
python main.py --action 1 --mia_type black-box --model mobilenetv2 --model_distill mobilenetv2 --data cinic10
#- End
echo "Job end at $(date "+%Y-%m-%d %H:%M:%S")"
#!/bin/bash
#- Job parameters
# (TODO)
# Please modify job name
#SBATCH -J PLOT # The job name
#SBATCH -o ./info/ret-%j.out # Write the standard output to file named 'ret-<job_number>.out'
#SBATCH -e ./info/ret-%j.err # Write the standard error to file named 'ret-<job_number>.err'
#- Resources
# (TODO)
# Please modify your requirements
#SBATCH -p nv-gpu # Submit to 'nv-gpu' Partition
#SBATCH -t 0-12:00:00 # Run for a maximum time of 0 days, 12 hours, 00 mins, 00 secs
#SBATCH --nodes=1 # Request N nodes
#SBATCH --gres=gpu:1 # Request M GPU per node
#SBATCH --gres-flags=enforce-binding # CPU-GPU Affinity
#SBATCH --qos=gpu-normal # Request QOS Type
###
### The system will allocate 8 or 16 cores per GPU by default.
### If you need more or fewer, use the following:
### #SBATCH --cpus-per-task=K # Request K cores
###
###
### Without specifying the constraint, any available nodes that meet the requirement will be allocated
### You can specify the characteristics of the compute nodes, and even the names of the compute nodes
###
### #SBATCH --nodelist=gpu-v00 # Request a specific list of hosts
### #SBATCH --constraint="Volta|RTX8000" # Request GPU Type: Volta(V100 or V100S) or RTX8000
###
#- Log information
echo "Job start at $(date "+%Y-%m-%d %H:%M:%S")"
echo "Job run at:"
echo "$(hostnamectl)"
#- Load environments
source /tools/module_env.sh
source ~/pyt1.5/bin/activate
module list # list modules loaded
##- Tools
module load cluster-tools/v1.0
module load slurm-tools/v1.0
module load cmake/3.15.7
module load git/2.17.1
module load vim/8.1.2424
##- language
module load python3/3.6.8
##- CUDA
module load cuda-cudnn/11.1-8.1.1
##- virtualenv
# source xxxxx/activate
echo $(module list) # list modules loaded
echo $(which gcc)
echo $(which python)
echo $(which python3)
cluster-quota # nas quota
nvidia-smi --format=csv --query-gpu=name,driver_version,power.limit # gpu info
#- Warning! Please do not change your CUDA_VISIBLE_DEVICES
#- in `.bashrc`, `env.sh`, or your job script
echo "Use GPU ${CUDA_VISIBLE_DEVICES}" # which gpus
#- The CUDA_VISIBLE_DEVICES variable is assigned and specified by SLURM
#- Job step
# [EDIT HERE(TODO)]
sleep 2s
hostname
echo "python plot.py --model resnet50 --model_distill resnet50 --data cifar100"
python plot.py --model resnet50 --model_distill resnet50 --data cifar100
#- End
echo "Job end at $(date "+%Y-%m-%d %H:%M:%S")"
import torch
import numpy as np
import random
import sys
import time
import os
import dataset
import torch.nn as nn
import torch.nn.functional as F
from torch.nn import CrossEntropyLoss
from torch.optim import SGD, Adam
from torch.optim.lr_scheduler import _LRScheduler, CosineAnnealingLR
from bisect import bisect_right
from normal import save_model
def set_random_seeds(seed):
np.random.seed(seed)
torch.manual_seed(seed)
random.seed(seed)
torch.cuda.manual_seed(seed)
torch.cuda.manual_seed_all(seed)
    torch.backends.cudnn.benchmark = False
torch.backends.cudnn.deterministic = True
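# Illustrative sketch (not part of the original pipeline; the seed value is an
# arbitrary example): seeding must happen before model construction and data
# loading for runs to be repeatable, and cudnn.deterministic=True trades some
# convolution speed for determinism.
def _demo_reproducible_init(seed=42):
    set_random_seeds(seed)
    return torch.randn(2, 2)  # same tensor on every call with the same seed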
def get_pytorch_device():
device = 'cpu'
cuda = torch.cuda.is_available()
    print('Using PyTorch version:', torch.__version__, 'CUDA:', cuda)
if cuda:
device = 'cuda'
return device
class MultiStepMultiLR(_LRScheduler):
def __init__(self, optimizer, milestones, gammas, last_epoch=-1):
        if not list(milestones) == sorted(milestones):
            raise ValueError('Milestones should be a list of'
                             ' increasing integers. Got {}'.format(milestones))
self.milestones = milestones
self.gammas = gammas
super(MultiStepMultiLR, self).__init__(optimizer, last_epoch)
def get_lr(self):
lrs = []
for base_lr in self.base_lrs:
cur_milestone = bisect_right(self.milestones, self.last_epoch)
new_lr = base_lr * np.prod(self.gammas[:cur_milestone])
new_lr = round(new_lr,8)
lrs.append(new_lr)
return lrs
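# Illustrative sketch (hypothetical milestones/gammas, toy model): the
# scheduler scales the base lr by the cumulative product of the gammas whose
# milestones have passed, e.g. lr -> 0.1*lr after epoch 10 and 0.1*0.5*lr
# after epoch 20.
def _demo_multistep_multilr():
    toy = nn.Linear(2, 2)  # toy model, for illustration only
    opt = SGD(toy.parameters(), lr=0.1)
    sched = MultiStepMultiLR(opt, milestones=[10, 20], gammas=[0.1, 0.5])
    for _ in range(25):
        opt.step()
        sched.step()
    return opt.param_groups[0]['lr']  # 0.1 * 0.1 * 0.5 == 0.005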
class Logger(object):
def __init__(self, log_file, mode='out'):
# write to terminal
if mode == 'out':
self.terminal = sys.stdout
else:
self.terminal = sys.stderr
        self.log = open('{}.{}'.format(log_file, mode), "a")
def write(self, message):
self.terminal.write(message)
self.terminal.flush()
self.log.write(message)
self.log.flush()
def flush(self):
self.terminal.flush()
self.log.flush()
def __del__(self):
self.log.close()
def set_logger(log_file):
sys.stdout = Logger(log_file, 'out')
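# Usage sketch (the log path is a hypothetical example; it assumes the target
# directory already exists, e.g. created via create_path below): set_logger
# tees everything printed to stdout into '<log_file>.out' while still echoing
# it to the terminal, which is how the scripts keep per-run logs.
def _demo_logging():
    set_logger('./outputs/demo_run')  # prints also go to ./outputs/demo_run.out
    print('this line appears on the terminal and in the log file')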
def create_path(path):
if not os.path.exists(path):
os.makedirs(path, exist_ok=True)
def get_lr(optimizers):
if isinstance(optimizers, dict):
return optimizers[list(optimizers.keys())[-1]].param_groups[-1]['lr']
else:
return optimizers.param_groups[-1]['lr']
def get_loss_criterion():
return CrossEntropyLoss()
class Flatten(nn.Module):
def forward(self, input):
return input.view(input.size(0), -1)
def cnn_test(model, loader, device='cpu'):
model.eval()
top1 = dataset.AverageMeter()
top5 = dataset.AverageMeter()
with torch.no_grad():
for batch in loader:
b_x = batch[0].to(device)
b_y = batch[1].to(device)
output = model(b_x)
prec1, prec5 = dataset.accuracy(output, b_y, topk=(1, 5))
top1.update(prec1[0], b_x.size(0))
top5.update(prec5[0], b_x.size(0))
top1_acc = top1.avg.data.cpu().numpy()[()]
top5_acc = top5.avg.data.cpu().numpy()[()]
return top1_acc, top5_acc
# single training step: forward pass, loss, backward pass, parameter update
def cnn_training_step(model, optimizer, data, labels, device='cpu'):
b_x = data.to(device)
b_y = labels.to(device)
output = model(b_x)
criterion = get_loss_criterion()
loss = criterion(output, b_y)
optimizer.zero_grad()
loss.backward()
optimizer.step()
# generic training loop, shared by target and shadow model training
def cnn_train(args, model, data, epochs, optimizer, scheduler, model_params, model_path, trained_model_name, device='cpu'):
metrics = {'epoch_times':[], 'test_top1_acc':[], 'test_top5_acc':[], 'train_top1_acc':[], 'train_top5_acc':[], 'lrs':[]}
for epoch in range(1, epochs+1):
cur_lr = get_lr(optimizer)
if not hasattr(model, 'augment_training') or model.augment_training:
if args.mode == 'target':
print('load aug_target_dataset ... ')
train_loader = data.aug_target_train_loader
test_loader = data.aug_target_test_loader
elif args.mode == 'shadow':
print('load aug_shadow_dataset ...')
train_loader = data.aug_shadow_train_loader
test_loader = data.aug_shadow_test_loader
else:
if args.mode == 'target':
print('load target_dataset ... ')
train_loader = data.target_train_loader
test_loader = data.target_test_loader
elif args.mode == 'shadow':
print('load shadow_dataset ...')
train_loader = data.shadow_train_loader
test_loader = data.shadow_test_loader
start_time = time.time()
model.train()
print('Epoch: {}/{}'.format(epoch, epochs))
print('Cur lr: {}'.format(cur_lr))
for x, y, idx in train_loader:
cnn_training_step(model, optimizer, x, y, device)
end_time = time.time()
        # no separate validation set is used (the test set serves as validation)
top1_test, top5_test = cnn_test(model, test_loader, device)
print('Top1 Test accuracy: {}'.format(top1_test))
print('Top5 Test accuracy: {}'.format(top5_test))
metrics['test_top1_acc'].append(top1_test)
metrics['test_top5_acc'].append(top5_test)
top1_train, top5_train = cnn_test(model, train_loader, device)
print('Top1 Train accuracy: {}'.format(top1_train))
print('Top5 Train accuracy: {}'.format(top5_train))
metrics['train_top1_acc'].append(top1_train)
metrics['train_top5_acc'].append(top5_train)
epoch_time = int(end_time-start_time)
print('Epoch took {} seconds.'.format(epoch_time))
metrics['epoch_times'].append(epoch_time)
metrics['lrs'].append(cur_lr)
scheduler.step()
model_params['train_top1_acc'] = metrics['train_top1_acc']
model_params['test_top1_acc'] = metrics['test_top1_acc']
model_params['train_top5_acc'] = metrics['train_top5_acc']
model_params['test_top5_acc'] = metrics['test_top5_acc']
model_params['epoch_times'] = metrics['epoch_times']
model_params['lrs'] = metrics['lrs']
total_training_time = sum(model_params['epoch_times'])
model_params['total_time'] = total_training_time
print('Training took {} seconds...'.format(total_training_time))
return metrics
# single distillation step (distilling a trained target/shadow model via KL divergence)
def cnn_training_step_dis(model, model_dis, optimizer, data, labels, device='cpu'):
b_x = data.to(device)
    # the ground-truth labels are not used by the distillation loss
b_y_1 = labels.to(device)
output = model_dis(b_x)
    # distill the target/shadow model into model_dis: the loss compares the student's
    # output with the teacher's output rather than with the labels; the teacher is the
    # already-trained target/shadow model, and the optimizer only updates model_dis's weights
b_y = model(b_x)
loss = nn.KLDivLoss(reduction='batchmean')(F.log_softmax(output, dim=1), F.softmax(b_y, dim=1))
optimizer.zero_grad()
loss.backward()
optimizer.step()
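# A minimal variant sketch (not the function used above): since the teacher's
# parameters are never stepped, its forward pass can run under torch.no_grad()
# to skip building an autograd graph for the teacher; the loss is unchanged.
def _distill_step_no_teacher_grad(model, model_dis, optimizer, data, device='cpu'):
    b_x = data.to(device)
    with torch.no_grad():
        teacher_logits = model(b_x)  # teacher output, detached from autograd
    student_logits = model_dis(b_x)
    loss = nn.KLDivLoss(reduction='batchmean')(
        F.log_softmax(student_logits, dim=1), F.softmax(teacher_logits, dim=1))
    optimizer.zero_grad()
    loss.backward()
    optimizer.step()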
# training loop for the distilled model
def cnn_train_dis(args, model, model_dis, data, epochs, optimizer, scheduler, model_params, model_path, trained_model_name, device='cpu'):
metrics = {'epoch_times':[], 'test_top1_acc':[], 'test_top5_acc':[], 'train_top1_acc':[], 'train_top5_acc':[], 'lrs':[]}
for epoch in range(1, epochs+1):
cur_lr = get_lr(optimizer)
if not hasattr(model, 'augment_training') or model.augment_training:
print(f'load aug_{args.mode}_dataset ...')
train_loader = data.aug_distill_train_loader
test_loader = data.aug_distill_test_loader
else:
print(f'load {args.mode}_dataset ...')
train_loader = data.distill_train_loader
test_loader = data.distill_test_loader
start_time = time.time()
model = model.to(device)
model_dis = model_dis.to(device)
        model_dis.train()  # only the distilled model's weights are updated
        model.eval()  # the target/shadow model (teacher) is never updated
print('Epoch: {}/{}'.format(epoch, epochs))
print('Cur lr: {}'.format(cur_lr))
for i, (x, y, idx) in enumerate(train_loader):
cnn_training_step_dis(model, model_dis, optimizer, x, y, device)
end_time = time.time()
top1_test, top5_test = cnn_test(model_dis, test_loader, device)
print('Top1 Test accuracy: {}'.format(top1_test))
print('Top5 Test accuracy: {}'.format(top5_test))
metrics['test_top1_acc'].append(top1_test)
metrics['test_top5_acc'].append(top5_test)
top1_train, top5_train = cnn_test(model_dis, train_loader, device)
print('Top1 Train accuracy: {}'.format(top1_train))
print('Top5 Train accuracy: {}'.format(top5_train))
metrics['train_top1_acc'].append(top1_train)
metrics['train_top5_acc'].append(top5_train)
epoch_time = int(end_time-start_time)
print('Epoch took {} seconds.'.format(epoch_time))
metrics['epoch_times'].append(epoch_time)
metrics['lrs'].append(cur_lr)
scheduler.step()
model_params['train_top1_acc'] = metrics['train_top1_acc']
model_params['test_top1_acc'] = metrics['test_top1_acc']
model_params['train_top5_acc'] = metrics['train_top5_acc']
model_params['test_top5_acc'] = metrics['test_top5_acc']
model_params['epoch_times'] = metrics['epoch_times']
model_params['lrs'] = metrics['lrs']
total_training_time = sum(model_params['epoch_times'])
model_params['total_time'] = total_training_time
print('Training took {} seconds...'.format(total_training_time))
save_model(model_dis, model_params, model_path, trained_model_name, epoch=epoch)
return metrics
def get_dataset(dataset, mode, aug=False, batch_size=512, add_trigger=False):
if dataset == 'cifar10':
return load_cifar10(mode, aug, batch_size, add_trigger)
elif dataset == 'gtsrb':
return load_gtsrb(mode, aug, batch_size, add_trigger)
elif dataset == 'cinic10':
return load_cinic10(mode, aug, batch_size, add_trigger)
elif dataset == 'cifar100':
return load_cifar100(mode, aug, batch_size)
def load_gtsrb(mode, aug, batch_size, add_trigger=False):
gtsrb_data = dataset.GTSRB(mode, aug, batch_size=batch_size)
return gtsrb_data
def load_cinic10(mode, aug, batch_size, add_trigger=False):
cinic10_data = dataset.CINIC10(mode, aug, batch_size=batch_size, add_trigger=add_trigger)
return cinic10_data
def load_cifar10(mode, aug, batch_size, add_trigger=False):
cifar10_data = dataset.CIFAR10(mode, aug, batch_size=batch_size, add_trigger=add_trigger)
return cifar10_data
def load_cifar100(mode, aug, batch_size):
cifar100_data = dataset.CIFAR100(mode, aug, batch_size=batch_size)
return cifar100_data
# TODO: revise the training strategy
def get_full_optimizer(model, lr_params, args):
    lr = lr_params[0]
    weight_decay = lr_params[1]
    momentum = lr_params[2]
optimizer = SGD(filter(lambda p: p.requires_grad, model.parameters()), lr=lr, momentum=momentum, weight_decay=weight_decay)
# optimizer = Adam(filter(lambda p: p.requires_grad, model.parameters()), lr=lr)
scheduler = CosineAnnealingLR(optimizer, args.epochs)
return optimizer, scheduler
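# Usage sketch (the lr_params values are assumptions for illustration):
# lr_params is an (lr, weight_decay, momentum) tuple, and CosineAnnealingLR
# with T_max = args.epochs and the default eta_min = 0 decays the lr as
#   lr_t = 0.5 * lr * (1 + cos(pi * t / args.epochs)),
# reaching ~0 at the final epoch.
def _demo_full_optimizer(model, args):
    lr_params = (0.1, 5e-4, 0.9)  # hypothetical (lr, weight_decay, momentum)
    optimizer, scheduler = get_full_optimizer(model, lr_params, args)
    return optimizer, scheduler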