Merge branch 'master' of http://62.234.201.16/hao/Model-Transfer-Adaptability

268e9b5c · Klin · e13f1f29 · 204f7fcb · 268e9b5c · 268e9b5c
Commit 268e9b5c authored Jul 12, 2023 by Klin
30 changed files
--- a/mzh/new_mzh/ALL_2/MIA.py
+++ b/mzh/new_mzh/ALL_2/MIA.py
@@ -181,7 +181,7 @@ def build_trajectory_membership_dataset(args, ori_model_path, device='cpu'):
            gol.set_value(plist)
    # load这些非distill model只是为了获得最后一个loss
-    # 对于量化后的 这里应该要改吧，需要load量化后的模型来做推理
+    # 对于量化后的，需要load量化后的模型来做推理
    if args.mode == 'target':
        cnn_model, cnn_params = normal.load_model(args, ori_model_path+'/target', model_name, epoch=args.epochs)
@@ -206,22 +206,17 @@ def build_trajectory_membership_dataset(args, ori_model_path, device='cpu'):
    # params['task']中记录用什么数据集
-    # 尝试调小batch size看看resnet50/152是否还会爆显存
    # 这里调小了batchsize=128会导致ResNet_18的attack acc相比于batchsize=512时下降2%
-    dataset = mia_utils.get_dataset(cnn_params['task'], mode=args.mode, aug=True, batch_size=512)
+    dataset = mia_utils.get_dataset(cnn_params['task'], mode=args.mode, aug=True, batch_size=256)
    if args.mode == 'target':
        print('load target_dataset ... ')
        train_loader = dataset.aug_target_train_loader
-        # test set不再做data augmentation
-        # test_loader = dataset.aug_target_test_loader
        test_loader = dataset.target_test_loader
    elif args.mode == 'shadow':
        print('load shadow_dataset ... ')
        train_loader = dataset.aug_shadow_train_loader
-        # test_loader = dataset.aug_shadow_test_loader
        test_loader = dataset.shadow_test_loader
    model_top1 = None
@@ -352,7 +347,6 @@ def trajectory_black_box_membership_inference_attack(args, models_path, device='
    epoch = 0
    # 这里如果要加val set，就直接把trainset按8：2划分
-    # AttackModelTrainSet = np.load(models_path + f'/shadow/{model_name}/trajectory_train_data.npy', allow_pickle=True).item()
    if args.quant_type is None:
        AttackModelTrainSet = np.load(models_path + f'/shadow/{model_name}/trajectory_train_data.npy', allow_pickle=True).item()
    else:
@@ -377,7 +371,7 @@ def trajectory_black_box_membership_inference_attack(args, models_path, device='
            # train/test i.e. in or out
            torch.from_numpy(np.array(check_and_transform_label_format(AttackModelTrainSet['predicted_status'], nb_classes=2, return_one_hot=True)[:,:2])).type(torch.long),
            torch.from_numpy(np.array(AttackModelTrainSet['member_status'])).type(torch.long),)
-        attack_train_loader = torch.utils.data.DataLoader(train_set, batch_size=128, shuffle=True)
+        attack_train_loader = torch.utils.data.DataLoader(train_set, batch_size=512, shuffle=True)
    test_set = torch.utils.data.TensorDataset(
            torch.from_numpy(np.array(AttackModelTestSet['model_loss_ori'], dtype='f')),
@@ -387,7 +381,7 @@ def trajectory_black_box_membership_inference_attack(args, models_path, device='
            # train/test i.e. in or out
            torch.from_numpy(np.array(check_and_transform_label_format(AttackModelTestSet['predicted_status'], nb_classes=2, return_one_hot=True)[:,:2])).type(torch.long),
            torch.from_numpy(np.array(AttackModelTestSet['member_status'])).type(torch.long),)
-    attack_test_loader = torch.utils.data.DataLoader(test_set, batch_size=128, shuffle=True)
+    attack_test_loader = torch.utils.data.DataLoader(test_set, batch_size=512, shuffle=True)
    print(f'-------------------"Loss Trajectory"------------------')
     # 训练Attack Model
@@ -508,8 +502,6 @@ def get_trajectory(data, target, args, model_path, device='cpu'):
    print(f"MODEL NAME IS :{model_name}")
    trajectory = None
    # 创建一个形状为 (data.shape[0], 1) 的 NumPy 数组 predicted_label，并将其初始化为 -1  data.shape[0]即batch_size
-    # predicted_label = np.array([-1]).repeat(data.shape[0],0).reshape(data.shape[0],1)
    # TODO 需要适配由PTQ Target Model得到的Distill Target Model
    # 虽然是通过mode == shadow和target来区分，但load的model是distill model
@@ -518,20 +510,6 @@ def get_trajectory(data, target, args, model_path, device='cpu'):
        model_path_current = 'mia_ckpt/{}'.format(s)
        # 对每个distill epoch (是看的distill model对target的loss并记录成为loss_trajectory)
        for i in range(1, args.epochs_distill+1):
-            # 通过load存储的distill model在各个epoch时候的权值参数来复现loss
-            # if args.mode == 'shadow':
-            #     cnn_model_target, cnn_params_target = normal.load_model(args, model_path_current+'/distill_shadow', model_name, epoch=i)
-            # elif args.mode == 'target':
-            #     if args.quant_type is None:
-            #         cnn_model_target, cnn_params_target = normal.load_model(args, model_path_current+'/distill_target', model_name, epoch=i)
-            #     # TODO 调整load的路径，把(ptq) Distill Target Model的权值参数load进来
-            #     else:
-            #         if args.quant_type == 'FLOAT':
-            #             title = '%s_%d_E%d' % (args.quant_type, args.num_bits, args.e_bits)
-            #         else:
-            #             title = '%s_%d' % (args.quant_type, args.num_bits)
-            #         cnn_model_target, cnn_params_target = normal.load_model(args, model_path_current+'/distill_target', model_name + '_' + title, epoch=i)
            if args.quant_type is None:
                cnn_model_target, cnn_params_target = normal.load_model(args, model_path_current+'/distill_'+args.mode, model_name, epoch=i)
                # TODO 调整load的路径，把(ptq) Distill Target Model的权值参数load进来
@@ -553,8 +531,6 @@ def get_trajectory(data, target, args, model_path, device='cpu'):
            # 都是distill model的output，因此不需要quantize_inference
            logit_target = MODEL_target(data)
            # 看target model的output与label的loss  （batch list中的各个数据分别算）
            loss = [F.cross_entropy(logit_target_i.unsqueeze(0), target_i.unsqueeze(0)) for (logit_target_i, target_i) in zip(logit_target, target)]
            # list -> nparray 一列的

--- a/mzh/new_mzh/ALL_2/VGG_16_mia_result.xlsx
+++ b/mzh/new_mzh/ALL_2/VGG_16_mia_result.xlsx
--- a/mzh/new_mzh/ALL_2/div.py
+++ b/mzh/new_mzh/ALL_2/div.py
@@ -16,7 +16,7 @@ if __name__ == '__main__':
    # 统一计算所有的js
    gol._init()
-    quant_type_list = ['INT','POT']
+    quant_type_list = ['INT','POT','FLOAT']
    filename =f'{args.model}_mia_result.xlsx'
    workbook = openpyxl.load_workbook(filename)

--- a/mzh/new_mzh/ALL_2/mia_one_int_s1.slurm
+++ b/mzh/new_mzh/ALL_2/mia_one_int_s1.slurm
--- a/mzh/new_mzh/ALL_2/mia_one_int_s2.slurm
+++ b/mzh/new_mzh/ALL_2/mia_one_int_s2.slurm
@@ -184,9 +184,6 @@ python mia_one.py --action 1 --mode target --mia_type build-dataset --model $Mod
 # ATTACK
-# for test full precision mia result
-# echo "python mia_one.py --action 1 --mia_type black-box --model $Model  --model_distill $Model --data $Dataset --load_attack --epochs_distill $Distill"
-# python mia_one.py --action 1 --mia_type black-box --model $Model  --model_distill $Model --data $Dataset --load_attack --epochs_distill $Distill
 echo "python mia_one.py --action 1 --mia_type black-box --model $Model  --model_distill $Model --data $Dataset  --quant_type INT --num_bits 9 --epochs_distill $Distill"
 python mia_one.py --action 1 --mia_type black-box --model $Model  --model_distill $Model --data $Dataset  --quant_type INT --num_bits 9 --epochs_distill $Distill

--- a/mzh/new_mzh/ALL_2/mia_one_pot.slurm
+++ b/mzh/new_mzh/ALL_2/mia_one_pot.slurm
@@ -166,7 +166,7 @@ echo "python mia_one.py --action 1 --mia_type black-box --model $Model  --model_
 python mia_one.py --action 1 --mia_type black-box --model $Model  --model_distill $Model --data $Dataset  --quant_type POT --num_bits 5 --epochs_distill $Distill
 echo "python mia_one.py --action 1 --mia_type black-box --model $Model  --model_distill $Model --data $Dataset  --quant_type POT --num_bits 6 --epochs_distill $Distill"
-python mia_one.py --action 1 --mia_type black-box --model $Model  --model_distill $Model --data $Dataset  --quant_type POT --num_bits 6 --epochs_distill $Distill
+python mia_one.py --action 1 --mia_type black-box --model $Model  --model_distill $Model --data $Dataset  --quant_type POT --num_bits 6 --epochs_distill $Distill
 #- End
 echo "Job end at $(date "+%Y-%m-%d %H:%M:%S")"
--- a/mzh/new_mzh/ALL_2/mia_one_property_dis.slurm
+++ b/mzh/new_mzh/ALL_2/mia_one_property_dis.slurm
+#!/bin/bash
+#- Job parameters
+# (TODO)
+# Please modify job name
+#- Resources
+# (TODO)
+# Please modify your requirements
+#SBATCH -p nv-gpu                    # Submit to 'nv-gpu' Partition
+#SBATCH -t 0-01:00:00                # Run for a maximum time of 0 days, 01 hours, 00 mins, 00 secs
+#SBATCH --nodes=1                    # Request N nodes
+#SBATCH --gres=gpu:1                 # Request M GPU per node
+#SBATCH --gres-flags=enforce-binding # CPU-GPU Affinity
+#SBATCH --qos=gpu-trial             # Request QOS Type
+###
+### The system will alloc 8 or 16 cores per gpu by default.
+### If you need more or less, use following:
+### #SBATCH --cpus-per-task=K            # Request K cores
+###
+### 
+### Without specifying the constraint, any available nodes that meet the requirement will be allocated
+### You can specify the characteristics of the compute nodes, and even the names of the compute nodes
+###
+### #SBATCH --nodelist=gpu-v00           # Request a specific list of hosts 
+### #SBATCH --constraint="Volta|RTX8000" # Request GPU Type: Volta(V100 or V100S) or RTX8000
+###
+# set constraint for RTX8000 to meet my cuda
+#SBATCH --constraint="Ampere|RTX8000"
+#- Log information
+echo "Job start at $(date "+%Y-%m-%d %H:%M:%S")"
+echo "Job run at:"
+echo "$(hostnamectl)"
+#- Load environments
+source /tools/module_env.sh
+source ~/pyt1.5/bin/activate
+module list                       # list modules loaded
+##- Tools
+module load cluster-tools/v1.0
+module load slurm-tools/v1.0
+module load cmake/3.15.7
+module load git/2.17.1
+module load vim/8.1.2424
+##- language
+module load python3/3.6.8
+##- CUDA
+module load cuda-cudnn/10.2-7.6.5
+##- virtualenv
+# source xxxxx/activate
+echo $(module list)              # list modules loaded
+echo $(which gcc)
+echo $(which python)
+echo $(which python3)
+cluster-quota                    # nas quota
+nvidia-smi --format=csv --query-gpu=name,driver_version,power.limit # gpu info
+#- Warning! Please do not change your CUDA_VISIBLE_DEVICES
+#- in `.bashrc`, `env.sh`, or your job script
+echo "Use GPU ${CUDA_VISIBLE_DEVICES}"                              # which gpus
+#- The CUDA_VISIBLE_DEVICES variable is assigned and specified by SLURM
+#- Job step
+# [EDIT HERE(TODO)]
+# Echo the exact command first so the job log records what was actually run.
+echo "python property_dis.py --model $Model --data $Dataset"
+python property_dis.py --model $Model --data $Dataset
+#- End
+echo "Job end at $(date "+%Y-%m-%d %H:%M:%S")"
--- a/mzh/new_mzh/ALL_2/normal.py
+++ b/mzh/new_mzh/ALL_2/normal.py
@@ -110,10 +110,14 @@ def quantize_inference(args, model, data, device='cpu'):
-# 对 是否distill，有不同的train方法
+# 对是否distill，有不同的train方法
 # 之前已经创建了model并把params config一起存储到了相应路径，此处先把model和params config load出来再train
-# model_path_tar, model_path_dis = 'mia_ckpt/{}/{}'.format(args.seed, args.mode)
-# untrained_model_tar, untrained_model_dis => model_name = '{}_mobilenetv2'.format(args.data)...
+''' 参数赋值的示例：
+ model_path_tar, model_path_dis = 'mia_ckpt/{}/{}'.format(args.seed, args.mode)
+ untrained_model_tar, untrained_model_dis => model_name = '{}_mobilenetv2'.format(args.data)...
+'''
 def train(args, model_path_tar, untrained_model_tar, model_path_dis = None, untrained_model_dis = None, device='cpu'):
    print('Training models...')
@@ -298,8 +302,7 @@ def train(args, model_path_tar, untrained_model_tar, model_path_dis = None, untr
    print('Training took {} seconds...'.format(total_training_time))
    # 存储训练后的模型权值参数和model_params (区分了用到的target model是否量化/什么量化)
-    # model_path_dis = 'mia_ckpt/{}/{}'.format(args.seed, args.mode)
    # 在networks/{}/{}'.format(args.seed, args.mode)/trained_model_name + title 下存储
    if args.quant_type is not None and 'distill' in args.mode:
@@ -307,9 +310,7 @@ def train(args, model_path_tar, untrained_model_tar, model_path_dis = None, untr
    else:
        if 'distill' in args.mode:
            save_model(trained_model, model_params, model_path_dis, trained_model_name, epoch=args.epochs_distill)
-        # 只保存了最后一个epoch的target model和shadow model 是不是不太合理 准备改成存best的试试？ (在mia_utils中的cnn_train)
-        # else:
-        #     save_model(trained_model, model_params, model_path_tar, trained_model_name, epoch=num_epochs)
 # 配置模型信息,创建并训练模型
 def train_models(args, model_path_tar, model_path_dis, device='cpu'):
@@ -389,8 +390,6 @@ def create_model(models_path,args):
 # 实例化model，并调用save_model存储，只在create model的时候用到
 def save_networks(args, model_name, model_params, model_path):
    print('Saving model...')
-    # model_params['base_model'] = model_name
-    # network_type = model_params['network_type']
    model = MIA_Model(args,model_params,args.model,args.data)  # args.model用部分大写的，args.data用小写的

--- a/mzh/new_mzh/ALL_2/property_dis.py
+++ b/mzh/new_mzh/ALL_2/property_dis.py
+import openpyxl        
+from mia_utils import *
+import module
+import gol
+import argparse
+import numpy as np
+import torch
+import math
+from sklearn.neighbors import KernelDensity
+from scipy.stats import pearsonr
+from scipy.spatial import distance
+def js_divergence_rows(p, q):
+    jsd = np.zeros(p.shape[0])
+    for i in range(p.shape[0]):
+        jsd[i] = module.js_div(p[i], q[i])
+    return jsd
+if __name__ == '__main__':
+    parser = argparse.ArgumentParser(description='DIV_TrajectoryMIA')
+    parser.add_argument('--model', type=str, default='resnet18', help=['AlexNet','AlexNet_BN','VGG_16','VGG_19','Inception_BN','ResNet_18','ResNet_50','ResNet_152','MobileNetV2'])
+    parser.add_argument('--data', type=str, default='cifar10', help=['cinic10', 'cifar10', 'cifar100'])
+    args = parser.parse_args()
+    # Open the per-model Excel workbook and select the sheet named after the dataset.
+    filename =f'{args.model}_mia_result.xlsx'
+    workbook = openpyxl.load_workbook(filename)
+    worksheet = workbook[args.data]
+    # JS distance for the unquantized model (the original note calls it "fp32"), read from mia_ckpt/0/target.
+    data_path = f'mia_ckpt/0/target/{args.data}_{args.model}/trajectory_test_data.npy' 
+    dataSet = np.load(data_path, allow_pickle=True).item()  # NOTE(review): allow_pickle=True unpickles the file; only load trusted data
+    data = torch.from_numpy(np.array(dataSet['model_trajectory'], dtype='f'))
+    data_i = torch.from_numpy(np.array(dataSet['model_loss_ori'], dtype='f'))
+    # Split both tensors into two groups by member_status (0 vs. 1).
+    # data_0 = data[dataSet['member_status'] == 0].transpose(0, 1)
+    # data_1 = data[dataSet['member_status'] == 1].transpose(0, 1)
+    data_0 = data[dataSet['member_status'] == 0]  # assumes member_status is a NumPy array so `== 0` yields a boolean mask — TODO confirm
+    data_1 = data[dataSet['member_status'] == 1]
+    data_i0 = data_i[dataSet['member_status'] == 0]
+    data_i1 = data_i[dataSet['member_status'] == 1]
+    # c0 = torch.cat((data_0, data_i0.unsqueeze(0)),0) 
+    # c1 = torch.cat((data_1, data_i1.unsqueeze(0)),0) 
+    c0 = torch.cat((data_0, data_i0.unsqueeze(1)),1)  # append model_loss_ori along dim 1, after the trajectory values
+    c1 = torch.cat((data_1, data_i1.unsqueeze(1)),1) 
+    # c0 = c0.t()
+    # c1 = c1.t()
+    js_distance = distance.jensenshannon(c0.numpy(),c1.numpy())  # NOTE(review): SciPy's default axis is 0, i.e. one distance per column — confirm this is intended
+    js_distance = js_distance.sum()
+    if math.isnan(js_distance):  # a NaN total is recorded as 0
+        js_distance = 0
+    # distance = module.js_div(data_0, data_1)
+    # distance = distance.item()
+    # if distance<0:
+    #     distance = 0
+    model_name = f'{args.data}_{args.model}' 
+    print(f"js distance of {model_name}: {js_distance}")
+    worksheet.cell(row=2,column=1,value='js_distance')  # row 2: label in column 1, value in column 2
+    worksheet.cell(row=2,column=2,value=js_distance)
+    # Compute the JS distance for every post-training-quantization (PTQ) configuration.
+    gol._init()  # presumably initialises the shared global store used by the helpers below — confirm
+    quant_type_list = ['INT','POT','FLOAT']
+    for quant_type in quant_type_list:
+        num_bit_list = numbit_list(quant_type)
+        for num_bits in num_bit_list:
+            e_bit_list = ebit_list(quant_type,num_bits)
+            for e_bits in e_bit_list:
+                if quant_type == 'FLOAT':  # only FLOAT titles carry the e_bits suffix
+                    title = '%s_%d_E%d' % (quant_type, num_bits, e_bits)
+                else:
+                    title = '%s_%d' % (quant_type, num_bits)
+                model_name_ptq = f'{args.data}_{args.model}_{title}' 
+                p_data_path = f'mia_ckpt/0/target/{model_name_ptq}/trajectory_test_data.npy' 
+                p_dataSet = np.load(p_data_path, allow_pickle=True).item()  # NOTE(review): allow_pickle=True unpickles the file; only load trusted data
+                p_data = torch.from_numpy(np.array(p_dataSet['model_trajectory'], dtype='f'))
+                i_data = torch.from_numpy(np.array(p_dataSet['model_loss_ori'], dtype='f'))
+                # Split both tensors into two groups by member_status (0 vs. 1).
+                # p_data_0 = p_data[p_dataSet['member_status'] == 0].transpose(0, 1)
+                # p_data_1 = p_data[p_dataSet['member_status'] == 1].transpose(0, 1)
+                # presumably each split is 10000 x 120 (original author's note) — confirm
+                p_data_0 = p_data[p_dataSet['member_status'] == 0]  # assumes member_status is a NumPy array so `== 0` yields a boolean mask — TODO confirm
+                p_data_1 = p_data[p_dataSet['member_status'] == 1]
+                i_data_0 = i_data[p_dataSet['member_status'] == 0]
+                i_data_1 = i_data[p_dataSet['member_status'] == 1]
+                print(f"shape of p_data:{p_data_0.shape}")
+                # c0 = torch.cat((p_data_0, i_data_0.unsqueeze(0)),0) 
+                # c1 = torch.cat((p_data_1, i_data_1.unsqueeze(0)),0) 
+                c0 = torch.cat((p_data_0, i_data_0.unsqueeze(1)),1)  # append model_loss_ori along dim 1, after the trajectory values
+                c1 = torch.cat((p_data_1, i_data_1.unsqueeze(1)),1) 
+                print(f"shape of c_data:{c0.shape}")
+                # c0 = c0.t()
+                # c1 = c1.t()
+                js_distance = distance.jensenshannon(c0.numpy(),c1.numpy())  # NOTE(review): SciPy's default axis is 0, i.e. one distance per column — confirm this is intended
+                js_distance = js_distance.sum()
+                if math.isnan(js_distance):  # a NaN total is recorded as 0
+                    js_distance = 0
+                print(f"distance_sum:{js_distance}")
+                idx = GlobalVariables.title_list.index(title)  # worksheet row = position of title in title_list, offset by 4
+                idx += 4
+                worksheet.cell(row=idx,column=2,value=js_distance)
+    workbook.save(filename)  # write the results back to the same workbook file
--- a/mzh/new_mzh/ALL_2/public/alex_hat_acc_1.png
+++ b/mzh/new_mzh/ALL_2/public/alex_hat_acc_1.png
--- a/mzh/new_mzh/ALL_2/public/alex_hat_acc_3.png
+++ b/mzh/new_mzh/ALL_2/public/alex_hat_acc_3.png
--- a/mzh/new_mzh/ALL_2/public/alex_hat_auc_1.png
+++ b/mzh/new_mzh/ALL_2/public/alex_hat_auc_1.png
--- a/mzh/new_mzh/ALL_2/public/alex_hat_auc_3.png
+++ b/mzh/new_mzh/ALL_2/public/alex_hat_auc_3.png
--- a/mzh/new_mzh/ALL_2/public/all_0_acc_p3.png
+++ b/mzh/new_mzh/ALL_2/public/all_0_acc_p3.png
--- a/mzh/new_mzh/ALL_2/public/all_0_auc_p3.png
+++ b/mzh/new_mzh/ALL_2/public/all_0_auc_p3.png
--- a/mzh/new_mzh/ALL_2/public/all_1_acc.png
+++ b/mzh/new_mzh/ALL_2/public/all_1_acc.png
--- a/mzh/new_mzh/ALL_2/public/all_1_auc.png
+++ b/mzh/new_mzh/ALL_2/public/all_1_auc.png
--- a/mzh/new_mzh/ALL_2/public/back.jpg
+++ b/mzh/new_mzh/ALL_2/public/back.jpg
--- a/mzh/new_mzh/ALL_2/public/d_alexbn_acc.png
+++ b/mzh/new_mzh/ALL_2/public/d_alexbn_acc.png
--- a/mzh/new_mzh/ALL_2/public/d_alexbn_auc.png
+++ b/mzh/new_mzh/ALL_2/public/d_alexbn_auc.png
--- a/mzh/new_mzh/ALL_2/public/d_res18_acc.png
+++ b/mzh/new_mzh/ALL_2/public/d_res18_acc.png
--- a/mzh/new_mzh/ALL_2/public/d_res18_auc.png
+++ b/mzh/new_mzh/ALL_2/public/d_res18_auc.png
--- a/mzh/new_mzh/ALL_2/public/flops_lstmocr.png
+++ b/mzh/new_mzh/ALL_2/public/flops_lstmocr.png
--- a/mzh/new_mzh/ALL_2/public/i1_acc_loss_curve.png
+++ b/mzh/new_mzh/ALL_2/public/i1_acc_loss_curve.png
--- a/mzh/new_mzh/ALL_2/public/i1_auc_loss_curve.png
+++ b/mzh/new_mzh/ALL_2/public/i1_auc_loss_curve.png
--- a/mzh/new_mzh/ALL_2/public/js.png
+++ b/mzh/new_mzh/ALL_2/public/js.png
--- a/mzh/new_mzh/ALL_2/public/p1.png
+++ b/mzh/new_mzh/ALL_2/public/p1.png
--- a/mzh/new_mzh/ALL_2/public/params_lstmocr.png
+++ b/mzh/new_mzh/ALL_2/public/params_lstmocr.png
--- a/mzh/new_mzh/ALL_2/readme.md
+++ b/mzh/new_mzh/ALL_2/readme.md
--- a/mzh/new_mzh/ALL_2/scripts/mia_one_property_div.sh
+++ b/mzh/new_mzh/ALL_2/scripts/mia_one_property_div.sh
@@ -3,4 +3,4 @@ if [ ! -d "ret_one" ]; then
    mkdir -p "ret_one"
 fi 
-sbatch --job-name=$1 -o "ret_one/%x/%j.out" -e "ret_one/%x/%j.err" --export=Model=$1,Dataset=$2 mia_one_property_div.slurm
+sbatch --job-name=$1 -o "ret_one/%x/%j.out" -e "ret_one/%x/%j.err" --export=Model=$1,Dataset=$2 mia_one_property_dis.slurm
\ No newline at end of file