Commit 204f7fcb by Zhihong Ma

doc: readme

parent 74043eb6
@@ -181,7 +181,7 @@ def build_trajectory_membership_dataset(args, ori_model_path, device='cpu'):
    gol.set_value(plist)
    # Load these non-distill models only to obtain the final loss
-    # For quantized targets this probably needs changing here: the quantized model should be loaded for inference
+    # For quantized targets, load the quantized model for inference
    if args.mode == 'target':
        cnn_model, cnn_params = normal.load_model(args, ori_model_path+'/target', model_name, epoch=args.epochs)
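The else branch for quantized targets is elided by the diff; a minimal sketch of what it presumably does, reusing the title naming (INT_<bits>, POT_<bits>, FLOAT_<bits>_E<ebits>) that appears in the commented-out code later in this commit:

# Sketch only (assumption based on the naming used elsewhere in this commit):
# when args.quant_type is set, load the PTQ target model under a suffixed name.
if args.quant_type == 'FLOAT':
    title = '%s_%d_E%d' % (args.quant_type, args.num_bits, args.e_bits)
else:
    title = '%s_%d' % (args.quant_type, args.num_bits)
cnn_model, cnn_params = normal.load_model(args, ori_model_path+'/target',
                                          model_name + '_' + title, epoch=args.epochs)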
@@ -206,22 +206,17 @@ def build_trajectory_membership_dataset(args, ori_model_path, device='cpu'):
    # params['task'] records which dataset to use
-    # Try a smaller batch size to see whether ResNet-50/152 still runs out of GPU memory
    # Reducing the batch size to 128 here drops ResNet_18's attack acc by 2% compared with batch size 512
-    dataset = mia_utils.get_dataset(cnn_params['task'], mode=args.mode, aug=True, batch_size=512)
+    dataset = mia_utils.get_dataset(cnn_params['task'], mode=args.mode, aug=True, batch_size=256)
    if args.mode == 'target':
        print('load target_dataset ... ')
        train_loader = dataset.aug_target_train_loader
-        # No more data augmentation on the test set
-        # test_loader = dataset.aug_target_test_loader
        test_loader = dataset.target_test_loader
    elif args.mode == 'shadow':
        print('load shadow_dataset ... ')
        train_loader = dataset.aug_shadow_train_loader
-        # test_loader = dataset.aug_shadow_test_loader
        test_loader = dataset.shadow_test_loader
    model_top1 = None
@@ -352,7 +347,6 @@ def trajectory_black_box_membership_inference_attack(args, models_path, device='
    epoch = 0
    # If a validation set is needed here, simply split the train set 8:2
-    # AttackModelTrainSet = np.load(models_path + f'/shadow/{model_name}/trajectory_train_data.npy', allow_pickle=True).item()
    if args.quant_type is None:
        AttackModelTrainSet = np.load(models_path + f'/shadow/{model_name}/trajectory_train_data.npy', allow_pickle=True).item()
    else:
@@ -377,7 +371,7 @@ def trajectory_black_box_membership_inference_attack(args, models_path, device='
        # train/test, i.e. in or out
        torch.from_numpy(np.array(check_and_transform_label_format(AttackModelTrainSet['predicted_status'], nb_classes=2, return_one_hot=True)[:,:2])).type(torch.long),
        torch.from_numpy(np.array(AttackModelTrainSet['member_status'])).type(torch.long),)
-    attack_train_loader = torch.utils.data.DataLoader(train_set, batch_size=128, shuffle=True)
+    attack_train_loader = torch.utils.data.DataLoader(train_set, batch_size=512, shuffle=True)
    test_set = torch.utils.data.TensorDataset(
        torch.from_numpy(np.array(AttackModelTestSet['model_loss_ori'], dtype='f')),
@@ -387,7 +381,7 @@ def trajectory_black_box_membership_inference_attack(args, models_path, device='
        # train/test, i.e. in or out
        torch.from_numpy(np.array(check_and_transform_label_format(AttackModelTestSet['predicted_status'], nb_classes=2, return_one_hot=True)[:,:2])).type(torch.long),
        torch.from_numpy(np.array(AttackModelTestSet['member_status'])).type(torch.long),)
-    attack_test_loader = torch.utils.data.DataLoader(test_set, batch_size=128, shuffle=True)
+    attack_test_loader = torch.utils.data.DataLoader(test_set, batch_size=512, shuffle=True)
    print(f'-------------------"Loss Trajectory"------------------')
    # Train the attack model
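check_and_transform_label_format one-hot encodes the in/out status before it is packed into the TensorDataset; in ART this helper lives in art.utils (an assumption here, since the import is not part of the diff). A minimal sketch of the one-hot step for binary membership labels:

# Sketch of the one-hot transform for binary in/out labels.
import numpy as np
from art.utils import check_and_transform_label_format  # assumed import location

status = np.array([0, 1, 1, 0])  # 0 = out (test), 1 = in (train)
one_hot = check_and_transform_label_format(status, nb_classes=2, return_one_hot=True)
# one_hot -> [[1, 0], [0, 1], [0, 1], [1, 0]]; the [:, :2] slice then keeps both columns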
@@ -508,8 +502,6 @@ def get_trajectory(data, target, args, model_path, device='cpu'):
    print(f"MODEL NAME IS :{model_name}")
    trajectory = None
    # Create a NumPy array predicted_label of shape (data.shape[0], 1) initialized to -1; data.shape[0] is the batch size
-    # predicted_label = np.array([-1]).repeat(data.shape[0],0).reshape(data.shape[0],1)
    # TODO: adapt to the Distill Target Model obtained from the PTQ Target Model
    # Although mode == shadow vs. target distinguishes the two cases, the loaded model is the distill model
@@ -518,20 +510,6 @@ def get_trajectory(data, target, args, model_path, device='cpu'):
        model_path_current = 'mia_ckpt/{}'.format(s)
        # For each distill epoch, record the distill model's loss on the target batch as one step of the loss trajectory
        for i in range(1, args.epochs_distill+1):
-            # Reproduce the loss by loading the distill model weights saved at each epoch
-            # if args.mode == 'shadow':
-            #     cnn_model_target, cnn_params_target = normal.load_model(args, model_path_current+'/distill_shadow', model_name, epoch=i)
-            # elif args.mode == 'target':
-            #     if args.quant_type is None:
-            #         cnn_model_target, cnn_params_target = normal.load_model(args, model_path_current+'/distill_target', model_name, epoch=i)
-            #     # TODO adjust the load path to bring in the weights of the (PTQ) Distill Target Model
-            #     else:
-            #         if args.quant_type == 'FLOAT':
-            #             title = '%s_%d_E%d' % (args.quant_type, args.num_bits, args.e_bits)
-            #         else:
-            #             title = '%s_%d' % (args.quant_type, args.num_bits)
-            #         cnn_model_target, cnn_params_target = normal.load_model(args, model_path_current+'/distill_target', model_name + '_' + title, epoch=i)
            if args.quant_type is None:
                cnn_model_target, cnn_params_target = normal.load_model(args, model_path_current+'/distill_'+args.mode, model_name, epoch=i)
            # TODO: adjust the load path to bring in the weights of the (PTQ) Distill Target Model
@@ -553,8 +531,6 @@ def get_trajectory(data, target, args, model_path, device='cpu'):
            # These are all distill-model outputs, so quantize_inference is not needed
            logit_target = MODEL_target(data)
            # Loss between the target model's output and the label, computed per sample in the batch
            loss = [F.cross_entropy(logit_target_i.unsqueeze(0), target_i.unsqueeze(0)) for (logit_target_i, target_i) in zip(logit_target, target)]
            # list -> ndarray, one column
...
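Taken together, get_trajectory evaluates each saved distill checkpoint on the same batch and appends the per-sample cross-entropy as one column of the loss trajectory. A condensed sketch of the loop (load_distill_checkpoint is a stand-in for the normal.load_model calls above, not a real function in this repo):

# Condensed sketch, not a verbatim excerpt from the commit.
trajectory = None
for i in range(1, args.epochs_distill + 1):
    MODEL_target = load_distill_checkpoint(i)  # stand-in for normal.load_model(...)
    logit_target = MODEL_target(data)
    # reduction='none' yields the same per-sample losses as the list comprehension
    loss = F.cross_entropy(logit_target, target, reduction='none')
    loss = loss.detach().cpu().numpy().reshape(-1, 1)  # list -> one-column ndarray
    trajectory = loss if trajectory is None else np.concatenate((trajectory, loss), axis=1)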
@@ -16,7 +16,7 @@ if __name__ == '__main__':
    # Compute all the JS divergences in one pass
    gol._init()
-    quant_type_list = ['INT','POT']
+    quant_type_list = ['INT','POT','FLOAT']
    filename =f'{args.model}_mia_result.xlsx'
    workbook = openpyxl.load_workbook(filename)
...
@@ -184,9 +184,6 @@ python mia_one.py --action 1 --mode target --mia_type build-dataset --model $Mod
# ATTACK
-# for testing the full-precision MIA result
-# echo "python mia_one.py --action 1 --mia_type black-box --model $Model --model_distill $Model --data $Dataset --load_attack --epochs_distill $Distill"
-# python mia_one.py --action 1 --mia_type black-box --model $Model --model_distill $Model --data $Dataset --load_attack --epochs_distill $Distill
echo "python mia_one.py --action 1 --mia_type black-box --model $Model --model_distill $Model --data $Dataset --quant_type INT --num_bits 9 --epochs_distill $Distill"
python mia_one.py --action 1 --mia_type black-box --model $Model --model_distill $Model --data $Dataset --quant_type INT --num_bits 9 --epochs_distill $Distill
...
@@ -166,7 +166,7 @@ echo "python mia_one.py --action 1 --mia_type black-box --model $Model --model_
python mia_one.py --action 1 --mia_type black-box --model $Model --model_distill $Model --data $Dataset --quant_type POT --num_bits 5 --epochs_distill $Distill
echo "python mia_one.py --action 1 --mia_type black-box --model $Model --model_distill $Model --data $Dataset --quant_type POT --num_bits 6 --epochs_distill $Distill"
python mia_one.py --action 1 --mia_type black-box --model $Model --model_distill $Model --data $Dataset --quant_type POT --num_bits 6 --epochs_distill $Distill
#- End
echo "Job end at $(date "+%Y-%m-%d %H:%M:%S")"
#!/bin/bash
#- Job parameters
# (TODO)
# Please modify job name
#- Resources
# (TODO)
# Please modify your requirements
#SBATCH -p nv-gpu # Submit to 'nv-gpu' Partition
#SBATCH -t 0-01:00:00 # Run for a maximum time of 0 days, 1 hour, 00 mins, 00 secs
#SBATCH --nodes=1 # Request N nodes
#SBATCH --gres=gpu:1 # Request M GPU per node
#SBATCH --gres-flags=enforce-binding # CPU-GPU Affinity
#SBATCH --qos=gpu-trial # Request QOS Type
###
### The system will allocate 8 or 16 cores per GPU by default.
### If you need more or less, use following:
### #SBATCH --cpus-per-task=K # Request K cores
###
###
### Without specifying the constraint, any available nodes that meet the requirement will be allocated
### You can specify the characteristics of the compute nodes, and even the names of the compute nodes
###
### #SBATCH --nodelist=gpu-v00 # Request a specific list of hosts
### #SBATCH --constraint="Volta|RTX8000" # Request GPU Type: Volta(V100 or V100S) or RTX8000
###
# set constraint for RTX8000 to meet my cuda
#SBATCH --constraint="Ampere|RTX8000"
#- Log information
echo "Job start at $(date "+%Y-%m-%d %H:%M:%S")"
echo "Job run at:"
echo "$(hostnamectl)"
#- Load environments
source /tools/module_env.sh
source ~/pyt1.5/bin/activate
module list # list modules loaded
##- Tools
module load cluster-tools/v1.0
module load slurm-tools/v1.0
module load cmake/3.15.7
module load git/2.17.1
module load vim/8.1.2424
##- language
module load python3/3.6.8
##- CUDA
module load cuda-cudnn/10.2-7.6.5
##- virtualenv
# source xxxxx/activate
echo $(module list) # list modules loaded
echo $(which gcc)
echo $(which python)
echo $(which python3)
cluster-quota # nas quota
nvidia-smi --format=csv --query-gpu=name,driver_version,power.limit # gpu info
#- Warning! Please do not change your CUDA_VISIBLE_DEVICES
#- in `.bashrc`, `env.sh`, or your job script
echo "Use GPU ${CUDA_VISIBLE_DEVICES}" # which gpus
#- The CUDA_VISIBLE_DEVICES variable is assigned and specified by SLURM
#- Job step
# [EDIT HERE(TODO)]
echo "python property_div.py --model $Model --data $Dataset"
python property_dis.py --model $Model --data $Dataset
#- End
echo "Job end at $(date "+%Y-%m-%d %H:%M:%S")"
@@ -110,10 +110,14 @@ def quantize_inference(args, model, data, device='cpu'):
# Different training procedures depending on whether this is a distill run
# The model and its params config were created and saved earlier; load them here before training
-# model_path_tar, model_path_dis = 'mia_ckpt/{}/{}'.format(args.seed, args.mode)
-# untrained_model_tar, untrained_model_dis => model_name = '{}_mobilenetv2'.format(args.data)...
+''' Example parameter assignments:
+model_path_tar, model_path_dis = 'mia_ckpt/{}/{}'.format(args.seed, args.mode)
+untrained_model_tar, untrained_model_dis => model_name = '{}_mobilenetv2'.format(args.data)...
+'''
def train(args, model_path_tar, untrained_model_tar, model_path_dis = None, untrained_model_dis = None, device='cpu'):
    print('Training models...')
@@ -298,7 +302,6 @@ def train(args, model_path_tar, untrained_model_tar, model_path_dis = None, untr
    print('Training took {} seconds...'.format(total_training_time))
    # Save the trained weights and model_params (distinguishing whether/how the target model was quantized)
-    # model_path_dis = 'mia_ckpt/{}/{}'.format(args.seed, args.mode)
    # Saved under networks/{}/{}'.format(args.seed, args.mode)/trained_model_name + title
@@ -307,9 +310,7 @@ def train(args, model_path_tar, untrained_model_tar, model_path_dis = None, untr
    else:
        if 'distill' in args.mode:
            save_model(trained_model, model_params, model_path_dis, trained_model_name, epoch=args.epochs_distill)
-    # Only the last-epoch target and shadow models are saved; that may not be ideal. Plan to try saving the best instead (see cnn_train in mia_utils)
-    # else:
-    #     save_model(trained_model, model_params, model_path_tar, trained_model_name, epoch=num_epochs)
# Configure the model info, then create and train the models
def train_models(args, model_path_tar, model_path_dis, device='cpu'):
@@ -389,8 +390,6 @@ def create_model(models_path,args):
# Instantiate the model and call save_model to store it; only used when creating the model
def save_networks(args, model_name, model_params, model_path):
    print('Saving model...')
-    # model_params['base_model'] = model_name
-    # network_type = model_params['network_type']
    model = MIA_Model(args,model_params,args.model,args.data)  # args.model uses the partially-capitalized name, args.data the lowercase one
...
import openpyxl
from mia_utils import *
import module
import gol
import argparse
import numpy as np
import torch
import math
from sklearn.neighbors import KernelDensity
from scipy.stats import pearsonr
from scipy.spatial import distance
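# Row-wise JS divergence: one module.js_div value per corresponding row of p and q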
def js_divergence_rows(p, q):
jsd = np.zeros(p.shape[0])
for i in range(p.shape[0]):
jsd[i] = module.js_div(p[i], q[i])
return jsd
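# module.js_div is not shown in this commit; a minimal sketch of the usual
# KL-based Jensen-Shannon divergence it is assumed to compute (an assumption,
# not the actual implementation):
#
#     import torch.nn.functional as F
#     def js_div_sketch(p_logits, q_logits):
#         p, q = F.softmax(p_logits, dim=-1), F.softmax(q_logits, dim=-1)
#         m = 0.5 * (p + q)
#         # kl_div takes log-probabilities as input and probabilities as target
#         return 0.5 * (F.kl_div(m.log(), p, reduction='sum')
#                       + F.kl_div(m.log(), q, reduction='sum'))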
if __name__ == '__main__':
parser = argparse.ArgumentParser(description='DIV_TrajectoryMIA')
parser.add_argument('--model', type=str, default='resnet18', help=['AlexNet','AlexNet_BN','VGG_16','VGG_19','Inception_BN','ResNet_18','ResNet_50','ResNet_152','MobileNetV2'])
parser.add_argument('--data', type=str, default='cifar10', help=['cinic10', 'cifar10', 'cifar100'])
args = parser.parse_args()
# Open the Excel workbook
filename =f'{args.model}_mia_result.xlsx'
workbook = openpyxl.load_workbook(filename)
worksheet = workbook[args.data]
# fp32 js
data_path = f'mia_ckpt/0/target/{args.data}_{args.model}/trajectory_test_data.npy'
dataSet = np.load(data_path, allow_pickle=True).item()
data = torch.from_numpy(np.array(dataSet['model_trajectory'], dtype='f'))
data_i = torch.from_numpy(np.array(dataSet['model_loss_ori'], dtype='f'))
# Split into two tensors according to member_status
# data_0 = data[dataSet['member_status'] == 0].transpose(0, 1)
# data_1 = data[dataSet['member_status'] == 1].transpose(0, 1)
data_0 = data[dataSet['member_status'] == 0]
data_1 = data[dataSet['member_status'] == 1]
data_i0 = data_i[dataSet['member_status'] == 0]
data_i1 = data_i[dataSet['member_status'] == 1]
# c0 = torch.cat((data_0, data_i0.unsqueeze(0)),0)
# c1 = torch.cat((data_1, data_i1.unsqueeze(0)),0)
c0 = torch.cat((data_0, data_i0.unsqueeze(1)),1)
c1 = torch.cat((data_1, data_i1.unsqueeze(1)),1)
# c0 = c0.t()
# c1 = c1.t()
js_distance = distance.jensenshannon(c0.numpy(),c1.numpy())
js_distance = js_distance.sum()
if math.isnan(js_distance):
js_distance = 0
# distance = module.js_div(data_0, data_1)
# distance = distance.item()
# if distance<0:
# distance = 0
model_name = f'{args.data}_{args.model}'
print(f"js distance of {model_name}: {js_distance}")
worksheet.cell(row=2,column=1,value='js_distance')
worksheet.cell(row=2,column=2,value=js_distance)
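# Note: scipy's distance.jensenshannon normalizes its inputs along axis 0,
# so for c0/c1 of shape (n_samples, n_steps + 1) it returns one JS distance
# per column; .sum() collapses these into a scalar, and the isnan guard
# handles degenerate (e.g. all-zero) columns. Equivalent, assuming scipy >= 1.7:
#     js_per_col = distance.jensenshannon(c0.numpy(), c1.numpy(), axis=0)
#     s = js_per_col.sum()
#     js_distance = 0 if math.isnan(s) else s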
# Compute the JS for all post-PTQ variants in one pass
gol._init()
quant_type_list = ['INT','POT','FLOAT']
for quant_type in quant_type_list:
num_bit_list = numbit_list(quant_type)
for num_bits in num_bit_list:
e_bit_list = ebit_list(quant_type,num_bits)
for e_bits in e_bit_list:
if quant_type == 'FLOAT':
title = '%s_%d_E%d' % (quant_type, num_bits, e_bits)
else:
title = '%s_%d' % (quant_type, num_bits)
model_name_ptq = f'{args.data}_{args.model}_{title}'
p_data_path = f'mia_ckpt/0/target/{model_name_ptq}/trajectory_test_data.npy'
p_dataSet = np.load(p_data_path, allow_pickle=True).item()
p_data = torch.from_numpy(np.array(p_dataSet['model_trajectory'], dtype='f'))
i_data = torch.from_numpy(np.array(p_dataSet['model_loss_ori'], dtype='f'))
# Split into two tensors according to member_status
# p_data_0 = p_data[p_dataSet['member_status'] == 0].transpose(0, 1)
# p_data_1 = p_data[p_dataSet['member_status'] == 1].transpose(0, 1)
# 10000 * 120
p_data_0 = p_data[p_dataSet['member_status'] == 0]
p_data_1 = p_data[p_dataSet['member_status'] == 1]
i_data_0 = i_data[p_dataSet['member_status'] == 0]
i_data_1 = i_data[p_dataSet['member_status'] == 1]
print(f"shape of p_data:{p_data_0.shape}")
# c0 = torch.cat((p_data_0, i_data_0.unsqueeze(0)),0)
# c1 = torch.cat((p_data_1, i_data_1.unsqueeze(0)),0)
c0 = torch.cat((p_data_0, i_data_0.unsqueeze(1)),1)
c1 = torch.cat((p_data_1, i_data_1.unsqueeze(1)),1)
print(f"shape of c_data:{c0.shape}")
# c0 = c0.t()
# c1 = c1.t()
js_distance = distance.jensenshannon(c0.numpy(),c1.numpy())
js_distance = js_distance.sum()
if math.isnan(js_distance):
js_distance = 0
print(f"distance_sum:{js_distance}")
idx = GlobalVariables.title_list.index(title)
idx += 4
worksheet.cell(row=idx,column=2,value=js_distance)
workbook.save(filename)
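For reference, the title strings built in the loop above follow INT_<bits> and POT_<bits> for integer and power-of-two quantization, and FLOAT_<bits>_E<ebits> for floating point; a quick illustration (the bit widths below are examples only, the real lists come from numbit_list/ebit_list):

# Illustrative titles (example bit widths, not the actual configuration lists):
for quant_type, num_bits, e_bits in [('INT', 8, None), ('POT', 5, None), ('FLOAT', 8, 4)]:
    if quant_type == 'FLOAT':
        print('%s_%d_E%d' % (quant_type, num_bits, e_bits))  # FLOAT_8_E4
    else:
        print('%s_%d' % (quant_type, num_bits))  # INT_8, POT_5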
@@ -3,4 +3,4 @@ if [ ! -d "ret_one" ]; then
    mkdir -p "ret_one"
fi
-sbatch --job-name=$1 -o "ret_one/%x/%j.out" -e "ret_one/%x/%j.err" --export=Model=$1,Dataset=$2 mia_one_property_div.slurm
+sbatch --job-name=$1 -o "ret_one/%x/%j.out" -e "ret_one/%x/%j.err" --export=Model=$1,Dataset=$2 mia_one_property_dis.slurm
\ No newline at end of file
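Assuming the wrapper above is saved as, say, submit_property_dis.sh (a hypothetical name; the actual file name is not shown in this diff), it is invoked once per model/dataset pair:

bash submit_property_dis.sh ResNet_18 cifar10  # arguments: model, dataset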