Commit 68add01f by Klin
parents a2ea6085 4c8bc7fe
import torch
import torch.nn as nn
import torch.nn.functional as F
import numpy as np
import tqdm
import utils
import normal
import dataset as DATA
from typing import TYPE_CHECKING, Callable, List, Optional, Tuple, Union
from sklearn import metrics
class MLP_BLACKBOX(nn.Module):
def __init__(self, dim_in):
super(MLP_BLACKBOX, self).__init__()
self.dim_in = dim_in
self.fc1 = nn.Linear(self.dim_in, 512)
self.fc2 = nn.Linear(512, 128)
self.fc3 = nn.Linear(128, 32)
self.fc4 = nn.Linear(32, 2)
def forward(self, x):
x = x.view(-1, self.dim_in)
x = F.relu(self.fc1(x))
x = F.relu(self.fc2(x))
x = F.relu(self.fc3(x))
x = F.softmax(self.fc4(x), dim=1)
return x
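
# Usage sketch (illustrative, not part of the original pipeline): the attack
# input is a loss trajectory of length args.epochs_distill plus the attacked
# model's final loss, hence dim_in = epochs_distill + 1 (see the torch.cat
# calls below). The helper name and the value 50 are assumptions.
def _demo_attack_model_shapes(epochs_distill=50):
    model = MLP_BLACKBOX(dim_in=epochs_distill + 1)
    dummy = torch.randn(4, epochs_distill + 1)
    out = model(dummy)  # (4, 2): per-sample P(non-member), P(member)
    assert out.shape == (4, 2)
    return out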
def train_mia_attack_model(args, epoch, model, attack_train_loader, optimizer, loss_fn, device):
model.train()
train_loss = 0
correct = 0
    for batch_idx, (model_loss_ori, model_trajectory, original_labels, predicted_labels, predicted_status, member_status) in enumerate(attack_train_loader):
        # concatenate the loss trajectory with the final loss to form the attack input
        attack_input = torch.cat((model_trajectory, model_loss_ori.unsqueeze(1)), 1)
        attack_input = attack_input.to(device)
        output = model(attack_input)
        # member_status is the in/out membership label (1 = member, 0 = non-member),
        # so it is used directly as the training target
member_status = member_status.to(device)
# cross entropy
loss = loss_fn(output, member_status)
optimizer.zero_grad()
loss.backward()
optimizer.step()
train_loss += loss.item()
        # argmax index, i.e. the predicted in/out class
        pred = output.max(1, keepdim=True)[1]
        # compare predictions against the in/out membership labels
correct += pred.eq(member_status.view_as(pred)).sum().item()
train_loss /= len(attack_train_loader.dataset)
accuracy = 100. * correct / len(attack_train_loader.dataset)
return train_loss, accuracy/100.
def test_mia_attack_model(args, epoch, model, attack_test_loader, loss_fn, max_auc, max_acc, device):
model.eval()
test_loss = 0
correct = 0
auc_ground_truth = None
auc_pred = None
with torch.no_grad():
        for batch_idx, (model_loss_ori, model_trajectory, original_labels, predicted_labels, predicted_status, member_status) in enumerate(attack_test_loader):
            attack_input = torch.cat((model_trajectory, model_loss_ori.unsqueeze(1)), 1)
            attack_input = attack_input.to(device)
            output = model(attack_input)
            member_status = member_status.to(device)
            test_loss += loss_fn(output, member_status).item()
            _, pred = output.max(1, keepdim=True)  # argmax index
            correct += pred.eq(member_status.view_as(pred)).sum().item()
            # output is a softmax probability vector; use P(member) as the AUC score
            auc_pred_current = output[:, -1]
auc_ground_truth = member_status.cpu().numpy() if batch_idx == 0 else np.concatenate((auc_ground_truth, member_status.cpu().numpy()), axis=0)
auc_pred = auc_pred_current.cpu().numpy() if batch_idx == 0 else np.concatenate((auc_pred, auc_pred_current.cpu().numpy()), axis=0)
test_loss /= len(attack_test_loader.dataset)
accuracy = 100. * correct / len(attack_test_loader.dataset)
fpr, tpr, thresholds = metrics.roc_curve(auc_ground_truth, auc_pred, pos_label=1)
auc = metrics.auc(fpr, tpr)
if auc > max_auc:
max_auc = auc
save_data = {
'fpr': fpr,
'tpr': tpr
}
np.save(f'./outputs/{args.data}_{args.model}_{args.model_distill}_trajectory_auc', save_data)
if accuracy > max_acc:
max_acc = accuracy
return test_loss, accuracy/100., auc, max_auc, max_acc
def check_and_transform_label_format(
labels: np.ndarray, nb_classes: Optional[int] = None, return_one_hot: bool = True
) -> np.ndarray:
"""
Check label format and transform to one-hot-encoded labels if necessary
:param labels: An array of integer labels of shape `(nb_samples,)`, `(nb_samples, 1)` or `(nb_samples, nb_classes)`.
:param nb_classes: The number of classes.
:param return_one_hot: True if returning one-hot encoded labels, False if returning index labels.
:return: Labels with shape `(nb_samples, nb_classes)` (one-hot) or `(nb_samples,)` (index).
"""
if labels is not None:
if len(labels.shape) == 2 and labels.shape[1] > 1:
if not return_one_hot:
labels = np.argmax(labels, axis=1)
elif len(labels.shape) == 2 and labels.shape[1] == 1 and nb_classes is not None and nb_classes > 2:
labels = np.squeeze(labels)
if return_one_hot:
labels = to_categorical(labels, nb_classes)
elif len(labels.shape) == 2 and labels.shape[1] == 1 and nb_classes is not None and nb_classes == 2:
pass
elif len(labels.shape) == 1:
if return_one_hot:
if nb_classes == 2:
labels = np.expand_dims(labels, axis=1)
else:
labels = to_categorical(labels, nb_classes)
else:
raise ValueError(
"Shape of labels not recognised."
"Please provide labels in shape (nb_samples,) or (nb_samples, nb_classes)"
)
return labels
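
# Minimal sketch of the accepted label shapes (illustrative; the helper name
# is an assumption, not part of the original code): index labels round-trip
# through one-hot encoding and back.
def _demo_check_and_transform_label_format():
    y_idx = np.array([0, 2, 1])
    one_hot = check_and_transform_label_format(y_idx, nb_classes=3)         # (3, 3)
    back = check_and_transform_label_format(one_hot, return_one_hot=False)  # (3,)
    assert one_hot.shape == (3, 3) and np.array_equal(back, y_idx)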
def to_categorical(labels: Union[np.ndarray, List[float]], nb_classes: Optional[int] = None) -> np.ndarray:
"""
Convert an array of labels to binary class matrix.
:param labels: An array of integer labels of shape `(nb_samples,)`.
:param nb_classes: The number of classes (possible labels).
:return: A binary matrix representation of `y` in the shape `(nb_samples, nb_classes)`.
    Example:
labels = [0, 1, 2, 0, 2, 1]
=>
array([[1., 0., 0.],
[0., 1., 0.],
[0., 0., 1.],
[1., 0., 0.],
[0., 0., 1.],
[0., 1., 0.]], dtype=float32)
"""
labels = np.array(labels, dtype=np.int32)
if nb_classes is None:
nb_classes = np.max(labels) + 1
categorical = np.zeros((labels.shape[0], nb_classes), dtype=np.float32)
categorical[np.arange(labels.shape[0]), np.squeeze(labels)] = 1
return categorical
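
# Quick check mirroring the docstring example above (illustrative; when
# nb_classes is None it is inferred as max(labels) + 1).
def _demo_to_categorical():
    y = to_categorical([0, 1, 2, 0, 2, 1])
    assert y.shape == (6, 3) and y[0, 0] == 1.0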
def build_trajectory_membership_dataset(args, ori_model_path, device='cpu'):
    # model_name is "<dataset>_<architecture>"
if args.model == 'resnet18':
model_name = '{}_resnet18'.format(args.data)
elif args.model == 'resnet50':
model_name = '{}_resnet50'.format(args.data)
elif args.model == 'resnet152':
model_name = '{}_resnet152'.format(args.data)
elif args.model == 'mobilenetv2':
model_name = '{}_mobilenetv2'.format(args.data)
if args.mode == 'shadow':
cnn_model, cnn_params = normal.load_model(args, ori_model_path+'/shadow', model_name, epoch=args.epochs)
elif args.mode == 'target':
cnn_model, cnn_params = normal.load_model(args, ori_model_path+'/target', model_name, epoch=args.epochs)
MODEL = cnn_model.to(device)
    # cnn_params['task'] records which dataset to use
dataset = utils.get_dataset(cnn_params['task'], mode=args.mode, aug=True, batch_size=384)
if args.mode == 'target':
print('load target_dataset ... ')
train_loader = dataset.aug_target_train_loader
test_loader = dataset.aug_target_test_loader
elif args.mode == 'shadow':
print('load shadow_dataset ... ')
train_loader = dataset.aug_shadow_train_loader
test_loader = dataset.aug_shadow_test_loader
    model_loss_ori = None
    model_trajectory = None
    original_labels = None
    predicted_labels = None
    predicted_status = None
    member_status = None
def normalization(data):
_range = np.max(data) - np.min(data)
return (data - np.min(data)) / _range
MODEL.eval()
for loader_idx, data_loader in enumerate([train_loader, test_loader]):
top1 = DATA.AverageMeter()
        # ori_idx has shape [batch_size]: each sample's index in the original dataset
for data_idx, (data, target, ori_idx) in enumerate(data_loader):
            # loss trajectory of the distill model w.r.t. the labels, one column per distill epoch
batch_trajectory = get_trajectory(data, target, args, ori_model_path, device)
data, target = data.to(device), target.to(device)
batch_logit_target = MODEL(data)
            # row-wise argmax (predicted label)
_, batch_predict_label = batch_logit_target.max(1)
batch_predicted_label = batch_predict_label.long().cpu().detach().numpy()
batch_original_label = target.long().cpu().detach().numpy()
            # the attacked model's final loss, appended later to the end of the loss trajectory
batch_loss_target = [F.cross_entropy(batch_logit_target_i.unsqueeze(0), target_i.unsqueeze(0)) for (batch_logit_target_i, target_i) in zip(batch_logit_target, target)]
batch_loss_target = np.array([batch_loss_target_i.cpu().detach().numpy() for batch_loss_target_i in batch_loss_target])
            # (batch_size,) array: whether the argmax prediction matches the ground-truth label
batch_predicted_status = (torch.argmax(batch_logit_target, dim=1) == target).float().cpu().detach().numpy()
            # (batch_size,) -> (batch_size, 1) for later concatenation
batch_predicted_status = np.expand_dims(batch_predicted_status, axis=1)
# in / out label for attack model
member = np.repeat(np.array(int(1 - loader_idx)), batch_trajectory.shape[0], 0)
batch_loss_ori = batch_loss_target
            # assign on the first batch, concatenate afterwards
model_loss_ori = batch_loss_ori if loader_idx == 0 and data_idx == 0 else np.concatenate((model_loss_ori, batch_loss_ori), axis=0)
model_trajectory = batch_trajectory if loader_idx == 0 and data_idx == 0 else np.concatenate((model_trajectory, batch_trajectory), axis=0)
original_labels = batch_original_label if loader_idx == 0 and data_idx == 0 else np.concatenate((original_labels, batch_original_label), axis=0)
predicted_labels = batch_predicted_label if loader_idx == 0 and data_idx == 0 else np.concatenate((predicted_labels, batch_predicted_label), axis=0)
predicted_status = batch_predicted_status if loader_idx == 0 and data_idx == 0 else np.concatenate((predicted_status, batch_predicted_status), axis=0)
member_status = member if loader_idx == 0 and data_idx == 0 else np.concatenate((member_status, member), axis=0)
print(f'------------Loading trajectory {args.mode} dataset successfully!---------')
data = {
'model_loss_ori':model_loss_ori,
'model_trajectory':model_trajectory,
'original_labels':original_labels,
'predicted_labels':predicted_labels,
'predicted_status':predicted_status,
'member_status':member_status,
'nb_classes':dataset.num_classes
}
    # shadow-model data is saved as trajectory_train_data, target-model data as trajectory_test_data
dataset_type = 'trajectory_train_data' if args.mode == 'shadow' else 'trajectory_test_data'
utils.create_path(ori_model_path + f'/{args.mode}/{model_name}')
np.save(ori_model_path + f'/{args.mode}/{model_name}/{dataset_type}', data)
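
# Sketch (illustrative; the helper name is an assumption): the dict saved
# above round-trips through np.load with allow_pickle=True, exactly as the
# attack code does below. model_trajectory is (N, epochs_distill),
# model_loss_ori is (N,), member_status is (N,) with 1 = member, 0 = non-member.
def _demo_load_trajectory_dataset(path):
    data = np.load(path, allow_pickle=True).item()
    return data['model_trajectory'], data['model_loss_ori'], data['member_status']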
def trajectory_black_box_membership_inference_attack(args, models_path, device='cpu'):
if args.model == 'resnet18':
model_name = '{}_resnet18'.format(args.data)
elif args.model == 'resnet50':
model_name = '{}_resnet50'.format(args.data)
elif args.model == 'resnet152':
model_name = '{}_resnet152'.format(args.data)
elif args.model == 'mobilenetv2':
model_name = '{}_mobilenetv2'.format(args.data)
print(f"MODEL NAME IS :{model_name}")
if args.model_distill == 'resnet18':
model_distill_name = '{}_resnet18'.format(args.data)
elif args.model_distill == 'resnet50':
model_distill_name = '{}_resnet50'.format(args.data)
elif args.model_distill == 'resnet152':
model_distill_name = '{}_resnet152'.format(args.data)
elif args.model_distill == 'mobilenetv2':
model_distill_name = '{}_mobilenetv2'.format(args.data)
print(f"MODEL DISTILL NAME IS :{model_distill_name}")
    print(f'------------------model: {model_name}-------------------')
save_path = models_path + '/attack/' + model_name
utils.create_path(save_path)
best_prec1 = 0.0
best_auc = 0.0
AttackModelTrainSet = np.load(models_path + f'/shadow/{model_name}/trajectory_train_data.npy', allow_pickle=True).item()
AttackModelTestSet = np.load(models_path + f'/target/{model_name}/trajectory_test_data.npy', allow_pickle=True).item()
train_set = torch.utils.data.TensorDataset(
torch.from_numpy(np.array(AttackModelTrainSet['model_loss_ori'], dtype='f')),
torch.from_numpy(np.array(AttackModelTrainSet['model_trajectory'], dtype='f')),
        # convert to one-hot encoding
torch.from_numpy(np.array(check_and_transform_label_format(AttackModelTrainSet['original_labels'], nb_classes=AttackModelTrainSet['nb_classes'], return_one_hot=True))).type(torch.float),
torch.from_numpy(np.array(check_and_transform_label_format(AttackModelTrainSet['predicted_labels'], nb_classes=AttackModelTrainSet['nb_classes'], return_one_hot=True))).type(torch.long),
# train/test i.e. in or out
torch.from_numpy(np.array(check_and_transform_label_format(AttackModelTrainSet['predicted_status'], nb_classes=2, return_one_hot=True)[:,:2])).type(torch.long),
torch.from_numpy(np.array(AttackModelTrainSet['member_status'])).type(torch.long),)
test_set = torch.utils.data.TensorDataset(
torch.from_numpy(np.array(AttackModelTestSet['model_loss_ori'], dtype='f')),
torch.from_numpy(np.array(AttackModelTestSet['model_trajectory'], dtype='f')),
torch.from_numpy(np.array(check_and_transform_label_format(AttackModelTestSet['original_labels'], nb_classes=AttackModelTestSet['nb_classes'], return_one_hot=True))).type(torch.float),
torch.from_numpy(np.array(check_and_transform_label_format(AttackModelTestSet['predicted_labels'], nb_classes=AttackModelTestSet['nb_classes'], return_one_hot=True))).type(torch.long),
# train/test i.e. in or out
torch.from_numpy(np.array(check_and_transform_label_format(AttackModelTestSet['predicted_status'], nb_classes=2, return_one_hot=True)[:,:2])).type(torch.long),
torch.from_numpy(np.array(AttackModelTestSet['member_status'])).type(torch.long),)
attack_train_loader = torch.utils.data.DataLoader(train_set, batch_size=128, shuffle=True)
attack_test_loader = torch.utils.data.DataLoader(test_set, batch_size=128, shuffle=True)
print(f'-------------------"Loss Trajectory"------------------')
# 训练Attack Model
attack_model = MLP_BLACKBOX(dim_in = args.epochs_distill + 1)
attack_optimizer = torch.optim.SGD(attack_model.parameters(), lr=0.01, momentum=0.9, weight_decay=0.0001)
attack_model = attack_model.to(device)
loss_fn = nn.CrossEntropyLoss()
max_auc = 0
max_acc = 0
for epoch in range(100):
train_loss, train_prec1 = train_mia_attack_model(args, epoch, attack_model, attack_train_loader, attack_optimizer, loss_fn, device)
val_loss, val_prec1, val_auc, max_auc, max_acc = test_mia_attack_model(args, epoch, attack_model, attack_test_loader, loss_fn, max_auc, max_acc, device)
is_best_prec1 = val_prec1 > best_prec1
is_best_auc = val_auc > best_auc
if is_best_prec1:
best_prec1 = val_prec1
if is_best_auc:
best_auc = val_auc
        if epoch % 10 == 0:
            print('epoch:{} \t train_loss:{:.4f} \t test_loss:{:.4f} \t train_prec1:{:.4f} \t val_prec1:{:.4f} \t val_auc:{:.4f}'
                  .format(epoch, train_loss, val_loss, train_prec1, val_prec1, val_auc))
print('Max AUC: ', max_auc)
print('Max ACC: ', max_acc/100)
    # could be changed to save only the best-AUC or best-accuracy checkpoint
torch.save(attack_model.state_dict(), save_path + '/' + 'trajectory' + '.pkl')
    # ROC data of the best AUC, saved inside test_mia_attack_model
data_auc = np.load(f'./outputs/{args.data}_{args.model}_{args.model_distill}_trajectory_auc.npy', allow_pickle=True).item()
for i in range(len(data_auc['fpr'])):
if data_auc['fpr'][i] > 0.001:
print('TPR at 0.1% FPR: {:.1%}'.format(data_auc['tpr'][i-1]))
break
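
# Hedged alternative (not the original code): instead of scanning for the
# first FPR above the threshold and reporting the previous point, the ROC
# curve can be linearly interpolated at the target FPR. Helper name is an
# assumption.
def _tpr_at_fpr(fpr, tpr, target_fpr=1e-3):
    # sklearn's roc_curve returns fpr in increasing order, as np.interp requires
    return float(np.interp(target_fpr, fpr, tpr))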
def get_trajectory(data, target, args, model_path, device='cpu'):
if args.model_distill == 'resnet18':
model_name = '{}_resnet18'.format(args.data)
elif args.model_distill == 'resnet50':
model_name = '{}_resnet50'.format(args.data)
elif args.model_distill == 'resnet152':
model_name = '{}_resnet152'.format(args.data)
elif args.model_distill == 'mobilenetv2':
model_name = '{}_mobilenetv2'.format(args.data)
print(f"MODEL NAME IS :{model_name}")
trajectory = None
    # (data.shape[0], 1) array filled with -1: a placeholder for predicted labels (currently unused)
    predicted_label = np.array([-1]).repeat(data.shape[0], 0).reshape(data.shape[0], 1)
for s in range(1):
trajectory_current = None
model_path_current = 'networks/{}'.format(s)
        # for each distill epoch: record the distilled model's loss on (data, target) as one point of the trajectory
for i in range(1, args.epochs_distill+1):
            # reproduce the loss by loading the distilled model checkpoint saved at epoch i
if args.mode == 'shadow':
cnn_model_target, cnn_params_target = normal.load_model(args, model_path_current+'/distill_shadow', model_name, epoch=i)
elif args.mode == 'target':
cnn_model_target, cnn_params_target = normal.load_model(args, model_path_current+'/distill_target', model_name, epoch=i)
MODEL_target = cnn_model_target.to(device)
            # batch of input tensors
            data = data.to(device)
            # ground-truth labels
            target = target.to(device)
            # forward pass through the distilled model
            logit_target = MODEL_target(data)
            # per-sample cross-entropy loss between the distilled model's outputs and the labels
loss = [F.cross_entropy(logit_target_i.unsqueeze(0), target_i.unsqueeze(0)) for (logit_target_i, target_i) in zip(logit_target, target)]
            # list -> numpy array of shape (batch_size, 1)
loss = np.array([loss_i.detach().cpu().numpy() for loss_i in loss]).reshape(-1, 1)
            # concatenate along columns: one column per distill epoch
trajectory_current = loss if i == 1 else np.concatenate((trajectory_current, loss), 1)
        # accumulate across model instances (a single instance here, since s ranges over range(1))
trajectory = trajectory_current if s == 0 else trajectory + trajectory_current
return trajectory
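
# Sketch (illustrative; the helper name is an assumption): mirrors how the
# attack input is assembled above: a trajectory of shape (N, E) plus the
# attacked model's final loss (N,) gives the (N, E + 1) tensor fed to
# MLP_BLACKBOX.
def _append_final_loss(trajectory, final_loss):
    return np.concatenate((trajectory, final_loss.reshape(-1, 1)), axis=1)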
import torch
import torch.nn as nn
import torch.nn.functional as F
import math
import numpy as np
import utils
from module import *
import module
from global_var import GlobalVariables
# ResNet for CIFAR-10-sized inputs
class ResNet(nn.Module):
    # TODO: the constructor interface still needs adjustment
    def __init__(self, args, params):
super(ResNet, self).__init__()
        self.input_size = int(params['input_size'])
        self.num_classes = int(params['num_classes'])
        self.num_blocks = params['num_blocks']
        self.block_type = params['block_type']
        self.augment_training = params['augment_training']
if self.block_type == 'basic':
self.block = BasicBlock
elif self.block_type == 'bottle':
self.block = Bottleneck
        self.inplanes = 16  # CIFAR-10 images are small, so start with fewer channels
        GlobalVariables.SELF_INPLANES = self.inplanes
        # stem
self.conv1 = nn.Conv2d(3, 16, kernel_size=3, stride=1, padding=1,
bias=False)
self.bn1 = nn.BatchNorm2d(16)
self.relu = nn.ReLU()
        # residual stages (4 stages)
self.layer1 = MakeLayer_ResNet(self.block, 16, self.num_blocks[0])
self.layer2 = MakeLayer_ResNet(self.block, 32, self.num_blocks[1], stride=2)
self.layer3 = MakeLayer_ResNet(self.block, 64, self.num_blocks[2], stride=2)
self.layer4 = MakeLayer_ResNet(self.block, 128, self.num_blocks[3], stride=2)
        # classifier
self.avgpool = nn.AdaptiveAvgPool2d((1, 1))
self.fc = nn.Linear(128 * self.block.expansion, self.num_classes)
self.initialize_weights()
        # training/testing entry points depend on the run mode
if 'distill' in args.mode:
self.train_func = utils.cnn_train_dis
else:
self.train_func = utils.cnn_train
self.test_func = utils.cnn_test
def forward(self, x):
        # stem
x = self.conv1(x)
x = self.bn1(x)
x = self.relu(x)
        # unlike the ImageNet variant there is no initial maxpool: CIFAR-10 images are already small
        # residual stages
x = self.layer1(x)
x = self.layer2(x)
x = self.layer3(x)
x = self.layer4(x)
        # classifier
        x = self.avgpool(x)  # output has shape (B, C, 1, 1)
x = x.view(x.size(0), -1)
x = self.fc(x)
        # no softmax here; downstream losses such as F.cross_entropy expect raw logits
return x
def quantize(self, quant_type, num_bits=8, e_bits=3):
self.qconvbnrelu1 = QConvBNReLU(quant_type,self.conv1,self.bn1,qi=True,qo=True,num_bits=num_bits,e_bits=e_bits)
self.layer1.quantize(quant_type=quant_type,num_bits=num_bits, e_bits=e_bits)
self.layer2.quantize(quant_type=quant_type,num_bits=num_bits, e_bits=e_bits)
self.layer3.quantize(quant_type=quant_type,num_bits=num_bits, e_bits=e_bits)
self.layer4.quantize(quant_type=quant_type,num_bits=num_bits, e_bits=e_bits)
self.qavgpool1 = QAdaptiveAvgPool2d(quant_type,qi=False,qo=True,num_bits=num_bits,e_bits=e_bits)
self.qfc1 = QLinear(quant_type, self.fc,qi=False,qo=True,num_bits=num_bits,e_bits=e_bits)
# self.qfc1 = QLinear(quant_type, self.fc,qi=True,qo=True,num_bits=num_bits,e_bits=e_bits)
def quantize_forward(self, x):
x = self.qconvbnrelu1(x)
x = self.layer1.quantize_forward(x)
x = self.layer2.quantize_forward(x)
x = self.layer3.quantize_forward(x)
x = self.layer4.quantize_forward(x)
x = self.qavgpool1(x)
x = x.view(x.size(0), -1)
x = self.qfc1(x)
        # no softmax here; raw logits are returned
return x
def freeze(self):
        self.qconvbnrelu1.freeze()  # the first layer owns its qi, so freeze() needs no external qi
qo = self.layer1.freeze(qinput = self.qconvbnrelu1.qo)
qo = self.layer2.freeze(qinput = qo)
qo = self.layer3.freeze(qinput = qo)
qo = self.layer4.freeze(qinput = qo)
self.qavgpool1.freeze(qi=qo)
self.qfc1.freeze(qi=self.qavgpool1.qo)
# self.qfc1.freeze()
def fakefreeze(self):
self.qconvbnrelu1.fakefreeze()
self.layer1.fakefreeze()
self.layer2.fakefreeze()
self.layer3.fakefreeze()
self.layer4.fakefreeze()
self.qfc1.fakefreeze()
def quantize_inference(self, x):
qx = self.qconvbnrelu1.qi.quantize_tensor(x)
qx = self.qconvbnrelu1.quantize_inference(qx)
qx = self.layer1.quantize_inference(qx)
qx = self.layer2.quantize_inference(qx)
qx = self.layer3.quantize_inference(qx)
qx = self.layer4.quantize_inference(qx)
qx = self.qavgpool1.quantize_inference(qx)
qx = qx.view(qx.size(0), -1)
qx = self.qfc1.quantize_inference(qx)
qx = self.qfc1.qo.dequantize_tensor(qx)
        # no softmax here; raw logits are returned
return qx
def initialize_weights(self):
for m in self.modules():
if isinstance(m, nn.Conv2d):
nn.init.kaiming_normal_(m.weight, mode='fan_out', nonlinearity='relu')
elif isinstance(m, nn.BatchNorm2d):
m.weight.data.fill_(1)
m.bias.data.zero_()
elif isinstance(m, nn.Linear):
m.weight.data.normal_(0, 0.01)
m.bias.data.zero_()
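
# Sketch of the post-training-quantization flow implied by the methods above
# (illustrative; the quant_type string and the loader item layout are
# assumptions): 1) attach quantized wrappers, 2) calibrate statistics with
# quantize_forward, 3) freeze scales/zero-points, 4) run integer inference.
def _demo_resnet_ptq_flow(model, calib_loader, quant_type='INT'):
    model.eval()
    model.quantize(quant_type=quant_type, num_bits=8, e_bits=3)
    with torch.no_grad():
        for data, *_ in calib_loader:
            model.quantize_forward(data)  # collect qi/qo statistics
        model.freeze()
        for data, *_ in calib_loader:
            out = model.quantize_inference(data)
    return out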
# BasicBlock
class BasicBlock(nn.Module):
expansion = 1
def __init__(self, inplanes, planes, stride=1, downsample=None):
super(BasicBlock, self).__init__()
        # first 3x3 conv
self.conv1 = nn.Conv2d(inplanes, planes, kernel_size=3, stride=stride,
padding=1, bias=False)
self.bn1 = nn.BatchNorm2d(planes)
        # second 3x3 conv
self.conv2 = nn.Conv2d(planes, planes, kernel_size=3, stride=1,
padding=1, bias=False)
self.bn2 = nn.BatchNorm2d(planes)
# shortcut
self.relu = nn.ReLU()
self.downsample = downsample
self.stride = stride
def forward(self, x):
identity = x
out = self.conv1(x)
out = self.bn1(out)
out = self.relu(out)
out = self.conv2(out)
out = self.bn2(out)
if self.downsample is not None:
identity = self.downsample(identity)
out += identity
out = self.relu(out)
return out
def quantize(self, quant_type ,num_bits=8, e_bits=3):
self.qconvbnrelu1 = QConvBNReLU(quant_type,self.conv1,self.bn1,qi=False,qo=True,num_bits=num_bits,e_bits=e_bits)
self.qconvbn1 = QConvBN(quant_type,self.conv2,self.bn2,qi=False,qo=True,num_bits=num_bits,e_bits=e_bits)
if self.downsample is not None:
self.qconvbn2 = QConvBN(quant_type,self.downsample[0],self.downsample[1],qi=False,qo=True,num_bits=num_bits,e_bits=e_bits)
self.qelementadd = QElementwiseAdd(quant_type,qi0=False, qi1=False, qo=True,num_bits=num_bits,e_bits=e_bits)
        self.qrelu1 = QReLU(quant_type, qi=False, num_bits=num_bits, e_bits=e_bits)  # qi is supplied at freeze time
def quantize_forward(self, x):
identity = x
out = self.qconvbnrelu1(x)
out = self.qconvbn1(out)
if self.downsample is not None:
identity = self.qconvbn2(identity)
        # residual add, implemented as a dedicated elementwise-add module
        # out = identity + out
out = self.qelementadd(out,identity)
out = self.qrelu1(out)
return out
def freeze(self, qinput):
        # qconvbnrelu1 could reuse the previous layer's qo, but passing it through
        # is awkward, so each block takes qinput explicitly; still needs careful checking
        self.qconvbnrelu1.freeze(qi=qinput)  # chained to the previous module's last qo
self.qconvbn1.freeze(qi = self.qconvbnrelu1.qo)
if self.downsample is not None:
            self.qconvbn2.freeze(qi=qinput)  # downsample branch
self.qelementadd.freeze(qi0 = self.qconvbn1.qo, qi1 = self.qconvbn2.qo)
else:
self.qelementadd.freeze(qi0 = self.qconvbn1.qo, qi1 = qinput)
self.qrelu1.freeze(qi = self.qelementadd.qo)
        return self.qrelu1.qi  # the qi collected after ReLU serves as this block's output qo
def fakefreeze(self):
        # counterpart of freeze(); still needs careful checking
        self.qconvbnrelu1.fakefreeze()
self.qconvbn1.fakefreeze()
if self.downsample is not None:
            self.qconvbn2.fakefreeze()  # downsample branch
def quantize_inference(self, x):
        # no quantize_tensor/dequantize_tensor here: this is an interior block, so inputs and outputs stay in the quantized domain
identity = x
out = self.qconvbnrelu1.quantize_inference(x)
out = self.qconvbn1.quantize_inference(out)
if self.downsample is not None:
identity = self.qconvbn2.quantize_inference(identity)
        # out = identity + out is handled by the elementwise-add module
out = self.qelementadd.quantize_inference(out,identity)
out = self.qrelu1.quantize_inference(out)
return out
# Bottleneck
class Bottleneck(nn.Module):
expansion = 4
def __init__(self, inplanes, planes, stride=1, downsample=None):
super(Bottleneck, self).__init__()
        # 1x1 conv
self.conv1 = nn.Conv2d(inplanes, planes, kernel_size=1, bias=False)
self.bn1 = nn.BatchNorm2d(planes)
        # 3x3 conv
self.conv2 = nn.Conv2d(planes, planes, kernel_size=3, stride=stride,
padding=1, bias=False)
self.bn2 = nn.BatchNorm2d(planes)
        # 1x1 expansion conv
self.conv3 = nn.Conv2d(planes, planes * self.expansion, kernel_size=1,
bias=False)
self.bn3 = nn.BatchNorm2d(planes * self.expansion)
# shortcut
self.relu = nn.ReLU()
self.downsample = downsample
self.stride = stride
def forward(self, x):
identity = x
out = self.conv1(x)
out = self.bn1(out)
out = self.relu(out)
out = self.conv2(out)
out = self.bn2(out)
out = self.relu(out)
out = self.conv3(out)
out = self.bn3(out)
if self.downsample is not None:
identity = self.downsample(x)
        out += identity  # the residual addition happens here
out = self.relu(out)
return out
def quantize(self, quant_type ,num_bits=8, e_bits=3):
self.qconvbnrelu1 = QConvBNReLU(quant_type,self.conv1,self.bn1,qi=False,qo=True,num_bits=num_bits,e_bits=e_bits)
self.qconvbnrelu2 = QConvBNReLU(quant_type,self.conv2,self.bn2,qi=False,qo=True,num_bits=num_bits,e_bits=e_bits)
self.qconvbn1 = QConvBN(quant_type,self.conv3,self.bn3,qi=False,qo=True,num_bits=num_bits,e_bits=e_bits)
if self.downsample is not None:
self.qconvbn2 = QConvBN(quant_type,self.downsample[0],self.downsample[1],qi=False,qo=True,num_bits=num_bits,e_bits=e_bits)
self.qelementadd = QElementwiseAdd(quant_type,qi0=False, qi1=False, qo=True,num_bits=num_bits,e_bits=e_bits)
        self.qrelu1 = QReLU(quant_type, qi=False, num_bits=num_bits, e_bits=e_bits)  # qi is supplied at freeze time
def quantize_forward(self, x):
identity = x
out = self.qconvbnrelu1(x)
out = self.qconvbnrelu2(out)
out = self.qconvbn1(out)
if self.downsample is not None:
identity = self.qconvbn2(identity)
        # residual add, implemented as a dedicated elementwise-add module
        # out = identity + out
out = self.qelementadd(out,identity)
out = self.qrelu1(out)
return out
def freeze(self, qinput):
        # qconvbnrelu1 could reuse the previous layer's qo, but passing it through
        # is awkward, so each block takes qinput explicitly; still needs careful checking
        self.qconvbnrelu1.freeze(qi=qinput)  # chained to the previous module's last qo
self.qconvbnrelu2.freeze(qi=self.qconvbnrelu1.qo)
self.qconvbn1.freeze(qi = self.qconvbnrelu2.qo)
if self.downsample is not None:
            self.qconvbn2.freeze(qi=qinput)  # downsample branch
self.qelementadd.freeze(qi0 = self.qconvbn1.qo, qi1 = self.qconvbn2.qo)
else:
self.qelementadd.freeze(qi0 = self.qconvbn1.qo, qi1 = qinput)
        self.qrelu1.freeze(qi=self.qelementadd.qo)  # qi comes from the elementwise-add output
        return self.qrelu1.qi  # the qi collected after ReLU serves as this block's output qo
def fakefreeze(self):
        # counterpart of freeze(); still needs careful checking
        self.qconvbnrelu1.fakefreeze()
self.qconvbnrelu2.fakefreeze()
self.qconvbn1.fakefreeze()
if self.downsample is not None:
            self.qconvbn2.fakefreeze()  # downsample branch
def quantize_inference(self, x):
        # no quantize_tensor/dequantize_tensor here: interior block, inputs and outputs stay in the quantized domain
identity = x
out = self.qconvbnrelu1.quantize_inference(x)
out = self.qconvbnrelu2.quantize_inference(out)
out = self.qconvbn1.quantize_inference(out)
if self.downsample is not None:
identity = self.qconvbn2.quantize_inference(identity)
        # out = identity + out is handled by the elementwise-add module
out = self.qelementadd.quantize_inference(out,identity)
out = self.qrelu1.quantize_inference(out)
return out
class MakeLayer_ResNet(nn.Module):
def __init__(self, block, planes, blocks, stride=1):
super(MakeLayer_ResNet, self).__init__()
# print('makelayer init:'+ str(GlobalVariables.SELF_INPLANES))
self.downsample = None
if stride != 1 or GlobalVariables.SELF_INPLANES != planes * block.expansion:
self.downsample = nn.Sequential(
nn.Conv2d(GlobalVariables.SELF_INPLANES, planes * block.expansion,kernel_size=1, stride=stride, bias=False),
nn.BatchNorm2d(planes * block.expansion)
)
self.blockdict = nn.ModuleDict()
self.blockdict['block1'] = block(inplanes=GlobalVariables.SELF_INPLANES, planes=planes, stride=stride, downsample=self.downsample)
GlobalVariables.SELF_INPLANES = planes * block.expansion
        for i in range(1, blocks):  # remaining blocks, kept in a ModuleDict so each is addressable by name
            self.blockdict['block' + str(i+1)] = block(inplanes=GlobalVariables.SELF_INPLANES, planes=planes)  # instantiate the block
def forward(self,x):
for _, layer in self.blockdict.items():
x = layer(x)
return x
def quantize(self, quant_type, num_bits=8, e_bits=3):
        # TODO: verify
for _, layer in self.blockdict.items():
            layer.quantize(quant_type=quant_type, num_bits=num_bits, e_bits=e_bits)  # each block carries its own quantize strategy
def quantize_forward(self, x):
for _, layer in self.blockdict.items():
            x = layer.quantize_forward(x)  # each block implements quantize_forward
return x
    def freeze(self, qinput):  # qinput is handed in from the parent model's freeze()
        # the first block takes qinput; each later block chains on the previous block's qo
        # still needs careful checking
cnt = 0
for _, layer in self.blockdict.items():
if cnt == 0:
qo = layer.freeze(qinput = qinput)
cnt = 1
else:
                qo = layer.freeze(qinput=qo)  # each block implements freeze
        return qo  # returned for the next layer
def fakefreeze(self):
for _, layer in self.blockdict.items():
layer.fakefreeze()
def quantize_inference(self, x):
        # no quantize_tensor/dequantize_tensor here: interior module, inputs and outputs stay in the quantized domain
for _, layer in self.blockdict.items():
            x = layer.quantize_inference(x)  # each block implements quantize_inference
return x
# ResNet-18 factory (the depth is determined by model_params)
def resnet18(args, model_params):
model = ResNet(args, model_params)
return model
# ResNet-50 factory
def resnet50(args, model_params):
model = ResNet(args, model_params)
return model
# ResNet-152 factory
def resnet152(args, model_params):
model = ResNet(args, model_params)
return model
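
# Sketch of the params dict the factories above expect, based on the keys
# read in ResNet.__init__ (the values are assumptions for a CIFAR-10
# ResNet-18 with BasicBlock and [2, 2, 2, 2] blocks; any args.mode without
# 'distill' selects the plain training functions).
def _demo_resnet18_params():
    from types import SimpleNamespace
    params = {
        'num_classes': 10,
        'input_size': 32,
        'augment_training': True,
        'num_blocks': [2, 2, 2, 2],
        'block_type': 'basic',
    }
    args = SimpleNamespace(mode='target')
    return resnet18(args, params)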
class MobileNetV2(nn.Module):
def __init__(self, args,params):
super(MobileNetV2, self).__init__()
self.num_classes = int(params['num_classes'])
self.augment_training = params['augment_training']
self.input_size = int(params['input_size'])
if 'distill' in args.mode:
self.train_func = utils.cnn_train_dis
else:
self.train_func = utils.cnn_train
self.test_func = utils.cnn_test
self.conv1 = nn.Conv2d(3, 32, 3, stride=1, padding=1)
self.bn1 = nn.BatchNorm2d(32)
self.relu1 = nn.ReLU6(inplace=True)
        # inverted-residual stages; t is the channel expansion factor
self.layer1 = MakeLayer_MobileNet(32, 16, 1, t=1, stride=1)
self.layer2 = MakeLayer_MobileNet(16, 24, 2, t=6, stride=2)
self.layer3 = MakeLayer_MobileNet(24, 32, 3, t=6, stride=2)
        # stage layout adapted to the CIFAR-10 input size
self.layer4 = MakeLayer_MobileNet(32, 96, 3, t=6, stride=1)
self.layer5 = MakeLayer_MobileNet(96, 160, 3, t=6, stride=2)
self.layer6 = MakeLayer_MobileNet(160, 320, 1, t=6, stride=1)
self.conv2 = nn.Conv2d(320, 1280, 1)
self.avg1 = nn.AdaptiveAvgPool2d(1)
self.fc = nn.Linear(1280, self.num_classes)
def forward(self, x):
x = self.conv1(x)
x = self.bn1(x)
x = self.relu1(x)
x = self.layer1(x)
x = self.layer2(x)
x = self.layer3(x)
x = self.layer4(x)
x = self.layer5(x)
x = self.layer6(x)
x = self.conv2(x)
x = self.avg1(x)
x = x.view(x.size(0), -1)
x = self.fc(x)
return x
def quantize(self, quant_type, num_bits=8, e_bits=3):
self.qconvbnrelu1 = QConvBNReLU6(quant_type,self.conv1,self.bn1,qi=True,qo=True,num_bits=num_bits,e_bits=e_bits)
self.layer1.quantize(quant_type=quant_type,num_bits=num_bits, e_bits=e_bits)
self.layer2.quantize(quant_type=quant_type,num_bits=num_bits, e_bits=e_bits)
self.layer3.quantize(quant_type=quant_type,num_bits=num_bits, e_bits=e_bits)
self.layer4.quantize(quant_type=quant_type,num_bits=num_bits, e_bits=e_bits)
self.layer5.quantize(quant_type=quant_type,num_bits=num_bits, e_bits=e_bits)
self.layer6.quantize(quant_type=quant_type,num_bits=num_bits, e_bits=e_bits)
self.qconv1 = QConv2d(quant_type, self.conv2, qi=False, qo=True, num_bits=num_bits, e_bits=e_bits)
self.qavgpool1 = QAdaptiveAvgPool2d(quant_type,qi=False,qo=True,num_bits=num_bits,e_bits=e_bits)
self.qfc1 = QLinear(quant_type, self.fc,qi=False,qo=True,num_bits=num_bits,e_bits=e_bits)
def quantize_forward(self, x):
x = self.qconvbnrelu1(x)
x = self.layer1.quantize_forward(x)
x = self.layer2.quantize_forward(x)
x = self.layer3.quantize_forward(x)
x = self.layer4.quantize_forward(x)
x = self.layer5.quantize_forward(x)
x = self.layer6.quantize_forward(x)
x = self.qconv1(x)
x = self.qavgpool1(x)
x = x.view(x.size(0), -1)
x = self.qfc1(x)
        # no softmax here; raw logits are returned
return x
def freeze(self):
        self.qconvbnrelu1.freeze()  # the first layer owns its qi, so freeze() needs no external qi
qo = self.layer1.freeze(qinput = self.qconvbnrelu1.qo)
qo = self.layer2.freeze(qinput = qo)
qo = self.layer3.freeze(qinput = qo)
qo = self.layer4.freeze(qinput = qo)
qo = self.layer5.freeze(qinput = qo)
qo = self.layer6.freeze(qinput = qo)
self.qconv1.freeze(qi = qo)
self.qavgpool1.freeze(qi=self.qconv1.qo)
self.qfc1.freeze(qi=self.qavgpool1.qo)
# self.qfc1.freeze()
def fakefreeze(self):
self.qconvbnrelu1.fakefreeze()
self.layer1.fakefreeze()
self.layer2.fakefreeze()
self.layer3.fakefreeze()
self.layer4.fakefreeze()
self.layer5.fakefreeze()
self.layer6.fakefreeze()
self.qconv1.fakefreeze()
self.qfc1.fakefreeze()
def quantize_inference(self, x):
qx = self.qconvbnrelu1.qi.quantize_tensor(x)
qx = self.qconvbnrelu1.quantize_inference(qx)
qx = self.layer1.quantize_inference(qx)
qx = self.layer2.quantize_inference(qx)
qx = self.layer3.quantize_inference(qx)
qx = self.layer4.quantize_inference(qx)
qx = self.layer5.quantize_inference(qx)
qx = self.layer6.quantize_inference(qx)
qx = self.qconv1.quantize_inference(qx)
qx = self.qavgpool1.quantize_inference(qx)
qx = qx.view(qx.size(0), -1)
qx = self.qfc1.quantize_inference(qx)
qx = self.qfc1.qo.dequantize_tensor(qx)
        # no softmax here; raw logits are returned
return qx
class InvertedResidual(nn.Module):
def __init__(self, in_channels, out_channels, stride, expand_ratio):
super(InvertedResidual, self).__init__()
hidden_dims = int(in_channels * expand_ratio)
self.identity_flag = stride == 1 and in_channels == out_channels
        # pointwise expansion conv
self.conv1 = nn.Conv2d(in_channels, hidden_dims, 1)
self.bn1 = nn.BatchNorm2d(hidden_dims)
self.relu1 = nn.ReLU6(inplace=True)
# Depthwise Convolution
self.conv2 = nn.Conv2d(hidden_dims, hidden_dims, 3, stride=stride, padding=1, groups=hidden_dims)
self.bn2 = nn.BatchNorm2d(hidden_dims)
self.relu2 = nn.ReLU6(inplace=True)
# Pointwise & Linear Convolution
self.conv3 = nn.Conv2d(hidden_dims, out_channels, 1)
self.bn3 = nn.BatchNorm2d(out_channels)
def forward(self, x):
identity = x
x = self.conv1(x)
x = self.bn1(x)
x = self.relu1(x)
x = self.conv2(x)
x = self.bn2(x)
x = self.relu2(x)
x = self.conv3(x)
x = self.bn3(x)
if self.identity_flag:
return identity + x
else:
return x
def quantize(self, quant_type ,num_bits=8, e_bits=3):
self.qconvbnrelu1 = QConvBNReLU6(quant_type,self.conv1,self.bn1,qi=False,qo=True,num_bits=num_bits,e_bits=e_bits)
self.qconvbnrelu2 = QConvBNReLU6(quant_type,self.conv2,self.bn2,qi=False,qo=True,num_bits=num_bits,e_bits=e_bits)
self.qconvbn1 = QConvBN(quant_type,self.conv3,self.bn3,qi=False,qo=True,num_bits=num_bits,e_bits=e_bits)
self.qelementadd = QElementwiseAdd(quant_type,qi0=False, qi1=False, qo=True,num_bits=num_bits,e_bits=e_bits)
def quantize_forward(self, x):
identity = x
out = self.qconvbnrelu1(x)
out = self.qconvbnrelu2(out)
out = self.qconvbn1(out)
if self.identity_flag:
out = self.qelementadd(out, identity)
return out
def freeze(self, qinput):
        # qconvbnrelu1 could reuse the previous layer's qo, but passing it through
        # is awkward, so each block takes qinput explicitly; still needs careful checking
        self.qconvbnrelu1.freeze(qi=qinput)  # chained to the previous module's last qo
self.qconvbnrelu2.freeze(qi=self.qconvbnrelu1.qo)
self.qconvbn1.freeze(qi = self.qconvbnrelu2.qo)
if self.identity_flag:
self.qelementadd.freeze(qi0 = self.qconvbn1.qo, qi1 = qinput)
return self.qelementadd.qo
else:
return self.qconvbn1.qo
def fakefreeze(self):
self.qconvbnrelu1.fakefreeze()
self.qconvbnrelu2.fakefreeze()
self.qconvbn1.fakefreeze()
def quantize_inference(self, x):
        # no quantize_tensor/dequantize_tensor here: interior block, inputs and outputs stay in the quantized domain
identity = x
out = self.qconvbnrelu1.quantize_inference(x)
out = self.qconvbnrelu2.quantize_inference(out)
out = self.qconvbn1.quantize_inference(out)
if self.identity_flag:
out = self.qelementadd.quantize_inference(out, identity)
return out
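
# Sketch (illustrative): the residual shortcut is only active when
# stride == 1 and the channel counts match, mirroring identity_flag above.
def _demo_inverted_residual_identity():
    block = InvertedResidual(32, 32, stride=1, expand_ratio=6)
    x = torch.randn(2, 32, 8, 8)
    assert block(x).shape == x.shape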
class MakeLayer_MobileNet(nn.Module):
def __init__(self, in_channels, out_channels, n_repeat, t, stride):
super(MakeLayer_MobileNet, self).__init__()
self.layers = nn.ModuleList()
for i in range(n_repeat):
if i == 0:
self.layers.append(InvertedResidual(in_channels, out_channels, stride, t))
else:
self.layers.append(InvertedResidual(in_channels, out_channels, 1, t))
in_channels = out_channels
def forward(self,x):
for layer in self.layers:
x = layer(x)
return x
    def quantize(self, quant_type, num_bits=8, e_bits=3):
        # TODO: verify
        for layer in self.layers:
            layer.quantize(quant_type=quant_type, num_bits=num_bits, e_bits=e_bits)  # each block carries its own quantize strategy
def quantize_forward(self, x):
for layer in self.layers:
            x = layer.quantize_forward(x)  # each block implements quantize_forward
return x
    def freeze(self, qinput):  # qinput is handed in from the parent model's freeze()
        # the first block takes qinput; each later block chains on the previous block's qo
        # still needs careful checking
cnt = 0
for layer in self.layers:
if cnt == 0:
qo = layer.freeze(qinput = qinput)
cnt = 1
else:
                qo = layer.freeze(qinput=qo)  # each block implements freeze
        return qo  # returned for the next layer
def fakefreeze(self):
for layer in self.layers:
layer.fakefreeze()
def quantize_inference(self, x):
        # no quantize_tensor/dequantize_tensor here: interior module, inputs and outputs stay in the quantized domain
for layer in self.layers:
            x = layer.quantize_inference(x)  # each block implements quantize_inference
return x
import torch
import os
from torchvision import datasets, transforms, utils
from torch.utils.data import sampler
from PIL import Image
from torch.utils.data import Subset, DataLoader, ConcatDataset
import torch.utils.data as data
from torch._utils import _accumulate
from torch import randperm
import numpy as np
import pandas as pd
def dataset_split(dataset, lengths):
if sum(lengths) != len(dataset):
raise ValueError("Sum of input lengths does not equal the length of the input dataset!")
indices = list(range(sum(lengths)))
np.random.seed(1)
np.random.shuffle(indices)
return [Subset(dataset, indices[offset - length:offset]) for offset, length in zip(_accumulate(lengths), lengths)]
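
# Sketch (illustrative; the helper name is an assumption): the fixed seed
# makes the shuffle deterministic, so the target/shadow/distill subsets are
# disjoint and reproducible across runs.
def _demo_dataset_split():
    pool = list(range(10))
    a, b = dataset_split(pool, [6, 4])
    assert len(a) == 6 and len(b) == 4 and set(a.indices).isdisjoint(b.indices)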
class GTSRB_ORI(data.Dataset):
base_folder = 'GTSRB'
def __init__(self, root_dir, train=False, transform=None):
self.root_dir = root_dir
self.sub_directory = 'trainingset' if train else 'testset'
self.csv_file_name = 'training.csv' if train else 'test.csv'
csv_file_path = os.path.join(
root_dir, self.base_folder, self.sub_directory, self.csv_file_name)
self.csv_data = pd.read_csv(csv_file_path)
self.transform = transform
def __len__(self):
return len(self.csv_data)
def __getitem__(self, idx):
img_path = os.path.join(self.root_dir, self.base_folder, self.sub_directory,
self.csv_data.iloc[idx, 0])
img = Image.open(img_path)
classId = self.csv_data.iloc[idx, 1]
if self.transform is not None:
img = self.transform(img)
return img, classId
class SUBGTSRB(data.Dataset):
def __init__(self, mode, aug, train):
self.img_size = 32
self.num_classes = 43
self.mean = [0.3403, 0.3121, 0.3214]
self.std = [0.2724, 0.2608, 0.2669]
normalize = transforms.Normalize(mean=self.mean, std=self.std)
self.augmented = transforms.Compose([transforms.Resize((32,32)), transforms.ToTensor(), normalize])
self.normalized = transforms.Compose([transforms.ToTensor(), normalize])
self.aug_trainset = GTSRB_ORI(root_dir='/home/c01yili/datasets/common_dataset', train=True, transform=self.augmented)
self.aug_testset = GTSRB_ORI(root_dir='/home/c01yili/datasets/common_dataset', train=False, transform=self.augmented)
self.trainset = GTSRB_ORI(root_dir='/home/c01yili/datasets/common_dataset', train=True, transform=self.normalized)
self.testset = GTSRB_ORI(root_dir='/home/c01yili/datasets/common_dataset', train=False, transform=self.normalized)
self.aug_dataset = ConcatDataset([self.aug_trainset, self.aug_testset])
self.dataset = ConcatDataset([self.trainset, self.testset])
self.aug_target_trainset, self.aug_target_testset, self.aug_shadow_trainset, self.aug_shadow_testset, self.aug_distill_trainset = dataset_split(self.aug_dataset, [1500, 1500, 1500, 1500, 45838])
self.aug_distill_testset = self.aug_shadow_testset
self.target_trainset, self.target_testset, self.shadow_trainset, self.shadow_testset, self.distill_trainset = dataset_split(self.dataset, [1500, 1500, 1500, 1500, 45838])
self.distill_testset = self.shadow_testset
if mode == 'target':
if aug:
if train:
self.dataset = self.aug_target_trainset
else:
self.dataset = self.aug_target_testset
else:
if train:
self.dataset = self.target_trainset
else:
self.dataset = self.target_testset
elif mode == 'shadow':
if aug:
if train:
self.dataset = self.aug_shadow_trainset
else:
self.dataset = self.aug_shadow_testset
else:
if train:
self.dataset = self.shadow_trainset
else:
self.dataset = self.shadow_testset
elif 'distill' in mode:
if aug:
if train:
self.dataset = self.aug_distill_trainset
else:
self.dataset = self.aug_distill_testset
else:
if train:
self.dataset = self.distill_trainset
else:
self.dataset = self.distill_testset
self.index = range(int(len(self.dataset)))
def __getitem__(self, idx):
return self.dataset[idx][0], self.dataset[idx][1], self.index[idx]
def __len__(self):
return len(self.index)
class GTSRB:
def __init__(self, mode, aug, batch_size=128):
self.batch_size = batch_size
self.img_size = 32
self.num_classes = 43
if aug:
if mode == 'target':
self.aug_target_trainset = SUBGTSRB(mode, aug, True)
self.aug_target_train_loader = torch.utils.data.DataLoader(self.aug_target_trainset, batch_size=batch_size, shuffle=True, num_workers=1)
self.aug_target_testset = SUBGTSRB(mode, aug, False)
self.aug_target_test_loader = torch.utils.data.DataLoader(self.aug_target_testset, batch_size=batch_size, shuffle=True, num_workers=1)
elif mode == 'shadow':
self.aug_shadow_trainset = SUBGTSRB(mode, aug, True)
self.aug_shadow_train_loader = torch.utils.data.DataLoader(self.aug_shadow_trainset, batch_size=batch_size, shuffle=True, num_workers=1)
self.aug_shadow_testset = SUBGTSRB(mode, aug, False)
self.aug_shadow_test_loader = torch.utils.data.DataLoader(self.aug_shadow_testset, batch_size=batch_size, shuffle=True, num_workers=1)
elif 'distill' in mode:
self.aug_distill_trainset = SUBGTSRB(mode, aug, True)
self.aug_distill_train_loader = torch.utils.data.DataLoader(self.aug_distill_trainset, batch_size=batch_size, shuffle=True, num_workers=1)
self.aug_distill_testset = SUBGTSRB(mode, aug, False)
self.aug_distill_test_loader = torch.utils.data.DataLoader(self.aug_distill_testset, batch_size=batch_size, shuffle=True, num_workers=1)
else:
if mode == 'target':
self.target_trainset = SUBGTSRB(mode, aug, True)
self.target_train_loader = torch.utils.data.DataLoader(self.target_trainset, batch_size=batch_size, shuffle=True, num_workers=1)
self.target_testset = SUBGTSRB(mode, aug, False)
self.target_test_loader = torch.utils.data.DataLoader(self.target_testset, batch_size=batch_size, shuffle=True, num_workers=1)
elif mode == 'shadow':
self.shadow_trainset = SUBGTSRB(mode, aug, True)
self.shadow_train_loader = torch.utils.data.DataLoader(self.shadow_trainset, batch_size=batch_size, shuffle=True, num_workers=1)
self.shadow_testset = SUBGTSRB(mode, aug, False)
self.shadow_test_loader = torch.utils.data.DataLoader(self.shadow_testset, batch_size=batch_size, shuffle=True, num_workers=1)
elif 'distill' in mode:
self.distill_trainset = SUBGTSRB(mode, aug, True)
self.distill_train_loader = torch.utils.data.DataLoader(self.distill_trainset, batch_size=batch_size, shuffle=True, num_workers=1)
self.distill_testset = SUBGTSRB(mode, aug, False)
self.distill_test_loader = torch.utils.data.DataLoader(self.distill_testset, batch_size=batch_size, shuffle=True, num_workers=1)
class SUBCINIC10(data.Dataset):
def __init__(self, mode, aug, train):
self.img_size = 32
self.num_classes = 10
self.mean = [0.47889522, 0.47227842, 0.43047404]
self.std = [0.24205776, 0.23828046, 0.25874835]
normalize = transforms.Normalize(mean=self.mean, std=self.std)
self.augmented = transforms.Compose([transforms.RandomHorizontalFlip(), transforms.RandomCrop(32, padding=4),transforms.ToTensor(), normalize])
self.normalized = transforms.Compose([transforms.ToTensor(), normalize])
        # whether the test/valid splits should also be augmented is an open question
self.aug_trainset = datasets.ImageFolder(root='/lustre/I/mazhihong/data/CINIC-10/train', transform=self.augmented)
self.aug_testset = datasets.ImageFolder(root='/lustre/I/mazhihong/data/CINIC-10/test', transform=self.augmented)
self.aug_validset = datasets.ImageFolder(root='/lustre/I/mazhihong/data/CINIC-10/valid', transform=self.augmented)
self.trainset = datasets.ImageFolder(root='/lustre/I/mazhihong/data/CINIC-10/train', transform=self.normalized)
self.testset = datasets.ImageFolder(root='/lustre/I/mazhihong/data/CINIC-10/test', transform=self.normalized)
self.validset = datasets.ImageFolder(root='/lustre/I/mazhihong/data/CINIC-10/valid', transform=self.normalized)
self.aug_dataset = ConcatDataset([self.aug_trainset, self.aug_testset, self.aug_validset])
self.dataset = ConcatDataset([self.trainset, self.testset, self.validset])
self.aug_target_trainset, self.aug_target_testset, self.aug_shadow_trainset, self.aug_shadow_testset, self.aug_distill_trainset, self.aug_distill_testset = dataset_split(self.aug_dataset, [10000, 10000, 10000, 10000, 220000, 10000])
self.target_trainset, self.target_testset, self.shadow_trainset, self.shadow_testset, self.distill_trainset, self.distill_testset = dataset_split(self.dataset, [10000, 10000, 10000, 10000, 220000, 10000])
if mode == 'target':
if aug:
if train:
self.dataset = self.aug_target_trainset
else:
self.dataset = self.aug_target_testset
else:
if train:
self.dataset = self.target_trainset
else:
self.dataset = self.target_testset
elif mode == 'shadow':
if aug:
if train:
self.dataset = self.aug_shadow_trainset
else:
self.dataset = self.aug_shadow_testset
else:
if train:
self.dataset = self.shadow_trainset
else:
self.dataset = self.shadow_testset
elif 'distill' in mode:
if aug:
if train:
self.dataset = self.aug_distill_trainset
else:
self.dataset = self.aug_distill_testset
else:
if train:
self.dataset = self.distill_trainset
else:
self.dataset = self.distill_testset
self.index = range(int(len(self.dataset)))
def __getitem__(self, idx):
return self.dataset[idx][0], self.dataset[idx][1], self.index[idx]
def __len__(self):
return len(self.index)
class CINIC10:
def __init__(self, mode, aug, batch_size=128, add_trigger=False):
self.batch_size = batch_size
self.img_size = 32
self.num_classes = 10
if aug:
if mode == 'target':
self.aug_target_trainset = SUBCINIC10(mode, aug, True)
self.aug_target_train_loader = torch.utils.data.DataLoader(self.aug_target_trainset, batch_size=batch_size, shuffle=True, num_workers=2)
self.aug_target_testset = SUBCINIC10(mode, aug, False)
self.aug_target_test_loader = torch.utils.data.DataLoader(self.aug_target_testset, batch_size=batch_size, shuffle=True, num_workers=2)
elif mode == 'shadow':
self.aug_shadow_trainset = SUBCINIC10(mode, aug, True)
self.aug_shadow_train_loader = torch.utils.data.DataLoader(self.aug_shadow_trainset, batch_size=batch_size, shuffle=True, num_workers=2)
self.aug_shadow_testset = SUBCINIC10(mode, aug, False)
self.aug_shadow_test_loader = torch.utils.data.DataLoader(self.aug_shadow_testset, batch_size=batch_size, shuffle=True, num_workers=2)
elif 'distill' in mode:
self.aug_distill_trainset = SUBCINIC10(mode, aug, True)
self.aug_distill_train_loader = torch.utils.data.DataLoader(self.aug_distill_trainset, batch_size=batch_size, shuffle=True, num_workers=2)
self.aug_distill_testset = SUBCINIC10(mode, aug, False)
self.aug_distill_test_loader = torch.utils.data.DataLoader(self.aug_distill_testset, batch_size=batch_size, shuffle=True, num_workers=2)
else:
if mode == 'target':
self.target_trainset = SUBCINIC10(mode, aug, True)
self.target_train_loader = torch.utils.data.DataLoader(self.target_trainset, batch_size=batch_size, shuffle=True, num_workers=2)
self.target_testset = SUBCINIC10(mode, aug, False)
self.target_test_loader = torch.utils.data.DataLoader(self.target_testset, batch_size=batch_size, shuffle=True, num_workers=2)
elif mode == 'shadow':
self.shadow_trainset = SUBCINIC10(mode, aug, True)
self.shadow_train_loader = torch.utils.data.DataLoader(self.shadow_trainset, batch_size=batch_size, shuffle=True, num_workers=2)
self.shadow_testset = SUBCINIC10(mode, aug, False)
self.shadow_test_loader = torch.utils.data.DataLoader(self.shadow_testset, batch_size=batch_size, shuffle=True, num_workers=2)
elif 'distill' in mode:
self.distill_trainset = SUBCINIC10(mode, aug, True)
self.distill_train_loader = torch.utils.data.DataLoader(self.distill_trainset, batch_size=batch_size, shuffle=True, num_workers=2)
self.distill_testset = SUBCINIC10(mode, aug, False)
self.distill_test_loader = torch.utils.data.DataLoader(self.distill_testset, batch_size=batch_size, shuffle=True, num_workers=2)
class SUBCIFAR10(data.Dataset):
def __init__(self, mode, aug, train):
self.img_size = 32
self.num_classes = 10
self.num_test = 10000
self.num_train = 50000
self.mean = [0.485, 0.456, 0.406]
self.std = [0.229, 0.224, 0.225]
normalize = transforms.Normalize(mean=self.mean, std=self.std)
self.augmented = transforms.Compose([transforms.RandomHorizontalFlip(), transforms.RandomCrop(32, padding=4),transforms.ToTensor(), normalize])
self.normalized = transforms.Compose([transforms.ToTensor(), normalize])
self.aug_trainset = datasets.CIFAR10(root='/lustre/I/mazhihong/data/CIFAR10', train=True, download=True, transform=self.augmented)
self.aug_testset = datasets.CIFAR10(root='/lustre/I/mazhihong/data/CIFAR10', train=False, download=True, transform=self.augmented)
self.trainset = datasets.CIFAR10(root='/lustre/I/mazhihong/data/CIFAR10', train=True, download=False, transform=self.normalized)
self.testset = datasets.CIFAR10(root='/lustre/I/mazhihong/data/CIFAR10', train=False, download=False, transform=self.normalized)
        # merge train and test: under MIA the original train/test split is meaningless, so the pool is re-split below
self.aug_dataset = ConcatDataset([self.aug_trainset, self.aug_testset])
self.dataset = ConcatDataset([self.trainset, self.testset])
        # re-split the merged pool
self.aug_target_trainset, self.aug_target_testset, self.aug_shadow_trainset, self.aug_shadow_testset, self.aug_distill_trainset = dataset_split(self.aug_dataset, [10000, 10000, 10000, 10000, 20000])
self.aug_distill_testset = self.aug_shadow_testset
self.target_trainset, self.target_testset, self.shadow_trainset, self.shadow_testset, self.distill_trainset = dataset_split(self.dataset, [10000, 10000, 10000, 10000, 20000])
self.distill_testset = self.shadow_testset
if mode == 'target':
if aug:
if train:
self.dataset = self.aug_target_trainset
else:
self.dataset = self.aug_target_testset
else:
if train:
self.dataset = self.target_trainset
else:
self.dataset = self.target_testset
elif mode == 'shadow':
if aug:
if train:
self.dataset = self.aug_shadow_trainset
else:
self.dataset = self.aug_shadow_testset
else:
if train:
self.dataset = self.shadow_trainset
else:
self.dataset = self.shadow_testset
elif 'distill' in mode:
if aug:
if train:
self.dataset = self.aug_distill_trainset
else:
self.dataset = self.aug_distill_testset
else:
if train:
self.dataset = self.distill_trainset
else:
self.dataset = self.distill_testset
self.index = range(int(len(self.dataset)))
def __getitem__(self, idx):
return self.dataset[idx][0], self.dataset[idx][1], self.index[idx]
def __len__(self):
return len(self.index)
class CIFAR10:
def __init__(self, mode, aug, batch_size=128, add_trigger=False):
self.batch_size = batch_size
self.img_size = 32
self.num_classes = 10
if aug:
if mode == 'target':
self.aug_target_trainset = SUBCIFAR10(mode, aug, True)
self.aug_target_train_loader = torch.utils.data.DataLoader(self.aug_target_trainset, batch_size=batch_size, shuffle=True, num_workers=2)
self.aug_target_testset = SUBCIFAR10(mode, aug, False)
self.aug_target_test_loader = torch.utils.data.DataLoader(self.aug_target_testset, batch_size=batch_size, shuffle=True, num_workers=2)
elif mode == 'shadow':
self.aug_shadow_trainset = SUBCIFAR10(mode, aug, True)
self.aug_shadow_train_loader = torch.utils.data.DataLoader(self.aug_shadow_trainset, batch_size=batch_size, shuffle=True, num_workers=2)
self.aug_shadow_testset = SUBCIFAR10(mode, aug, False)
self.aug_shadow_test_loader = torch.utils.data.DataLoader(self.aug_shadow_testset, batch_size=batch_size, shuffle=True, num_workers=2)
elif 'distill' in mode:
self.aug_distill_trainset = SUBCIFAR10(mode, aug, True)
self.aug_distill_train_loader = torch.utils.data.DataLoader(self.aug_distill_trainset, batch_size=batch_size, shuffle=True, num_workers=2)
self.aug_distill_testset = SUBCIFAR10(mode, aug, False)
self.aug_distill_test_loader = torch.utils.data.DataLoader(self.aug_distill_testset, batch_size=batch_size, shuffle=True, num_workers=2)
else:
if mode == 'target':
self.target_trainset = SUBCIFAR10(mode, aug, True)
self.target_train_loader = torch.utils.data.DataLoader(self.target_trainset, batch_size=batch_size, shuffle=True, num_workers=2)
self.target_testset = SUBCIFAR10(mode, aug, False)
self.target_test_loader = torch.utils.data.DataLoader(self.target_testset, batch_size=batch_size, shuffle=True, num_workers=2)
elif mode == 'shadow':
self.shadow_trainset = SUBCIFAR10(mode, aug, True)
self.shadow_train_loader = torch.utils.data.DataLoader(self.shadow_trainset, batch_size=batch_size, shuffle=True, num_workers=2)
self.shadow_testset = SUBCIFAR10(mode, aug, False)
self.shadow_test_loader = torch.utils.data.DataLoader(self.shadow_testset, batch_size=batch_size, shuffle=True, num_workers=2)
elif 'distill' in mode:
self.distill_trainset = SUBCIFAR10(mode, aug, True)
self.distill_train_loader = torch.utils.data.DataLoader(self.distill_trainset, batch_size=batch_size, shuffle=True, num_workers=2)
self.distill_testset = SUBCIFAR10(mode, aug, False)
self.distill_test_loader = torch.utils.data.DataLoader(self.distill_testset, batch_size=batch_size, shuffle=True, num_workers=2)
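
# Sketch (illustrative): how the MIA pipeline obtains member (train) and
# non-member (test) loaders for the target model; dataset paths and split
# sizes are hard-coded in SUBCIFAR10 above.
def _demo_cifar10_loaders():
    ds = CIFAR10(mode='target', aug=True, batch_size=128)
    return ds.aug_target_train_loader, ds.aug_target_test_loader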
class SUBCIFAR100(data.Dataset):
def __init__(self, mode, aug, train):
self.img_size = 32
self.num_classes = 100
self.num_test = 10000
self.num_train = 50000
self.mean=[0.507, 0.487, 0.441]
self.std=[0.267, 0.256, 0.276]
normalize = transforms.Normalize(mean=self.mean, std=self.std)
self.augmented = transforms.Compose([transforms.RandomHorizontalFlip(), transforms.RandomCrop(32, padding=4),transforms.ToTensor(), normalize])
self.normalized = transforms.Compose([transforms.ToTensor(), normalize])
self.aug_trainset = datasets.CIFAR100(root='/lustre/I/mazhihong/data/CIFAR100', train=True, download=True, transform=self.augmented)
self.aug_testset = datasets.CIFAR100(root='/lustre/I/mazhihong/data/CIFAR100', train=False, download=True, transform=self.augmented)
self.trainset = datasets.CIFAR100(root='/lustre/I/mazhihong/data/CIFAR100', train=True, download=True, transform=self.normalized)
self.testset = datasets.CIFAR100(root='/lustre/I/mazhihong/data/CIFAR100', train=False, download=True, transform=self.normalized)
self.aug_dataset = ConcatDataset([self.aug_trainset, self.aug_testset])
self.dataset = ConcatDataset([self.trainset, self.testset])
self.aug_target_trainset, self.aug_target_testset, self.aug_shadow_trainset, self.aug_shadow_testset, self.aug_distill_trainset = dataset_split(self.aug_dataset, [10000, 10000, 10000, 10000, 20000])
self.aug_distill_testset = self.aug_shadow_testset
self.target_trainset, self.target_testset, self.shadow_trainset, self.shadow_testset, self.distill_trainset = dataset_split(self.dataset, [10000, 10000, 10000, 10000, 20000])
self.distill_testset = self.shadow_testset
if mode == 'target':
if aug:
if train:
self.dataset = self.aug_target_trainset
else:
self.dataset = self.aug_target_testset
else:
if train:
self.dataset = self.target_trainset
else:
self.dataset = self.target_testset
elif mode == 'shadow':
if aug:
if train:
self.dataset = self.aug_shadow_trainset
else:
self.dataset = self.aug_shadow_testset
else:
if train:
self.dataset = self.shadow_trainset
else:
self.dataset = self.shadow_testset
elif 'distill' in mode:
if aug:
if train:
self.dataset = self.aug_distill_trainset
else:
self.dataset = self.aug_distill_testset
else:
if train:
self.dataset = self.distill_trainset
else:
self.dataset = self.distill_testset
self.index = range(int(len(self.dataset)))
def __getitem__(self, idx):
return self.dataset[idx][0], self.dataset[idx][1], self.index[idx]
def __len__(self):
return len(self.index)
class CIFAR100:
def __init__(self, mode, aug, batch_size=128):
self.batch_size = batch_size
self.img_size = 32
self.num_classes = 100
if aug:
if mode == 'target':
self.aug_target_trainset = SUBCIFAR100(mode, aug, True)
self.aug_target_train_loader = torch.utils.data.DataLoader(self.aug_target_trainset, batch_size=batch_size, shuffle=True, num_workers=1)
self.aug_target_testset = SUBCIFAR100(mode, aug, False)
self.aug_target_test_loader = torch.utils.data.DataLoader(self.aug_target_testset, batch_size=batch_size, shuffle=True, num_workers=1)
elif mode == 'shadow':
self.aug_shadow_trainset = SUBCIFAR100(mode, aug, True)
self.aug_shadow_train_loader = torch.utils.data.DataLoader(self.aug_shadow_trainset, batch_size=batch_size, shuffle=True, num_workers=1)
self.aug_shadow_testset = SUBCIFAR100(mode, aug, False)
self.aug_shadow_test_loader = torch.utils.data.DataLoader(self.aug_shadow_testset, batch_size=batch_size, shuffle=True, num_workers=1)
elif 'distill' in mode:
self.aug_distill_trainset = SUBCIFAR100(mode, aug, True)
self.aug_distill_train_loader = torch.utils.data.DataLoader(self.aug_distill_trainset, batch_size=batch_size, shuffle=True, num_workers=1)
self.aug_distill_testset = SUBCIFAR100(mode, aug, False)
self.aug_distill_test_loader = torch.utils.data.DataLoader(self.aug_distill_testset, batch_size=batch_size, shuffle=True, num_workers=1)
else:
if mode == 'target':
self.target_trainset = SUBCIFAR100(mode, aug, True)
self.target_train_loader = torch.utils.data.DataLoader(self.target_trainset, batch_size=batch_size, shuffle=True, num_workers=1)
self.target_testset = SUBCIFAR100(mode, aug, False)
self.target_test_loader = torch.utils.data.DataLoader(self.target_testset, batch_size=batch_size, shuffle=True, num_workers=1)
elif mode == 'shadow':
self.shadow_trainset = SUBCIFAR100(mode, aug, True)
self.shadow_train_loader = torch.utils.data.DataLoader(self.shadow_trainset, batch_size=batch_size, shuffle=True, num_workers=1)
self.shadow_testset = SUBCIFAR100(mode, aug, False)
self.shadow_test_loader = torch.utils.data.DataLoader(self.shadow_testset, batch_size=batch_size, shuffle=True, num_workers=1)
elif 'distill' in mode:
self.distill_trainset = SUBCIFAR100(mode, aug, True)
self.distill_train_loader = torch.utils.data.DataLoader(self.distill_trainset, batch_size=batch_size, shuffle=True, num_workers=1)
self.distill_testset = SUBCIFAR100(mode, aug, False)
self.distill_test_loader = torch.utils.data.DataLoader(self.distill_testset, batch_size=batch_size, shuffle=True, num_workers=1)
class AverageMeter(object):
"""Computes and stores the average and current value"""
def __init__(self):
self.reset()
def reset(self):
self.val = 0
self.avg = 0
self.sum = 0
self.count = 0
def update(self, val, n=1):
self.val = val
self.sum += val * n
self.count += n
self.avg = self.sum / self.count
def accuracy(output, target, topk=(1,)):
"""Computes the precision@k for the specified values of k"""
with torch.no_grad():
maxk = max(topk)
batch_size = target.size(0)
_, pred = output.topk(maxk, 1, True, True)
pred = pred.t()
correct = pred.eq(target.view(1, -1).expand_as(pred))
res = []
for k in topk:
correct_k = correct[:k].reshape(-1).float().sum(0, keepdim=True)
res.append(correct_k.mul_(100.0 / batch_size))
return res
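# Illustrative usage (not from the original file): for logits of shape (N, C)
# and integer labels of shape (N,), accuracy(logits, labels, topk=(1, 5))
# returns [top1, top5] as per-batch percentages.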
from torch.autograd import Function
class FakeQuantize(Function):
@staticmethod
def forward(ctx, x, qparam):
x = qparam.quantize_tensor(x)
x = qparam.dequantize_tensor(x)
return x
@staticmethod
def backward(ctx, grad_output):
return grad_output, None
class GlobalVariables:
SELF_INPLANES = 0
# -*- coding: utf-8 -*-
# Shares global variables across multiple modules
def _init():  # initialize the shared dict
global _global_dict
_global_dict = {}
def set_value(value,is_bias=False):
    # store a global value (index 0: bias grid, index 1: everything else)
if is_bias:
_global_dict[0] = value
else:
_global_dict[1] = value
def get_value(is_bias=False):  # bias gets its own precision, separate from the other variables
if is_bias:
return _global_dict[0]
else:
return _global_dict[1]
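# Typical usage (illustrative): call _init() once at startup, publish the
# quantization grids via set_value(grid) and set_value(bias_grid, is_bias=True),
# then read them back from any module with get_value(is_bias=...).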
import os
import argparse
import utils
import normal
import MIA
def train_networks(args):
device = utils.get_pytorch_device()
utils.create_path('./outputs')
if 'distill' in args.mode:
model_path_tar = 'networks/{}/{}'.format(0, args.mode.split('_')[-1])
utils.create_path(model_path_tar)
model_path_dis = 'networks/{}/{}'.format(args.seed, args.mode)
utils.create_path(model_path_dis)
else:
model_path_tar = 'networks/{}/{}'.format(args.seed, args.mode)
utils.create_path(model_path_tar)
model_path_dis = None
    utils.set_logger('outputs/train_models')
normal.train_models(args, model_path_tar, model_path_dis, device)
def membership_inference_attack(args):
print(f'--------------{args.mia_type}-------------')
device = utils.get_pytorch_device()
if args.mia_type == 'build-dataset':
models_path = 'networks/{}'.format(0)
MIA.build_trajectory_membership_dataset(args, models_path, device)
if args.mia_type == 'black-box':
trained_models_path = 'networks/{}'.format(args.seed)
MIA.trajectory_black_box_membership_inference_attack(args, trained_models_path, device)
if __name__ == '__main__':
parser = argparse.ArgumentParser(description='TrajectoryMIA')
parser.add_argument('--action', type=int, default=0, help=[0, 1])
parser.add_argument('--seed', type=int, default=0)
parser.add_argument('--mode', type=str, default='target', help=['target', 'shadow', 'distill_target', 'distill_shadow'])
parser.add_argument('--model', type=str, default='resnet18', help=['resnet18','resnet50','resnet152','mobilenetv2'])
parser.add_argument('--data', type=str, default='cifar10', help=['cinic10', 'cifar10', 'cifar100', 'gtsrb'])
parser.add_argument('--epochs', type=int, default=100)
parser.add_argument('--model_distill', type=str, default='resnet18',help=['resnet18','resnet50','resnet152','mobilenetv2'])
parser.add_argument('--epochs_distill', type=int, default=100)
parser.add_argument('--mia_type', type=str, help=['build-dataset', 'black-box'])
args = parser.parse_args()
utils.set_random_seeds(args.seed)
print('random seed:{}'.format(args.seed))
if args.action == 0:
train_networks(args)
elif args.action == 1:
membership_inference_attack(args)
import math
import numpy as np
import gol
import torch
import torch.nn as nn
import torch.nn.functional as F
from torch.autograd import Variable
from function import FakeQuantize
# Snap each element to the nearest value on the quantization grid
# def get_nearest_val(quant_type,x,is_bias=False):
# if quant_type=='INT':
# return x.round_()
# plist = gol.get_value(is_bias)
# # print('get')
# # print(plist)
# # x = x / 64
# shape = x.shape
# xhard = x.view(-1)
# plist = plist.type_as(x)
# # index of the nearest power-of-two value
# idx = (xhard.unsqueeze(0) - plist.unsqueeze(1)).abs().min(dim=0)[1]
# xhard = plist[idx].view(shape)
# xout = (xhard - x).detach() + x
# # xout = xout * 64
# return xout
def get_nearest_val(quant_type, x, is_bias=False, block_size=1000000):
if quant_type == 'INT':
return x.round_()
plist = gol.get_value(is_bias)
shape = x.shape
xhard = x.view(-1)
xout = torch.zeros_like(xhard)
plist = plist.type_as(x)
n_blocks = (x.numel() + block_size - 1) // block_size
for i in range(n_blocks):
start_idx = i * block_size
end_idx = min(start_idx + block_size, xhard.numel())
        xblock = xhard[start_idx:end_idx]
        plist_block = plist.unsqueeze(1)
        # index of the nearest grid value for each element in the block
        idx = (xblock.unsqueeze(0) - plist_block).abs().min(dim=0)[1]
xhard_block = plist[idx].view(xblock.shape)
xout[start_idx:end_idx] = (xhard_block - xblock).detach() + xblock
xout = xout.view(shape)
return xout
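# Illustrative example (not from the original file): with plist =
# torch.tensor([0.25, 0.5, 1.0]) (a POT grid), the value 0.6 snaps to 0.5, its
# nearest grid point. The expression (xhard_block - xblock).detach() + xblock
# is a straight-through estimator: the forward pass returns the snapped value
# while gradients flow through to the original x.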
# For symmetric signed quantization: the maximum of the quantized range
def get_qmax(quant_type,num_bits=None, e_bits=None):
if quant_type == 'INT':
qmax = 2. ** (num_bits - 1) - 1
elif quant_type == 'POT':
qmax = 1
    else:  # FLOAT
m_bits = num_bits - 1 - e_bits
dist_m = 2 ** (-m_bits)
e = 2 ** (e_bits - 1)
expo = 2 ** e
        m = 2 ** m_bits - 1
frac = 1. + m * dist_m
qmax = frac * expo
return qmax
# Signed quantization throughout, so the zero point is always 0
def calcScaleZeroPoint(min_val, max_val, qmax):
scale = torch.max(max_val.abs(),min_val.abs()) / qmax
zero_point = torch.tensor(0.)
return scale, zero_point
# Quantize the input; both input and output are tensors
def quantize_tensor(quant_type, x, scale, zero_point, qmax, is_bias=False):
    # the quantized range follows directly from the bit width
qmin = -qmax
q_x = zero_point + x / scale
q_x.clamp_(qmin, qmax)
q_x = get_nearest_val(quant_type, q_x, is_bias)
return q_x
# The bias uses a different precision; num_bits/e_bits depend on the quant type
def bias_qmax(quant_type):
if quant_type == 'INT':
return get_qmax(quant_type, 64)
elif quant_type == 'POT':
return get_qmax(quant_type)
else:
return get_qmax(quant_type, 16, 7)
# Dequantize back to FP32; no further clamping is needed
def dequantize_tensor(q_x, scale, zero_point):
return scale * (q_x - zero_point)
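# Illustrative round trip for symmetric INT8 (the numbers are worked out here,
# not taken from the original code):
#   qmax = get_qmax('INT', num_bits=8)                        # 127.0
#   scale, zp = calcScaleZeroPoint(torch.tensor(-0.5), torch.tensor(1.0), qmax)
#   q = quantize_tensor('INT', torch.tensor([0.3]), scale, zp, qmax)   # ~38.0
#   x = dequantize_tensor(q, scale, zp)                       # ~0.299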
class QParam(nn.Module):
def __init__(self,quant_type, num_bits=8, e_bits=3):
super(QParam, self).__init__()
self.quant_type = quant_type
self.num_bits = num_bits
self.e_bits = e_bits
self.qmax = get_qmax(quant_type, num_bits, e_bits)
scale = torch.tensor([], requires_grad=False)
zero_point = torch.tensor([], requires_grad=False)
min = torch.tensor([], requires_grad=False)
max = torch.tensor([], requires_grad=False)
        # registering them as buffers records them in the state_dict
self.register_buffer('scale', scale)
self.register_buffer('zero_point', zero_point)
self.register_buffer('min', min)
self.register_buffer('max', max)
    # Update the observed range and the quantization parameters
def update(self, tensor):
if self.max.nelement() == 0 or self.max.data < tensor.max().data:
self.max.data = tensor.max().data
self.max.clamp_(min=0)
if self.min.nelement() == 0 or self.min.data > tensor.min().data:
self.min.data = tensor.min().data
self.min.clamp_(max=0)
self.scale, self.zero_point = calcScaleZeroPoint(self.min, self.max, self.qmax)
def quantize_tensor(self, tensor):
return quantize_tensor(self.quant_type, tensor, self.scale, self.zero_point, self.qmax)
def dequantize_tensor(self, q_x):
return dequantize_tensor(q_x, self.scale, self.zero_point)
    # Ensures the buffers can be restored from a state_dict
def _load_from_state_dict(self, state_dict, prefix, local_metadata, strict, missing_keys, unexpected_keys,
error_msgs):
key_names = ['scale', 'zero_point', 'min', 'max']
for key in key_names:
value = getattr(self, key)
value.data = state_dict[prefix + key].data
state_dict.pop(prefix + key)
    # The returned string is what printing this object shows
def __str__(self):
info = 'scale: %.10f ' % self.scale
info += 'zp: %.6f ' % self.zero_point
info += 'min: %.6f ' % self.min
info += 'max: %.6f' % self.max
return info
# Parent class of the concrete quantized layers; qi/qo quantize the input/output
class QModule(nn.Module):
def __init__(self,quant_type, qi=True, qo=True, num_bits=8, e_bits=3):
super(QModule, self).__init__()
if qi:
self.qi = QParam(quant_type,num_bits, e_bits)
if qo:
self.qo = QParam(quant_type,num_bits, e_bits)
self.quant_type = quant_type
self.num_bits = num_bits
self.e_bits = e_bits
self.bias_qmax = bias_qmax(quant_type)
def freeze(self):
        pass
def fakefreeze(self):
pass
def quantize_inference(self, x):
raise NotImplementedError('quantize_inference should be implemented.')
"""
QModule 量化卷积
:quant_type: 量化类型
:conv_module: 卷积模块
:qi: 是否量化输入特征图
:qo: 是否量化输出特征图
:num_bits: 8位bit数
"""
class QConv2d(QModule):
def __init__(self, quant_type, conv_module, qi=True, qo=True, num_bits=8, e_bits=3):
super(QConv2d, self).__init__(quant_type, qi, qo, num_bits, e_bits)
self.conv_module = conv_module
self.qw = QParam(quant_type, num_bits,e_bits)
        self.register_buffer('M', torch.tensor([], requires_grad=False))  # register M as a buffer
    # freeze() fixes the truly-quantized weights and writes them back to the original full-precision layer, which eases divergence computation
def freeze(self, qi=None, qo=None):
if hasattr(self, 'qi') and qi is not None:
raise ValueError('qi has been provided in init function.')
if not hasattr(self, 'qi') and qi is None:
            raise ValueError('qi does not exist and should be provided.')
if hasattr(self, 'qo') and qo is not None:
raise ValueError('qo has been provided in init function.')
if not hasattr(self, 'qo') and qo is None:
            raise ValueError('qo does not exist and should be provided.')
        # Pooling/activation inputs need no extra min/max statistics; they share the previous layer's output range
if qi is not None:
self.qi = qi
if qo is not None:
self.qo = qo
        # Following https://zhuanlan.zhihu.com/p/156835141, this is the coefficient in Eq. (3)
self.M.data = (self.qw.scale * self.qi.scale / self.qo.scale).data
self.conv_module.weight.data = self.qw.quantize_tensor(self.conv_module.weight.data)
self.conv_module.weight.data = self.conv_module.weight.data - self.qw.zero_point
self.conv_module.bias.data = quantize_tensor(self.quant_type,
self.conv_module.bias.data, scale=self.qi.scale * self.qw.scale,
zero_point=0.,qmax=self.bias_qmax, is_bias=True)
def fakefreeze(self):
self.conv_module.weight.data = self.qw.dequantize_tensor(self.conv_module.weight.data)
self.conv_module.bias.data = dequantize_tensor(self.conv_module.bias.data,scale=self.qi.scale * self.qw.scale, zero_point=0.)
    def forward(self, x):  # forward pass; x is a floating-point tensor
if hasattr(self, 'qi'):
self.qi.update(x)
            x = FakeQuantize.apply(x, self.qi)  # fake-quantize the input tensor
        # update qw before the forward so the scale is correct when quantizing the weight
self.qw.update(self.conv_module.weight.data)
        # Note: this mainly collects the per-layer ranges of x and weight; the bias is not quantized here
# tmp_wgt = FakeQuantize.apply(self.conv_module.weight, self.qw)
# x = F.conv2d(x, tmp_wgt, self.conv_module.bias,
# stride=self.conv_module.stride,
# padding=self.conv_module.padding, dilation=self.conv_module.dilation,
# groups=self.conv_module.groups)
x = F.conv2d(x, FakeQuantize.apply(self.conv_module.weight, self.qw), self.conv_module.bias,
stride=self.conv_module.stride,
padding=self.conv_module.padding, dilation=self.conv_module.dilation,
groups=self.conv_module.groups)
if hasattr(self, 'qo'):
self.qo.update(x)
x = FakeQuantize.apply(x, self.qo)
return x
    # Uses q_a = M * (sum((q_w - Z_w) * (q_x - Z_x)) + q_b)
    def quantize_inference(self, x):  # the input here is the already-quantized q_x
x = x - self.qi.zero_point
x = self.conv_module(x)
x = self.M * x
# if self.quant_type is 'INT':
x = get_nearest_val(self.quant_type,x)
x = x + self.qo.zero_point
return x
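    # Why the single multiplier works (sketch, derived from the scales above):
    # convolving (q_x - Z_x) with the integer weights accumulates in units of
    # s_w * s_i; multiplying by M = s_w * s_i / s_o re-expresses the result in
    # the output scale s_o before Z_o is added back.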
class QLinear(QModule):
def __init__(self, quant_type, fc_module, qi=True, qo=True, num_bits=8, e_bits=3):
super(QLinear, self).__init__(quant_type, qi, qo, num_bits, e_bits)
self.fc_module = fc_module
self.qw = QParam(quant_type, num_bits, e_bits)
        self.register_buffer('M', torch.tensor([], requires_grad=False))  # register M as a buffer
def freeze(self, qi=None, qo=None):
if hasattr(self, 'qi') and qi is not None:
raise ValueError('qi has been provided in init function.')
if not hasattr(self, 'qi') and qi is None:
            raise ValueError('qi does not exist and should be provided.')
if hasattr(self, 'qo') and qo is not None:
raise ValueError('qo has been provided in init function.')
if not hasattr(self, 'qo') and qo is None:
            raise ValueError('qo does not exist and should be provided.')
if qi is not None:
self.qi = qi
if qo is not None:
self.qo = qo
self.M.data = (self.qw.scale * self.qi.scale / self.qo.scale).data
self.fc_module.weight.data = self.qw.quantize_tensor(self.fc_module.weight.data)
self.fc_module.weight.data = self.fc_module.weight.data - self.qw.zero_point
self.fc_module.bias.data = quantize_tensor(self.quant_type,
self.fc_module.bias.data, scale=self.qi.scale * self.qw.scale,
zero_point=0., qmax=self.bias_qmax, is_bias=True)
def fakefreeze(self):
self.fc_module.weight.data = self.qw.dequantize_tensor(self.fc_module.weight.data)
self.fc_module.bias.data = dequantize_tensor(self.fc_module.bias.data, scale=self.qi.scale * self.qw.scale, zero_point=0.)
def forward(self, x):
if hasattr(self, 'qi'):
self.qi.update(x)
x = FakeQuantize.apply(x, self.qi)
self.qw.update(self.fc_module.weight.data)
# tmp_wgt = FakeQuantize.apply(self.fc_module.weight, self.qw)
# x = F.linear(x, tmp_wgt, self.fc_module.bias)
x = F.linear(x, FakeQuantize.apply(self.fc_module.weight, self.qw), self.fc_module.bias)
if hasattr(self, 'qo'):
self.qo.update(x)
x = FakeQuantize.apply(x, self.qo)
return x
def quantize_inference(self, x):
x = x - self.qi.zero_point
x = self.fc_module(x)
x = self.M * x
# if self.quant_type is 'INT':
x = get_nearest_val(self.quant_type,x)
x = x + self.qo.zero_point
return x
class QReLU(QModule):
def __init__(self,quant_type, qi=False, qo=True, num_bits=8, e_bits=3):
super(QReLU, self).__init__(quant_type, qi, qo, num_bits, e_bits)
def freeze(self, qi=None):
if hasattr(self, 'qi') and qi is not None:
raise ValueError('qi has been provided in init function.')
if not hasattr(self, 'qi') and qi is None:
            raise ValueError('qi does not exist and should be provided.')
if qi is not None:
self.qi = qi
def forward(self, x):
if hasattr(self, 'qi'):
self.qi.update(x)
x = FakeQuantize.apply(x, self.qi)
x = F.relu(x)
return x
def quantize_inference(self, x):
x = x.clone()
# x[x < self.qi.zero_point] = self.qi.zero_point
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
a = self.qi.zero_point.float().to(device)
x[x < a] = a
return x
class QMaxPooling2d(QModule):
def __init__(self, quant_type, kernel_size=3, stride=1, padding=0, qi=False, qo=True, num_bits=8,e_bits=3):
super(QMaxPooling2d, self).__init__(quant_type, qi, qo, num_bits, e_bits)
self.kernel_size = kernel_size
self.stride = stride
self.padding = padding
def freeze(self, qi=None):
if hasattr(self, 'qi') and qi is not None:
raise ValueError('qi has been provided in init function.')
if not hasattr(self, 'qi') and qi is None:
            raise ValueError('qi does not exist and should be provided.')
if qi is not None:
self.qi = qi
def forward(self, x):
if hasattr(self, 'qi'):
self.qi.update(x)
x = FakeQuantize.apply(x, self.qi)
x = F.max_pool2d(x, self.kernel_size, self.stride, self.padding)
return x
def quantize_inference(self, x):
return F.max_pool2d(x, self.kernel_size, self.stride, self.padding)
class QConvBNReLU(QModule):
def __init__(self, quant_type, conv_module, bn_module, qi=True, qo=True, num_bits=8, e_bits=3):
super(QConvBNReLU, self).__init__(quant_type, qi, qo, num_bits, e_bits)
self.conv_module = conv_module
self.bn_module = bn_module
self.qw = QParam(quant_type, num_bits,e_bits)
        self.register_buffer('M', torch.tensor([], requires_grad=False))  # register M as a buffer
def fold_bn(self, mean, std):
if self.bn_module.affine:
gamma_ = self.bn_module.weight / std
weight = self.conv_module.weight * gamma_.view(self.conv_module.out_channels, 1, 1, 1)
if self.conv_module.bias is not None:
bias = gamma_ * self.conv_module.bias - gamma_ * mean + self.bn_module.bias
else:
bias = self.bn_module.bias - gamma_ * mean
else:
gamma_ = 1 / std
weight = self.conv_module.weight * gamma_
if self.conv_module.bias is not None:
bias = gamma_ * self.conv_module.bias - gamma_ * mean
else:
bias = -gamma_ * mean
return weight, bias
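    # Folding rationale (sketch): BN computes y = gamma * (conv(x) - mu) / sigma + beta.
    # With gamma_ = gamma / sigma this equals a single convolution with
    # weight' = gamma_ * W and bias' = gamma_ * (b - mu) + beta, which is what
    # fold_bn returns.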
def freeze(self, qi=None, qo=None):
if hasattr(self, 'qi') and qi is not None:
raise ValueError('qi has been provided in init function.')
if not hasattr(self, 'qi') and qi is None:
            raise ValueError('qi does not exist and should be provided.')
if hasattr(self, 'qo') and qo is not None:
raise ValueError('qo has been provided in init function.')
if not hasattr(self, 'qo') and qo is None:
            raise ValueError('qo does not exist and should be provided.')
if qi is not None:
self.qi = qi
if qo is not None:
self.qo = qo
self.M.data = (self.qw.scale * self.qi.scale / self.qo.scale).data
std = torch.sqrt(self.bn_module.running_var + self.bn_module.eps)
weight, bias = self.fold_bn(self.bn_module.running_mean, std)
self.conv_module.weight.data = self.qw.quantize_tensor(weight.data)
self.conv_module.weight.data = self.conv_module.weight.data - self.qw.zero_point
if self.conv_module.bias is None:
self.conv_module.bias = nn.Parameter(quantize_tensor(self.quant_type,
bias, scale=self.qi.scale * self.qw.scale,
zero_point=0., qmax=self.bias_qmax,is_bias=True))
else:
self.conv_module.bias.data = quantize_tensor(self.quant_type,
bias, scale=self.qi.scale * self.qw.scale,
zero_point=0., qmax=self.bias_qmax,is_bias=True)
def fakefreeze(self):
self.conv_module.weight.data = self.qw.dequantize_tensor(self.conv_module.weight.data)
self.conv_module.bias.data = dequantize_tensor(self.conv_module.bias.data,scale=self.qi.scale * self.qw.scale, zero_point=0.)
def forward(self, x):
if hasattr(self, 'qi'):
self.qi.update(x)
x = FakeQuantize.apply(x, self.qi)
if self.training:
y = F.conv2d(x, self.conv_module.weight, self.conv_module.bias,
stride=self.conv_module.stride,
padding=self.conv_module.padding,
dilation=self.conv_module.dilation,
groups=self.conv_module.groups)
y = y.permute(1, 0, 2, 3) # NCHW -> CNHW
y = y.contiguous().view(self.conv_module.out_channels, -1) # CNHW -> C,NHW
# mean = y.mean(1)
# var = y.var(1)
mean = y.mean(1).detach()
var = y.var(1).detach()
self.bn_module.running_mean = \
(1 - self.bn_module.momentum) * self.bn_module.running_mean + \
self.bn_module.momentum * mean
self.bn_module.running_var = \
(1 - self.bn_module.momentum) * self.bn_module.running_var + \
self.bn_module.momentum * var
else:
mean = Variable(self.bn_module.running_mean)
var = Variable(self.bn_module.running_var)
std = torch.sqrt(var + self.bn_module.eps)
weight, bias = self.fold_bn(mean, std)
self.qw.update(weight.data)
x = F.conv2d(x, FakeQuantize.apply(weight, self.qw), bias,
stride=self.conv_module.stride,
padding=self.conv_module.padding, dilation=self.conv_module.dilation,
groups=self.conv_module.groups)
x = F.relu(x)
if hasattr(self, 'qo'):
self.qo.update(x)
x = FakeQuantize.apply(x, self.qo)
return x
def quantize_inference(self, x):
x = x - self.qi.zero_point
x = self.conv_module(x)
x = self.M * x
# if self.quant_type is 'INT':
x = get_nearest_val(self.quant_type,x)
x = x + self.qo.zero_point
x.clamp_(min=0)
return x
class QConvBN(QModule):
def __init__(self, quant_type, conv_module, bn_module, qi=True, qo=True, num_bits=8, e_bits=3):
super(QConvBN, self).__init__(quant_type, qi, qo, num_bits, e_bits)
self.conv_module = conv_module
self.bn_module = bn_module
self.qw = QParam(quant_type, num_bits,e_bits)
        self.register_buffer('M', torch.tensor([], requires_grad=False))  # register M as a buffer
def fold_bn(self, mean, std):
if self.bn_module.affine:
gamma_ = self.bn_module.weight / std
weight = self.conv_module.weight * gamma_.view(self.conv_module.out_channels, 1, 1, 1)
if self.conv_module.bias is not None:
bias = gamma_ * self.conv_module.bias - gamma_ * mean + self.bn_module.bias
else:
bias = self.bn_module.bias - gamma_ * mean
else:
gamma_ = 1 / std
weight = self.conv_module.weight * gamma_
if self.conv_module.bias is not None:
bias = gamma_ * self.conv_module.bias - gamma_ * mean
else:
bias = -gamma_ * mean
return weight, bias
def freeze(self, qi=None, qo=None):
if hasattr(self, 'qi') and qi is not None:
raise ValueError('qi has been provided in init function.')
if not hasattr(self, 'qi') and qi is None:
            raise ValueError('qi does not exist and should be provided.')
if hasattr(self, 'qo') and qo is not None:
raise ValueError('qo has been provided in init function.')
if not hasattr(self, 'qo') and qo is None:
            raise ValueError('qo does not exist and should be provided.')
if qi is not None:
self.qi = qi
if qo is not None:
self.qo = qo
self.M.data = (self.qw.scale * self.qi.scale / self.qo.scale).data
std = torch.sqrt(self.bn_module.running_var + self.bn_module.eps)
weight, bias = self.fold_bn(self.bn_module.running_mean, std)
self.conv_module.weight.data = self.qw.quantize_tensor(weight.data)
self.conv_module.weight.data = self.conv_module.weight.data - self.qw.zero_point
if self.conv_module.bias is None:
self.conv_module.bias = nn.Parameter(quantize_tensor(self.quant_type,
bias, scale=self.qi.scale * self.qw.scale,
zero_point=0., qmax=self.bias_qmax,is_bias=True))
else:
self.conv_module.bias.data = quantize_tensor(self.quant_type,
bias, scale=self.qi.scale * self.qw.scale,
zero_point=0., qmax=self.bias_qmax,is_bias=True)
def fakefreeze(self):
self.conv_module.weight.data = self.qw.dequantize_tensor(self.conv_module.weight.data)
self.conv_module.bias.data = dequantize_tensor(self.conv_module.bias.data,scale=self.qi.scale * self.qw.scale, zero_point=0.)
def forward(self, x):
if hasattr(self, 'qi'):
self.qi.update(x)
x = FakeQuantize.apply(x, self.qi)
if self.training:
y = F.conv2d(x, self.conv_module.weight, self.conv_module.bias,
stride=self.conv_module.stride,
padding=self.conv_module.padding,
dilation=self.conv_module.dilation,
groups=self.conv_module.groups)
y = y.permute(1, 0, 2, 3) # NCHW -> CNHW
y = y.contiguous().view(self.conv_module.out_channels, -1) # CNHW -> C,NHW
# mean = y.mean(1)
# var = y.var(1)
mean = y.mean(1).detach()
var = y.var(1).detach()
self.bn_module.running_mean = \
(1 - self.bn_module.momentum) * self.bn_module.running_mean + \
self.bn_module.momentum * mean
self.bn_module.running_var = \
(1 - self.bn_module.momentum) * self.bn_module.running_var + \
self.bn_module.momentum * var
else:
mean = Variable(self.bn_module.running_mean)
var = Variable(self.bn_module.running_var)
std = torch.sqrt(var + self.bn_module.eps)
weight, bias = self.fold_bn(mean, std)
self.qw.update(weight.data)
x = F.conv2d(x, FakeQuantize.apply(weight, self.qw), bias,
stride=self.conv_module.stride,
padding=self.conv_module.padding, dilation=self.conv_module.dilation,
groups=self.conv_module.groups)
# x = F.relu(x)
if hasattr(self, 'qo'):
self.qo.update(x)
x = FakeQuantize.apply(x, self.qo)
return x
def quantize_inference(self, x):
x = x - self.qi.zero_point
x = self.conv_module(x)
x = self.M * x
# if self.quant_type is 'INT':
x = get_nearest_val(self.quant_type,x)
x = x + self.qo.zero_point
# x.clamp_(min=0)
return x
# TODO: revisit; this probably needs a qo
class QAdaptiveAvgPool2d(QModule):
def __init__(self, quant_type, qi=False, qo=True, num_bits=8, e_bits=3):
super(QAdaptiveAvgPool2d, self).__init__(quant_type,qi,qo,num_bits,e_bits)
        self.register_buffer('M', torch.tensor([], requires_grad=False))  # register M as a buffer
def freeze(self, qi=None, qo=None):
if hasattr(self, 'qi') and qi is not None:
raise ValueError('qi has been provided in init function.')
if not hasattr(self, 'qi') and qi is None:
            raise ValueError('qi does not exist and should be provided.')
if qi is not None:
self.qi = qi
if hasattr(self, 'qo') and qo is not None:
raise ValueError('qo has been provided in init function.')
if not hasattr(self, 'qo') and qo is None:
            raise ValueError('qo does not exist and should be provided.')
if qo is not None:
self.qo = qo
self.M.data = (self.qi.scale / self.qo.scale).data
def forward(self, x):
if hasattr(self, 'qi'):
self.qi.update(x)
            # as with QReLU: qi's scale was just updated, and x is put on the
            # quantization grid (usually the previous layer's qo is True, so x
            # already lies on the grid)
            x = FakeQuantize.apply(x, self.qi)
        x = F.adaptive_avg_pool2d(x, (1, 1))  # quantizing input and output is all this layer needs
if hasattr(self, 'qo'):
self.qo.update(x)
x = FakeQuantize.apply(x, self.qo)
return x
def quantize_inference(self, x):
        x = F.adaptive_avg_pool2d(x, (1, 1))  # quantizing input and output is all this layer needs
x = self.M * x
# if self.quant_type is 'INT':
x = get_nearest_val(self.quant_type,x)
return x
class QConvBNReLU6(QModule):
def __init__(self, quant_type, conv_module, bn_module, qi=True, qo=True, num_bits=8, e_bits=3):
super(QConvBNReLU6, self).__init__(quant_type, qi, qo, num_bits, e_bits)
self.conv_module = conv_module
self.bn_module = bn_module
self.qw = QParam(quant_type, num_bits,e_bits)
        self.register_buffer('M', torch.tensor([], requires_grad=False))  # register M as a buffer
def fold_bn(self, mean, std):
if self.bn_module.affine:
gamma_ = self.bn_module.weight / std
weight = self.conv_module.weight * gamma_.view(self.conv_module.out_channels, 1, 1, 1)
if self.conv_module.bias is not None:
bias = gamma_ * self.conv_module.bias - gamma_ * mean + self.bn_module.bias
else:
bias = self.bn_module.bias - gamma_ * mean
else:
gamma_ = 1 / std
weight = self.conv_module.weight * gamma_
if self.conv_module.bias is not None:
bias = gamma_ * self.conv_module.bias - gamma_ * mean
else:
bias = -gamma_ * mean
return weight, bias
def freeze(self, qi=None, qo=None):
if hasattr(self, 'qi') and qi is not None:
raise ValueError('qi has been provided in init function.')
if not hasattr(self, 'qi') and qi is None:
            raise ValueError('qi does not exist and should be provided.')
if hasattr(self, 'qo') and qo is not None:
raise ValueError('qo has been provided in init function.')
if not hasattr(self, 'qo') and qo is None:
            raise ValueError('qo does not exist and should be provided.')
if qi is not None:
self.qi = qi
if qo is not None:
self.qo = qo
self.M.data = (self.qw.scale * self.qi.scale / self.qo.scale).data
std = torch.sqrt(self.bn_module.running_var + self.bn_module.eps)
weight, bias = self.fold_bn(self.bn_module.running_mean, std)
self.conv_module.weight.data = self.qw.quantize_tensor(weight.data)
self.conv_module.weight.data = self.conv_module.weight.data - self.qw.zero_point
self.conv_module.bias.data = quantize_tensor(self.quant_type,
bias, scale=self.qi.scale * self.qw.scale,
zero_point=0., qmax=self.bias_qmax,is_bias=True)
def fakefreeze(self):
self.conv_module.weight.data = self.qw.dequantize_tensor(self.conv_module.weight.data)
self.conv_module.bias.data = dequantize_tensor(self.conv_module.bias.data,scale=self.qi.scale * self.qw.scale, zero_point=0.)
def forward(self, x):
if hasattr(self, 'qi'):
self.qi.update(x)
x = FakeQuantize.apply(x, self.qi)
if self.training:
y = F.conv2d(x, self.conv_module.weight, self.conv_module.bias,
stride=self.conv_module.stride,
padding=self.conv_module.padding,
dilation=self.conv_module.dilation,
groups=self.conv_module.groups)
y = y.permute(1, 0, 2, 3) # NCHW -> CNHW
y = y.contiguous().view(self.conv_module.out_channels, -1) # CNHW -> C,NHW
# mean = y.mean(1)
# var = y.var(1)
mean = y.mean(1).detach()
var = y.var(1).detach()
self.bn_module.running_mean = \
(1 - self.bn_module.momentum) * self.bn_module.running_mean + \
self.bn_module.momentum * mean
self.bn_module.running_var = \
(1 - self.bn_module.momentum) * self.bn_module.running_var + \
self.bn_module.momentum * var
else:
mean = Variable(self.bn_module.running_mean)
var = Variable(self.bn_module.running_var)
std = torch.sqrt(var + self.bn_module.eps)
weight, bias = self.fold_bn(mean, std)
self.qw.update(weight.data)
x = F.conv2d(x, FakeQuantize.apply(weight, self.qw), bias,
stride=self.conv_module.stride,
padding=self.conv_module.padding, dilation=self.conv_module.dilation,
groups=self.conv_module.groups)
x = F.relu6(x)
if hasattr(self, 'qo'):
self.qo.update(x)
x = FakeQuantize.apply(x, self.qo)
return x
def quantize_inference(self, x):
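        # quantize the ReLU6 cap (6) into qo's range so the clamp below
        # operates on quantized values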
a = torch.tensor(6)
a = self.qo.quantize_tensor(a)
x = x - self.qi.zero_point
x = self.conv_module(x)
x = self.M * x
# if self.quant_type is not 'POT':
# x = get_nearest_val(self.quant_type,x)
x = get_nearest_val(self.quant_type,x)
        x = x + self.qo.zero_point  # now in qo's range
x.clamp_(min=0, max=a.item())
return x
class QModule_2(nn.Module):
def __init__(self,quant_type, qi0=True, qi1=True, qo=True, num_bits=8, e_bits=3):
super(QModule_2, self).__init__()
        if qi0:
            self.qi0 = QParam(quant_type, num_bits, e_bits)  # qi0 is fully configured here by num_bits and quant_type
        if qi1:
            self.qi1 = QParam(quant_type, num_bits, e_bits)  # likewise for qi1
        if qo:
            self.qo = QParam(quant_type, num_bits, e_bits)  # likewise for qo
self.quant_type = quant_type
self.num_bits = num_bits
self.e_bits = e_bits
self.bias_qmax = bias_qmax(quant_type)
def freeze(self):
pass
def fakefreeze(self):
pass
def quantize_inference(self, x):
raise NotImplementedError('quantize_inference should be implemented.')
class QElementwiseAdd(QModule_2):
def __init__(self, quant_type, qi0=True, qi1=True, qo=True, num_bits=8, e_bits=3):
super(QElementwiseAdd, self).__init__(quant_type, qi0, qi1, qo, num_bits, e_bits)
        self.register_buffer('M0', torch.tensor([], requires_grad=False))  # register M0 as a buffer
        self.register_buffer('M1', torch.tensor([], requires_grad=False))  # register M1 as a buffer
def freeze(self, qi0=None, qi1=None ,qo=None):
        if hasattr(self, 'qi0') and qi0 is not None:
            raise ValueError('qi0 has been provided in init function.')
        if not hasattr(self, 'qi0') and qi0 is None:
            raise ValueError('qi0 does not exist and should be provided.')
        if hasattr(self, 'qi1') and qi1 is not None:
            raise ValueError('qi1 has been provided in init function.')
        if not hasattr(self, 'qi1') and qi1 is None:
            raise ValueError('qi1 does not exist and should be provided.')
        if hasattr(self, 'qo') and qo is not None:
            raise ValueError('qo has been provided in init function.')
        if not hasattr(self, 'qo') and qo is None:
            raise ValueError('qo does not exist and should be provided.')
        # Pooling/activation inputs need no extra min/max statistics; they share the previous layer's output range
if qi0 is not None:
self.qi0 = qi0
if qi1 is not None:
self.qi1 = qi1
if qo is not None:
self.qo = qo
        # Following https://zhuanlan.zhihu.com/p/156835141, these are the coefficients in Eq. (3)
self.M0.data = self.qi0.scale / self.qo.scale
self.M1.data = self.qi1.scale / self.qi0.scale
# self.M0.data = self.qi0.scale / self.qo.scale
# self.M1.data = self.qi1.scale / self.qo.scale
    def forward(self, x0, x1):  # forward pass; x0/x1 are floating-point tensors
if hasattr(self, 'qi0'):
self.qi0.update(x0)
            x0 = FakeQuantize.apply(x0, self.qi0)  # fake-quantize the first input
if hasattr(self, 'qi1'):
self.qi1.update(x1)
            x1 = FakeQuantize.apply(x1, self.qi1)  # fake-quantize the second input
x = x0 + x1
if hasattr(self, 'qo'):
self.qo.update(x)
x = FakeQuantize.apply(x, self.qo)
return x
    def quantize_inference(self, x0, x1):  # the inputs here are already-quantized values
x0 = x0 - self.qi0.zero_point
x1 = x1 - self.qi1.zero_point
x = self.M0 * (x0 + x1*self.M1)
# if self.quant_type is 'INT':
x = get_nearest_val(self.quant_type,x)
x = x + self.qo.zero_point
return x
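    # Rescaling sketch (derivation, not from the original comments): requiring
    # s_o * (q_o - Z_o) = s0 * (q0 - Z0) + s1 * (q1 - Z1) gives
    # q_o = (s0 / s_o) * [(q0 - Z0) + (s1 / s0) * (q1 - Z1)] + Z_o,
    # i.e. M0 = s0 / s_o and M1 = s1 / s0, exactly as set in freeze().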
# new modules for full-precision model - fold bn
# inference will need matching adaptation as well
class ConvBNReLU(nn.Module):
def __init__(self,conv_module, bn_module):
super(ConvBNReLU, self).__init__()
self.conv_module = conv_module
self.bn_module = bn_module
def fold_bn(self, mean, std):
if self.bn_module.affine:
gamma_ = self.bn_module.weight / std
weight = self.conv_module.weight * gamma_.view(self.conv_module.out_channels, 1, 1, 1)
if self.conv_module.bias is not None:
bias = gamma_ * self.conv_module.bias - gamma_ * mean + self.bn_module.bias
else:
bias = self.bn_module.bias - gamma_ * mean
else:
gamma_ = 1 / std
weight = self.conv_module.weight * gamma_
if self.conv_module.bias is not None:
bias = gamma_ * self.conv_module.bias - gamma_ * mean
else:
bias = -gamma_ * mean
return weight, bias
def freeze(self):
std = torch.sqrt(self.bn_module.running_var + self.bn_module.eps)
weight, bias = self.fold_bn(self.bn_module.running_mean, std)
self.conv_module.weight.data = weight.data
if self.conv_module.bias is None:
self.conv_module.bias = nn.Parameter(bias)
else:
self.conv_module.bias.data = bias
def fakefreeze(self):
pass
def forward(self, x):
if self.training:
y = F.conv2d(x, self.conv_module.weight, self.conv_module.bias,
stride=self.conv_module.stride,
padding=self.conv_module.padding,
dilation=self.conv_module.dilation,
groups=self.conv_module.groups)
y = y.permute(1, 0, 2, 3) # NCHW -> CNHW
y = y.contiguous().view(self.conv_module.out_channels, -1) # CNHW -> C,NHW
# mean = y.mean(1)
# var = y.var(1)
mean = y.mean(1).detach()
var = y.var(1).detach()
self.bn_module.running_mean = \
(1 - self.bn_module.momentum) * self.bn_module.running_mean + \
self.bn_module.momentum * mean
self.bn_module.running_var = \
(1 - self.bn_module.momentum) * self.bn_module.running_var + \
self.bn_module.momentum * var
else:
mean = Variable(self.bn_module.running_mean)
var = Variable(self.bn_module.running_var)
std = torch.sqrt(var + self.bn_module.eps)
weight, bias = self.fold_bn(mean, std)
x = F.conv2d(x, weight, bias,
stride=self.conv_module.stride,
padding=self.conv_module.padding, dilation=self.conv_module.dilation,
groups=self.conv_module.groups)
x = F.relu(x)
return x
def quantize_inference(self, x):
x = self.conv_module(x)
x.clamp_(min=0)
return x
class ConvBN(nn.Module):
def __init__(self,conv_module, bn_module):
super(ConvBN, self).__init__()
self.conv_module = conv_module
self.bn_module = bn_module
def fold_bn(self, mean, std):
if self.bn_module.affine:
gamma_ = self.bn_module.weight / std
weight = self.conv_module.weight * gamma_.view(self.conv_module.out_channels, 1, 1, 1)
if self.conv_module.bias is not None:
bias = gamma_ * self.conv_module.bias - gamma_ * mean + self.bn_module.bias
else:
bias = self.bn_module.bias - gamma_ * mean
else:
gamma_ = 1 / std
weight = self.conv_module.weight * gamma_
if self.conv_module.bias is not None:
bias = gamma_ * self.conv_module.bias - gamma_ * mean
else:
bias = -gamma_ * mean
return weight, bias
def freeze(self):
std = torch.sqrt(self.bn_module.running_var + self.bn_module.eps)
weight, bias = self.fold_bn(self.bn_module.running_mean, std)
self.conv_module.weight.data = weight.data
if self.conv_module.bias is None:
self.conv_module.bias = nn.Parameter(bias)
else:
self.conv_module.bias.data = bias
def fakefreeze(self):
pass
def forward(self, x):
if self.training:
y = F.conv2d(x, self.conv_module.weight, self.conv_module.bias,
stride=self.conv_module.stride,
padding=self.conv_module.padding,
dilation=self.conv_module.dilation,
groups=self.conv_module.groups)
y = y.permute(1, 0, 2, 3) # NCHW -> CNHW
y = y.contiguous().view(self.conv_module.out_channels, -1) # CNHW -> C,NHW
# mean = y.mean(1)
# var = y.var(1)
mean = y.mean(1).detach()
var = y.var(1).detach()
self.bn_module.running_mean = \
(1 - self.bn_module.momentum) * self.bn_module.running_mean + \
self.bn_module.momentum * mean
self.bn_module.running_var = \
(1 - self.bn_module.momentum) * self.bn_module.running_var + \
self.bn_module.momentum * var
else:
mean = Variable(self.bn_module.running_mean)
var = Variable(self.bn_module.running_var)
std = torch.sqrt(var + self.bn_module.eps)
weight, bias = self.fold_bn(mean, std)
x = F.conv2d(x, weight, bias,
stride=self.conv_module.stride,
padding=self.conv_module.padding, dilation=self.conv_module.dilation,
groups=self.conv_module.groups)
return x
def quantize_inference(self, x):
x = self.conv_module(x)
return x
class ConvBNReLU6(nn.Module):
def __init__(self,conv_module, bn_module):
super(ConvBNReLU6, self).__init__()
self.conv_module = conv_module
self.bn_module = bn_module
def fold_bn(self, mean, std):
if self.bn_module.affine:
gamma_ = self.bn_module.weight / std
weight = self.conv_module.weight * gamma_.view(self.conv_module.out_channels, 1, 1, 1)
if self.conv_module.bias is not None:
bias = gamma_ * self.conv_module.bias - gamma_ * mean + self.bn_module.bias
else:
bias = self.bn_module.bias - gamma_ * mean
else:
gamma_ = 1 / std
weight = self.conv_module.weight * gamma_
if self.conv_module.bias is not None:
bias = gamma_ * self.conv_module.bias - gamma_ * mean
else:
bias = -gamma_ * mean
return weight, bias
def freeze(self):
std = torch.sqrt(self.bn_module.running_var + self.bn_module.eps)
weight, bias = self.fold_bn(self.bn_module.running_mean, std)
self.conv_module.weight.data = weight.data
if self.conv_module.bias is None:
self.conv_module.bias = nn.Parameter(bias)
else:
self.conv_module.bias.data = bias
def fakefreeze(self):
pass
def forward(self, x):
if self.training:
y = F.conv2d(x, self.conv_module.weight, self.conv_module.bias,
stride=self.conv_module.stride,
padding=self.conv_module.padding,
dilation=self.conv_module.dilation,
groups=self.conv_module.groups)
y = y.permute(1, 0, 2, 3) # NCHW -> CNHW
y = y.contiguous().view(self.conv_module.out_channels, -1) # CNHW -> C,NHW
# mean = y.mean(1)
# var = y.var(1)
mean = y.mean(1).detach()
var = y.var(1).detach()
self.bn_module.running_mean = \
(1 - self.bn_module.momentum) * self.bn_module.running_mean + \
self.bn_module.momentum * mean
self.bn_module.running_var = \
(1 - self.bn_module.momentum) * self.bn_module.running_var + \
self.bn_module.momentum * var
else:
mean = Variable(self.bn_module.running_mean)
var = Variable(self.bn_module.running_var)
std = torch.sqrt(var + self.bn_module.eps)
weight, bias = self.fold_bn(mean, std)
x = F.conv2d(x, weight, bias,
stride=self.conv_module.stride,
padding=self.conv_module.padding, dilation=self.conv_module.dilation,
groups=self.conv_module.groups)
x = F.relu6(x)
return x
def quantize_inference(self, x):
x = self.conv_module(x)
x.clamp_(min=0,max=6)
return x
import os
import torch
import time
import random
import numpy as np
import pickle
import utils
from architectures import *
# Training differs depending on whether this is a distillation run.
# The models and their params configs were created and saved earlier; load both here, then train.
def train(args, model_path_tar, untrained_model_tar, model_path_dis = None, untrained_model_dis = None, device='cpu'):
print('Training models...')
    # distillation training
if 'distill' in args.mode:
        # load the untrained distillation model (the untrained weights are saved at creation time)
trained_model, model_params = load_model(args, model_path_dis, untrained_model_dis, epoch=0)
        # load the already-trained target model
trained_model_tar, model_params_tar = load_model(args, model_path_tar, untrained_model_tar, epoch=args.epochs)
    # normal (non-distillation) training
else:
        # load the untrained target model (the untrained weights are saved at creation time)
trained_model, model_params = load_model(args, model_path_tar, untrained_model_tar, epoch=0)
print(model_params)
    # get the pre-split dataset with data augmentation applied
dataset = utils.get_dataset(model_params['task'], args.mode, aug=True)
    # hyperparameter setup
    # TODO: tune the learning schedule and evaluate the improvement
learning_rate = model_params['learning_rate']
momentum = model_params['momentum']
weight_decay = model_params['weight_decay']
num_epochs = model_params['epochs']
model_params['optimizer'] = 'SGD'
optimization_params = (learning_rate, weight_decay, momentum)
optimizer, scheduler = utils.get_full_optimizer(trained_model, optimization_params, args)
if 'distill' in args.mode:
trained_model_name = untrained_model_dis
else:
trained_model_name = untrained_model_tar
print('Training: {}...'.format(trained_model_name))
trained_model.to(device)
    # the actual training
    # metrics is appended to continuously, recording per-step data
if 'distill' in args.mode:
metrics = trained_model.train_func(args, trained_model_tar, trained_model, dataset, num_epochs, optimizer, scheduler, model_params, model_path_dis, trained_model_name, device=device)
else:
metrics = trained_model.train_func(args, trained_model, dataset, num_epochs, optimizer, scheduler, model_params, model_path_tar, trained_model_name, device=device)
    # record the results
model_params['train_top1_acc'] = metrics['train_top1_acc']
model_params['test_top1_acc'] = metrics['test_top1_acc']
model_params['train_top5_acc'] = metrics['train_top5_acc']
model_params['test_top5_acc'] = metrics['test_top5_acc']
model_params['epoch_times'] = metrics['epoch_times']
model_params['lrs'] = metrics['lrs']
total_training_time = sum(model_params['epoch_times'])
model_params['total_time'] = total_training_time
print('Training took {} seconds...'.format(total_training_time))
    # save the trained weights and model_params
if 'distill' in args.mode:
save_model(trained_model, model_params, model_path_dis, trained_model_name, epoch=num_epochs)
else:
save_model(trained_model, model_params, model_path_tar, trained_model_name, epoch=num_epochs)
# Configure model info, then create and train the models
def train_models(args, model_path_tar, model_path_dis, device='cpu'):
# if args.model == 'vgg':
# cnn_tar = create_vgg16bn(model_path_tar, args)
# elif args.model == 'mobilenet':
# cnn_tar = create_mobile(model_path_tar, args)
    # returns the model name; instantiates the model and saves the untrained weights and model_params
if args.model == 'resnet18':
cnn_tar = create_resnet18(model_path_tar, args)
elif args.model == 'resnet50':
cnn_tar = create_resnet50(model_path_tar, args)
elif args.model == 'resnet152':
cnn_tar = create_resnet152(model_path_tar, args)
elif args.model == 'mobilenetv2':
cnn_tar = create_mobilenetv2(model_path_tar, args)
# elif args.model == 'resnet':
# cnn_tar = create_resnet56(model_path_tar, args)
# elif args.model == 'wideresnet':
# cnn_tar = create_wideresnet32_4(model_path_tar, args)
if 'distill' in args.mode:
# if args.model == 'vgg':
# cnn_dis = create_vgg16bn(model_path_dis, args)
# elif args.model == 'mobilenet':
# cnn_dis = create_mobile(model_path_dis, args)
# elif args.model == 'resnet':
# cnn_dis = create_resnet56(model_path_dis, args)
# elif args.model == 'wideresnet':
# cnn_dis = create_wideresnet32_4(model_path_dis, args)
if args.model == 'resnet18':
cnn_dis = create_resnet18(model_path_dis, args)
elif args.model == 'resnet50':
cnn_dis = create_resnet50(model_path_dis, args)
elif args.model == 'resnet152':
cnn_dis = create_resnet152(model_path_dis, args)
elif args.model == 'mobilenetv2':
cnn_dis = create_mobilenetv2(model_path_dis, args)
        # load the untrained models and model_params, then start training
train(args, model_path_tar, cnn_tar, model_path_dis, cnn_dis, device = device)
else:
train(args, model_path_tar, cnn_tar, device=device)
# Restore the model's weights
def load_model(args, model_path, model_name, epoch=0):
model_params = load_params(model_path, model_name, epoch)
architecture = 'empty' if 'architecture' not in model_params else model_params['architecture']
network_type = model_params['network_type']
# if 'vgg' in network_type:
# model = VGG(args, model_params)
# elif 'mobilenet' in network_type:
# model = MobileNet(args, model_params)
# elif 'resnet56' in network_type:
# model = ResNet(args, model_params)
# elif 'wideresnet' in network_type:
# model = WideResNet(args,model_params)
if 'resnet18' in network_type:
model = resnet18(args, model_params)
elif 'resnet50' in network_type:
model = resnet50(args, model_params)
elif 'resnet152' in network_type:
model = resnet152(args, model_params)
elif 'mobilenetv2' in network_type:
model = MobileNetV2(args, model_params)
    # model_name is the key that distinguishes the save paths
network_path = model_path + '/' + model_name
if epoch == 0:
load_path = network_path + '/untrained'
elif epoch == -1:
load_path = network_path + '/last'
else:
load_path = network_path + '/' + str(epoch)
if torch.cuda.is_available():
model.load_state_dict(torch.load(load_path), strict=False)
else:
model.load_state_dict(torch.load(load_path, map_location=torch.device('cpu')), strict=False)
return model, model_params
# Restore the model_params record dict
def load_params(models_path, model_name, epoch=0):
params_path = models_path + '/' + model_name
if epoch == 0:
params_path = params_path + '/parameters_untrained'
elif epoch == -1:
params_path = params_path + '/parameters_last'
else:
params_path = params_path + f'/parameters_{epoch}'
with open(params_path, 'rb') as f:
model_params = pickle.load(f)
return model_params
def create_vgg16bn(model_path, args):
print('Creating VGG16BN untrained {} models...'.format(args.data))
model_params = get_data_params(args.data)
model_params['fc_layers'] = [512, 512]
model_params['conv_channels'] = [64, 64, 128, 128, 256, 256, 256, 512, 512, 512, 512, 512, 512]
model_name = '{}_vgg16bn'.format(args.data)
model_params['network_type'] = 'vgg16'
model_params['max_pool_sizes'] = [1, 2, 1, 2, 1, 1, 2, 1, 1, 2, 1, 1, 2]
model_params['conv_batch_norm'] = True
model_params['init_weights'] = True
model_params['augment_training'] = True
get_lr_params(model_params, args)
model_name = save_networks(args, model_name, model_params, model_path)
return model_name
# def create_mobile(model_path, args):
# print('Creating MobileNet untrained {} models...'.format(args.data))
# model_params = get_data_params(args.data)
# model_name = '{}_mobilenet'.format(args.data)
# model_params['network_type'] = 'mobilenet'
# model_params['cfg'] = [64, (128,2), 128, (256,2), 256, (512,2), 512, 512, 512, 512, 512, (1024,2), 1024]
# model_params['augment_training'] = True
# model_params['init_weights'] = True
# get_lr_params(model_params, args)
# model_name = save_networks(args, model_name, model_params, model_path)
# return model_name
def create_resnet56(models_path, args):
print('Creating resnet56 untrained {} models...'.format(args.data))
model_params = get_data_params(args.data)
#
model_params['block_type'] = 'basic'
#
model_params['num_blocks'] = [9,9,9]
model_name = '{}_resnet56'.format(args.data)
model_params['network_type'] = 'resnet56'
model_params['augment_training'] = True
model_params['init_weights'] = True
get_lr_params(model_params, args)
model_name = save_networks(args, model_name, model_params, models_path)
return model_name
# Set model info, instantiate the model, and save the untrained weights.
# model_name is embedded in the save path as an identifier.
def create_resnet18(models_path, args):
print('Creating resnet18 untrained {} models...'.format(args.data))
model_params = get_data_params(args.data)
#
model_params['block_type'] = 'basic'
#
model_params['num_blocks'] = [2, 2, 2, 2]
model_name = '{}_resnet18'.format(args.data)
    # used to decide which model class to load
model_params['network_type'] = 'resnet18'
model_params['augment_training'] = True
model_params['init_weights'] = True
get_lr_params(model_params, args)
model_name = save_networks(args, model_name, model_params, models_path)
return model_name
def create_resnet50(models_path, args):
print('Creating resnet50 untrained {} models...'.format(args.data))
model_params = get_data_params(args.data)
#
model_params['block_type'] = 'bottle'
#
model_params['num_blocks'] = [3, 4, 6, 3]
model_name = '{}_resnet50'.format(args.data)
model_params['network_type'] = 'resnet50'
model_params['augment_training'] = True
model_params['init_weights'] = True
get_lr_params(model_params, args)
model_name = save_networks(args, model_name, model_params, models_path)
return model_name
def create_resnet152(models_path, args):
print('Creating resnet152 untrained {} models...'.format(args.data))
model_params = get_data_params(args.data)
#
model_params['block_type'] = 'bottle'
#
model_params['num_blocks'] = [3, 8, 36, 3]
model_name = '{}_resnet152'.format(args.data)
model_params['network_type'] = 'resnet152'
model_params['augment_training'] = True
model_params['init_weights'] = True
get_lr_params(model_params, args)
model_name = save_networks(args, model_name, model_params, models_path)
return model_name
def create_mobilenetv2(model_path, args):
print('Creating MobileNetV2 untrained {} models...'.format(args.data))
model_params = get_data_params(args.data)
model_name = '{}_mobilenetv2'.format(args.data)
model_params['network_type'] = 'mobilenetv2'
model_params['augment_training'] = True
model_params['init_weights'] = True
get_lr_params(model_params, args)
model_name = save_networks(args, model_name, model_params, model_path)
return model_name
def create_wideresnet32_4(models_path, args):
print('Creating wideresnet32_4 untrained {} models...'.format(args.data))
model_params = get_data_params(args.data)
model_params['block_type'] = 'bottle'
model_params['num_blocks'] = [5,5,5]
model_params['widen_factor'] = 4
model_params['dropout_rate'] = 0.3
model_name = '{}_wideresnet'.format(args.data)
model_params['network_type'] = 'wideresnet'
model_params['augment_training'] = True
model_params['init_weights'] = True
get_lr_params(model_params, args)
model_name = save_networks(args, model_name, model_params, models_path)
return model_name
# Instantiate the model and save it via save_model
def save_networks(args, model_name, model_params, model_path):
print('Saving CNN...')
model_params['base_model'] = model_name
network_type = model_params['network_type']
# if 'vgg' in network_type:
# model = VGG(args, model_params)
# elif 'mobilenet' in network_type:
# model = MobileNet(args, model_params)
# elif 'resnet56' in network_type:
# model = ResNet(args, model_params)
# elif 'wideresnet' in network_type:
# model = WideResNet(args, model_params)
if 'resnet18' in network_type:
model = resnet18(args, model_params)
elif 'resnet50' in network_type:
model = resnet50(args, model_params)
elif 'resnet152' in network_type:
model = resnet152(args, model_params)
elif 'mobilenetv2' in network_type:
model = MobileNetV2(args, model_params)
# 存储model权值参数
save_model(model, model_params, model_path, model_name, epoch=0)
return model_name
# Save the model weights and model_params, keyed by epoch
def save_model(model, model_params, model_path, model_name, epoch=-1):
if not os.path.exists(model_path):
os.makedirs(model_path)
network_path = model_path + '/' + model_name
if not os.path.exists(network_path):
os.makedirs(network_path)
if epoch == 0:
path = network_path + '/untrained'
params_path = network_path + '/parameters_untrained'
torch.save(model.state_dict(), path)
elif epoch == -1:
path = network_path + '/last'
params_path = network_path + '/parameters_last'
torch.save(model.state_dict(), path)
else:
path = network_path + '/' + str(epoch)
params_path = network_path + '/parameters_'+str(epoch)
torch.save(model.state_dict(), path)
if model_params is not None:
with open(params_path, 'wb') as f:
pickle.dump(model_params, f, pickle.HIGHEST_PROTOCOL)
# Configure the dataset and return its settings (input_size, num_classes, etc.).
# params['task'] records which dataset is used.
def get_data_params(data):
if data == 'cinic10':
return cinic10_params()
elif data == 'gtsrb':
return gtsrb_params()
elif data == 'cifar10':
return cifar10_params()
elif data == 'cifar100':
return cifar100_params()
def gtsrb_params():
model_params = {}
model_params['task'] = 'gtsrb'
model_params['input_size'] = 32
model_params['num_classes'] = 43
return model_params
def cinic10_params():
model_params = {}
model_params['task'] = 'cinic10'
model_params['input_size'] = 32
model_params['num_classes'] = 10
return model_params
def cifar10_params():
model_params = {}
model_params['task'] = 'cifar10'
model_params['input_size'] = 32
model_params['num_classes'] = 10
return model_params
def cifar100_params():
model_params = {}
model_params['task'] = 'cifar100'
model_params['input_size'] = 32
model_params['num_classes'] = 100
return model_params
# Set the optimizer and lr_scheduler hyperparameters and record them in model_params
def get_lr_params(model_params, args):
model_params['momentum'] = 0.9
network_type = model_params['network_type']
if 'vgg' in network_type or 'wideresnet' in network_type:
model_params['weight_decay'] = 0.0005
else:
model_params['weight_decay'] = 0.0001
model_params['learning_rate'] = 0.1
model_params['epochs'] = args.epochs
model_params['scheduler'] = f'CosineAnnealingLR_{args.epochs}'
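    # Illustrative outcome (worked out from the code above) for resnet18 with
    # --epochs 100: momentum 0.9, weight_decay 0.0001, learning_rate 0.1,
    # scheduler 'CosineAnnealingLR_100'.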
import numpy as np
import matplotlib.pyplot as plt
from sklearn import metrics
import argparse
if __name__ == '__main__':
    # plot the best ROC data saved by test_mia_attack_model
parser = argparse.ArgumentParser(description='PLOT_TrajectoryMIA')
parser.add_argument('--model', type=str, default='resnet18', help=['resnet18','resnet50','resnet152','mobilenetv2'])
parser.add_argument('--data', type=str, default='cifar10', help=['cinic10', 'cifar10', 'cifar100', 'gtsrb'])
parser.add_argument('--model_distill', type=str, default='resnet18',help=['resnet18','resnet50','resnet152','mobilenetv2'])
args = parser.parse_args()
data_auc = np.load(f'./outputs/{args.data}_{args.model}_{args.model_distill}_trajectory_auc.npy', allow_pickle=True).item()
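    # fpr from roc_curve is sorted ascending, so the last index with
    # fpr <= 0.001 gives the TPR at a 0.1% false-positive rate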
for i in range(len(data_auc['fpr'])):
if data_auc['fpr'][i] > 0.001:
print('TPR at 0.1% FPR: {:.1%}'.format(data_auc['tpr'][i-1]))
break
plt.plot(data_auc['fpr'], data_auc['tpr'], color='darkorange', lw=2, label='ROC curve')
plt.plot([0, 1], [0, 1], color='navy', lw=2, linestyle='--')
plt.xlim([0.0, 1.0])
plt.ylim([0.0, 1.05])
plt.xlabel('False Positive Rate')
plt.ylabel('True Positive Rate')
plt.title('Receiver operating characteristic example')
plt.legend(loc="lower right")
# plt.show()
plt.savefig(f'./img/{args.data}_{args.model}_{args.model_distill}_roc_curve.png')
## Loss Trajectory MIA
#### Update 2023.5.28
1\. Approach
The idea comes from the CCS 2022 paper ``Membership Inference Attacks by Exploiting Loss Trajectory`` https://arxiv.org/abs/2208.14933
The paper first analyzes why the attack performance of common MIA methods is unsatisfactory, and then presents a new method named TrajectoryMIA.
Traditional MIA methods use only the model's output information (output vector or loss). Because a model always overfits to some degree, the output or loss it produces for training-set versus non-training-set inputs shows a fairly clear difference (e.g., small loss on training samples, large loss on non-members).
However, traditional methods cannot distinguish inputs that are not in the training set yet still receive a small loss. The authors found that for such small-loss data, members and non-members differ in how fast and how far the loss converges during training. As shown in the figure, they have different loss trajectories (Loss Trajectory).
![p1](fig\p1.png)
If a small-loss sample is not in the training set, it is usually a relatively easy image whose training loss drops quickly, so in mid-training its loss trajectory lies below that of samples in the training set. By capturing this difference in loss trajectories, a more effective MIA can be mounted.
How is the loss trajectory obtained in practice? For the target model we can only observe its outputs; we have no access to its training process, let alone its loss trajectory. Knowledge distillation solves this: save the weights of distill_target_model at every epoch, then load each per-epoch snapshot and compute its loss against the labels. This yields the loss trajectory and, from it, the attack model's test data; a minimal sketch follows.
我们仍然通过Shadow Model的方式来构造Attack Model的训练集数据,Target Model与Shadow Model我暂时使用了相同的结构,Shadow Model的数据集的构造也采用了与Naive MIA相似的方式,都是切分了数据集。为了与distill Target Model的Loss Trajectory对齐,我们在训练完Shadow Model后,通过distill Shadow Model的方式获得其Loss Trajectory,然后再训练Attack Model.
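A minimal sketch of the trajectory extraction described above, assuming one checkpoint file per distillation epoch under `checkpoint_dir`; the file naming and the `build_model` helper are assumptions here, not the repo's exact API:
```python
import torch
import torch.nn.functional as F

def loss_trajectory(checkpoint_dir, epochs, build_model, loader, device):
    """Per-sample loss at every distillation epoch -> tensor (num_samples, epochs)."""
    per_epoch_losses = []
    model = build_model().to(device)
    for epoch in range(1, epochs + 1):
        state = torch.load(f'{checkpoint_dir}/{epoch}', map_location=device)
        model.load_state_dict(state)
        model.eval()
        losses = []
        with torch.no_grad():
            for x, y in loader:
                x, y = x.to(device), y.to(device)
                # per-sample cross-entropy: no reduction over the batch
                losses.append(F.cross_entropy(model(x), y, reduction='none'))
        per_epoch_losses.append(torch.cat(losses))
    return torch.stack(per_epoch_losses, dim=1)  # one row of losses per sample
```
Each row (the trajectory), concatenated with a final loss value, then forms one input sample for the attack model.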
2\. Code files
architectures.py: the model architectures
dataset.py: builds the datasets used by the Target Model and Shadow Model
normal.py: config parameters; building, training, saving, and loading models
utils.py: optimizer/lr_scheduler setup and the concrete training and testing loops
plot.py: plots the Attack Model's ROC curve (AUC)
3\. Results
- TrajectoryMIA was run on CIFAR10, CIFAR100, and CINIC10 against ResNet18/50/152 and MobileNetV2, and achieved a clearly effective attack in every combination.
- Detailed numbers:
Notes:
In this experimental stage, the Target Model, Shadow Model, Distill Target Model, and Distill Shadow Model were trained without early stopping, in order to obtain more heavily overfitted models that make the Attack Model's effect more pronounced and to verify that the training pipeline works; later runs will use early stopping and try to raise each model's test accuracy.
The accuracies of the models trained on CIFAR10 are all below earlier training runs, mainly because the dataset was split for the Shadow and Distill Models, leaving less training data, and some hyperparameters were changed without careful re-tuning yet. Applying data augmentation to the test dataset, which raises the difficulty of testing, may also contribute.
The models trained on CIFAR100 reach rather low accuracy (only about 30% for the ResNets and about 40% for MobileNetV2) with a large gap between train and test accuracy, i.e., clear overfitting. The likely cause is that after splitting CIFAR100 the training data are quite insufficient, while CIFAR100 is the harder task.
CINIC10 contains 270,000 images, more than CIFAR10 and CIFAR100, but part of it is downsampled from ImageNet and is harder than CIFAR10; the resulting top-1 accuracy is below CIFAR10 but above CIFAR100. Compared with the Target and Shadow Models in the naive MIA, the TrajectoryMIA Target and Shadow Models are clearly not fully trained; the hyperparameters still need further tuning.
The blocks below list the training and testing results of the Target Model, Shadow Model, Distill Target Model, Distill Shadow Model, and Attack Model for each model/dataset combination.
* CIFAR10 + ResNet18
Target Model:
```
load aug_target_dataset ...
Epoch: 100/100
Cur lr: 2.4671981713420017e-05
Top1 Test accuracy: 78.6500015258789
Top5 Test accuracy: 98.57999420166016
Top1 Train accuracy: 99.18999481201172
Top5 Train accuracy: 100.0
```
Shadow Model:
```
load aug_shadow_dataset ...
Epoch: 100/100
Cur lr: 2.4671981713420017e-05
Top1 Test accuracy: 78.79999542236328
Top5 Test accuracy: 98.48999786376953
Top1 Train accuracy: 99.22000122070312
Top5 Train accuracy: 100.0
```
Distill Target Model:
```
load aug_distill_target_dataset ...
Epoch: 100/100
Cur lr: 2.4671981713420017e-05
Top1 Test accuracy: 79.02999877929688
Top5 Test accuracy: 98.97999572753906
Top1 Train accuracy: 80.11000061035156
Top5 Train accuracy: 98.66999816894531
```
Distill Shadow Model:
```
load aug_distill_shadow_dataset ...
Epoch: 100/100
Cur lr: 2.4671981713420017e-05
Top1 Test accuracy: 79.3499984741211
Top5 Test accuracy: 98.79999542236328
Top1 Train accuracy: 80.02999877929688
Top5 Train accuracy: 98.69999694824219
```
Attack Model:
```
epoch:90 train_loss:0.0045 test_loss:0.0050 train_prec1:0.7016 test_prec1:0.6216 val_prec1:0.6216 val_auc:0.6929
Max AUC: 0.70787291
Max ACC: 0.63575
TPR at 0.1% FPR: 1.2%
```
* CIFAR10 + ResNet50
Target Model:
```
load aug_target_dataset ...
Epoch: 100/100
Cur lr: 2.4671981713420017e-05
Top1 Test accuracy: 68.25999450683594
Top5 Test accuracy: 97.3699951171875
Top1 Train accuracy: 87.27999877929688
Top5 Train accuracy: 99.80999755859375
```
Shadow Model:
```
load aug_shadow_dataset ...
Epoch: 100/100
Cur lr: 2.4671981713420017e-05
Top1 Test accuracy: 66.6199951171875
Top5 Test accuracy: 96.89999389648438
Top1 Train accuracy: 94.79000091552734
Top5 Train accuracy: 99.97000122070312
```
Distill Target Model:
```
load aug_distill_target_dataset ...
Epoch: 100/100
Cur lr: 2.4671981713420017e-05
Top1 Test accuracy: 70.20999908447266
Top5 Test accuracy: 97.30999755859375
Top1 Train accuracy: 70.08499908447266
Top5 Train accuracy: 97.46499633789062
```
Distill Shadow Model:
```
load aug_distill_shadow_dataset ...
Epoch: 100/100
Cur lr: 2.4671981713420017e-05
Top1 Test accuracy: 69.4000015258789
Top5 Test accuracy: 97.07999420166016
Top1 Train accuracy: 69.16500091552734
Top5 Train accuracy: 97.3699951171875
```
Attack Model:
```
val_prec1:0.6158 val_auc:0.6675
Max AUC: 0.67598043
Max ACC: 0.6204500000000001
TPR at 0.1% FPR: 1.2%
```
* CIFAR10 + ResNet152
Target Model:
```
load aug_target_dataset ...
Epoch: 100/100
Cur lr: 2.4671981713420017e-05
Top1 Test accuracy: 66.77999877929688
Top5 Test accuracy: 96.55999755859375
Top1 Train accuracy: 97.04000091552734
Top5 Train accuracy: 99.95999908447266
```
Shadow Model:
```
load aug_shadow_dataset ...
Epoch: 100/100
Cur lr: 2.4671981713420017e-05
Top1 Test accuracy: 67.58999633789062
Top5 Test accuracy: 96.5
Top1 Train accuracy: 94.2699966430664
Top5 Train accuracy: 99.88999938964844
```
Distill Target Model:
```
load aug_distill_target_dataset ...
Epoch: 100/100
Cur lr: 2.4671981713420017e-05
Top1 Test accuracy: 68.80999755859375
Top5 Test accuracy: 97.30999755859375
Top1 Train accuracy: 69.83499908447266
Top5 Train accuracy: 97.42499542236328
```
Distill Shadow Model:
```
load aug_distill_shadow_dataset ...
Epoch: 100/100
Cur lr: 2.4671981713420017e-05
Top1 Test accuracy: 69.98999786376953
Top5 Test accuracy: 97.31999969482422
Top1 Train accuracy: 70.15499877929688
Top5 Train accuracy: 97.31999969482422
```
Attack Model:
```
epoch:90 train_loss:0.0046 test_loss:0.0048 train_prec1:0.7022 test_prec1:0.6547 val_prec1:0.6547 val_auc:0.7353
Max AUC: 0.7494320150000001
Max ACC: 0.6719499999999999
TPR at 0.1% FPR: 2.2%
```
* CIFAR10 + MobileNetV2
Target Model:
```
load aug_target_dataset ...
Epoch: 100/100
Cur lr: 2.4671981713420017e-05
Top1 Test accuracy: 79.66999816894531
Top5 Test accuracy: 98.55999755859375
Top1 Train accuracy: 99.81999969482422
Top5 Train accuracy: 100.0
```
Shadow Model:
```
load aug_shadow_dataset ...
Epoch: 100/100
Cur lr: 2.4671981713420017e-05
Top1 Test accuracy: 79.40999603271484
Top5 Test accuracy: 98.50999450683594
Top1 Train accuracy: 99.70999908447266
Top5 Train accuracy: 100.0
```
Distill Target Model:
```
load aug_distill_target_dataset ...
Epoch: 100/100
Cur lr: 2.4671981713420017e-05
Top1 Test accuracy: 80.32999420166016
Top5 Test accuracy: 98.93999481201172
Top1 Train accuracy: 81.10499572753906
Top5 Train accuracy: 98.83499908447266
```
Distill Shadow Model:
```
load aug_distill_shadow_dataset ...
Epoch: 100/100
Cur lr: 2.4671981713420017e-05
Top1 Test accuracy: 80.68999481201172
Top5 Test accuracy: 98.79999542236328
Top1 Train accuracy: 80.93000030517578
Top5 Train accuracy: 98.80500030517578
```
Attack Model:
```
epoch:90 train_loss:0.0044 test_loss:0.0051 train_prec1:0.7212 test_prec1:0.6013 val_prec1:0.6013 val_auc:0.6869
Max AUC: 0.70338137
Max ACC: 0.63095
TPR at 0.1% FPR: 2.6%
```
* CIFAR100 + ResNet18
Target Model:
```
load aug_target_dataset ...
Epoch: 100/100
Cur lr: 2.4671981713420017e-05
Top1 Test accuracy: 38.79999923706055
Top5 Test accuracy: 68.69999694824219
Top1 Train accuracy: 97.58999633789062
Top5 Train accuracy: 99.93999481201172
```
Shadow Model:
```
load aug_shadow_dataset ...
Epoch: 100/100
Cur lr: 2.4671981713420017e-05
Top1 Test accuracy: 38.22999954223633
Top5 Test accuracy: 68.5199966430664
Top1 Train accuracy: 97.37999725341797
Top5 Train accuracy: 100.0
```
Distill Target Model:
```
load aug_distill_target_dataset ...
Epoch: 100/100
Cur lr: 2.4671981713420017e-05
Top1 Test accuracy: 38.77000045776367
Top5 Test accuracy: 70.6199951171875
Top1 Train accuracy: 40.91999816894531
Top5 Train accuracy: 71.22999572753906
```
Distill Shadow Model:
```
load aug_distill_shadow_dataset ...
Epoch: 100/100
Cur lr: 2.4671981713420017e-05
Top1 Test accuracy: 40.11000061035156
Top5 Test accuracy: 70.83000183105469
Top1 Train accuracy: 42.06999969482422
Top5 Train accuracy: 72.04000091552734
```
Attack Model:
```
epoch:90 train_loss:0.0036 test_loss:0.0038 train_prec1:0.8490 test_prec1:0.8244 val_prec1:0.8244 val_auc:0.9037
Max AUC: 0.9115403099999998
Max ACC: 0.8295999999999999
TPR at 0.1% FPR: 0.0%
```
* CIFAR100 + ResNet50
Target Model:
```
load aug_target_dataset ...
Epoch: 100/100
Cur lr: 2.467198171342e-07
Top1 Test accuracy: 31.079999923706055
Top5 Test accuracy: 59.44999694824219
Top1 Train accuracy: 97.79999542236328
Top5 Train accuracy: 99.94999694824219
```
Shadow Model:
```
load aug_shadow_dataset ...
Epoch: 100/100
Cur lr: 2.467198171342e-07
Top1 Test accuracy: 29.779998779296875
Top5 Test accuracy: 57.64999771118164
Top1 Train accuracy: 98.31999969482422
Top5 Train accuracy: 99.95999908447266
```
Distill Target Model:
```
load aug_distill_target_dataset ...
Epoch: 100/100
Cur lr: 2.467198171342e-07
Top1 Test accuracy: 32.04999923706055
Top5 Test accuracy: 61.43000030517578
Top1 Train accuracy: 33.18499755859375
Top5 Train accuracy: 62.18499755859375
```
Distill Shadow Model:
```
load aug_distill_shadow_dataset ...
Epoch: 100/100
Cur lr: 2.467198171342e-07
Top1 Test accuracy: 31.809999465942383
Top5 Test accuracy: 61.03999710083008
Top1 Train accuracy: 33.06999969482422
Top5 Train accuracy: 62.20499801635742
```
Attack Model:
```
Max AUC: 0.9372197300000003
Max ACC: 0.8634499999999999
TPR at 0.1% FPR: 0.0%
```
* CIFAR100 + ResNet152
Target Model:
```
load aug_target_dataset ...
Epoch: 100/100
Cur lr: 2.4671981713420017e-05
Top1 Test accuracy: 30.67999839782715
Top5 Test accuracy: 57.959999084472656
Top1 Train accuracy: 96.80999755859375
Top5 Train accuracy: 99.86000061035156
```
Shadow Model:
```
load aug_shadow_dataset ...
Epoch: 100/100
Cur lr: 2.4671981713420017e-05
Top1 Test accuracy: 27.939998626708984
Top5 Test accuracy: 56.05999755859375
Top1 Train accuracy: 95.86000061035156
Top5 Train accuracy: 99.75999450683594
```
Distill Target Model:
```
load aug_distill_target_dataset ...
Epoch: 100/100
Cur lr: 2.4671981713420017e-05
Top1 Test accuracy: 30.529998779296875
Top5 Test accuracy: 60.55999755859375
Top1 Train accuracy: 32.154998779296875
Top5 Train accuracy: 61.53999710083008
```
Distill Shadow Model:
```
load aug_distill_shadow_dataset ...
Epoch: 100/100
Cur lr: 2.4671981713420017e-05
Top1 Test accuracy: 29.170000076293945
Top5 Test accuracy: 58.34000015258789
Top1 Train accuracy: 30.14499855041504
Top5 Train accuracy: 59.11499786376953
```
Attack Model:
```
val_prec1:0.8617 val_auc:0.9357
Max AUC: 0.94010921
Max ACC: 0.8649
TPR at 0.1% FPR: 7.1%
```
* CIFAR100 + MobileNetV2
Target Model:
```
load aug_target_dataset ...
Epoch: 100/100
Cur lr: 2.4671981713420017e-05
Top1 Test accuracy: 41.38999938964844
Top5 Test accuracy: 69.93999481201172
Top1 Train accuracy: 99.91999816894531
Top5 Train accuracy: 100.0
```
Shadow Model:
```
load aug_shadow_dataset ...
Epoch: 100/100
Cur lr: 2.4671981713420017e-05
Top1 Test accuracy: 41.82999801635742
Top5 Test accuracy: 70.54000091552734
Top1 Train accuracy: 99.93000030517578
Top5 Train accuracy: 100.0
```
Distill Target Model:
```
load aug_distill_target_dataset ...
Epoch: 100/100
Cur lr: 2.4671981713420017e-05
Top1 Test accuracy: 42.09000015258789
Top5 Test accuracy: 71.91999816894531
Top1 Train accuracy: 43.28999710083008
Top5 Train accuracy: 72.50499725341797
```
Distill Shadow Model:
```
load aug_distill_shadow_dataset ...
Epoch: 100/100
Cur lr: 2.4671981713420017e-05
Top1 Test accuracy: 42.45000076293945
Top5 Test accuracy: 72.43000030517578
Top1 Train accuracy: 44.01499938964844
Top5 Train accuracy: 73.53499603271484
```
Attack Model:
```
Max AUC: 0.9335802350000002
Max ACC: 0.8553499999999999
TPR at 0.1% FPR: 0.0%
```
* CINIC10 + ResNet18:
Target Model:
```
load aug_target_dataset ...
Epoch: 100/100
Cur lr: 2.4671981713420017e-05
Top1 Test accuracy: 58.48999786376953
Top5 Test accuracy: 94.68000030517578
Top1 Train accuracy: 96.1199951171875
Top5 Train accuracy: 99.98999786376953
```
Shadow Model:
```
load aug_shadow_dataset ...
Epoch: 100/100
Cur lr: 2.4671981713420017e-05
Top1 Test accuracy: 58.68000030517578
Top5 Test accuracy: 94.6199951171875
Top1 Train accuracy: 96.38999938964844
Top5 Train accuracy: 100.0
```
Distill Target Model:
```
load aug_distill_target_dataset ...
Epoch: 100/100
Cur lr: 2.4671981713420017e-05
Top1 Test accuracy: 60.56999969482422
Top5 Test accuracy: 95.50999450683594
Top1 Train accuracy: 61.137725830078125
Top5 Train accuracy: 95.41818237304688
```
Distill Shadow Model:
```
load aug_distill_shadow_dataset ...
Epoch: 100/100
Cur lr: 2.4671981713420017e-05
Top1 Test accuracy: 60.15999984741211
Top5 Test accuracy: 95.23999786376953
Top1 Train accuracy: 61.085453033447266
Top5 Train accuracy: 95.53772735595703
```
Attack Model:
```
Max AUC: 0.8098187400000001
Max ACC: 0.7252500000000001
TPR at 0.1% FPR: 3.8%
```
* CINIC10 + ResNet50:
Target Model:
```
load aug_target_dataset ...
Epoch: 100/100
Cur lr: 2.4671981713420017e-05
Top1 Test accuracy: 51.7599983215332
Top5 Test accuracy: 93.8499984741211
Top1 Train accuracy: 84.88999938964844
Top5 Train accuracy: 99.52999877929688
```
Shadow Model:
```
load aug_shadow_dataset ...
Epoch: 100/100
Cur lr: 2.4671981713420017e-05
Top1 Test accuracy: 48.77000045776367
Top5 Test accuracy: 92.0
Top1 Train accuracy: 80.72000122070312
Top5 Train accuracy: 99.29000091552734
```
Distill Target Model:
```
load aug_distill_target_dataset ...
Epoch: 100/100
Cur lr: 2.4671981713420017e-05
Top1 Test accuracy: 55.72999954223633
Top5 Test accuracy: 93.97999572753906
Top1 Train accuracy: 55.32772445678711
Top5 Train accuracy: 94.23681640625
```
Distill Shadow Model:
```
load aug_distill_shadow_dataset ...
Epoch: 100/100
Cur lr: 2.4671981713420017e-05
Top1 Test accuracy: 51.459999084472656
Top5 Test accuracy: 93.04999542236328
Top1 Train accuracy: 51.17409133911133
Top5 Train accuracy: 93.13136291503906
```
Attack Model:
```
Max AUC: 0.764546
Max ACC: 0.69
TPR at 0.1% FPR: 1.7%
```
* CINIC10 + ResNet152:
Target Model:
```
load aug_target_dataset ...
Epoch: 100/100
Cur lr: 2.4671981713420017e-05
Top1 Test accuracy: 47.75
Top5 Test accuracy: 91.29999542236328
Top1 Train accuracy: 89.69999694824219
Top5 Train accuracy: 99.81999969482422
```
Shadow Model:
```
load aug_shadow_dataset ...
Epoch: 100/100
Cur lr: 2.4671981713420017e-05
Top1 Test accuracy: 46.22999954223633
Top5 Test accuracy: 90.55999755859375
Top1 Train accuracy: 84.0199966430664
Top5 Train accuracy: 99.18000030517578
```
Distill Target Model:
```
load aug_distill_target_dataset ...
Epoch: 100/100
Cur lr: 2.4671981713420017e-05
Top1 Test accuracy: 51.09000015258789
Top5 Test accuracy: 92.58999633789062
Top1 Train accuracy: 50.53999710083008
Top5 Train accuracy: 92.56181335449219
```
Distill Shadow Model:
```
load aug_distill_shadow_dataset ...
Epoch: 100/100
Cur lr: 2.4671981713420017e-05
Top1 Test accuracy: 48.93000030517578
Top5 Test accuracy: 91.62999725341797
Top1 Train accuracy: 48.51545333862305
Top5 Train accuracy: 91.79590606689453
```
Attack Model:
```
epoch:90 train_loss:0.0042 test_loss:0.0044 train_prec1:0.7580 test_prec1:0.7336 val_prec1:0.7336 val_auc:0.8117
Max AUC: 0.81900534
Max ACC: 0.7394
TPR at 0.1% FPR: 3.0%
```
* CINIC10 + MobileNetV2:
Target Model:
```
load aug_target_dataset ...
Epoch: 100/100
Cur lr: 2.4671981713420017e-05
Top1 Test accuracy: 58.619998931884766
Top5 Test accuracy: 94.02999877929688
Top1 Train accuracy: 99.19999694824219
Top5 Train accuracy: 100.0
```
Shadow Model:
```
load aug_shadow_dataset ...
Epoch: 100/100
Cur lr: 2.4671981713420017e-05
Top1 Test accuracy: 60.779998779296875
Top5 Test accuracy: 94.16999816894531
Top1 Train accuracy: 99.31999969482422
Top5 Train accuracy: 100.0
```
Distill Target Model:
```
load aug_distill_target_dataset ...
Epoch: 100/100
Cur lr: 2.4671981713420017e-05
Top1 Test accuracy: 60.55999755859375
Top5 Test accuracy: 95.18000030517578
Top1 Train accuracy: 60.62772750854492
Top5 Train accuracy: 95.2677230834961
```
Distill Shadow Model:
```
load aug_distill_shadow_dataset ...
Epoch: 100/100
Cur lr: 2.4671981713420017e-05
Top1 Test accuracy: 61.23999786376953
Top5 Test accuracy: 95.41999816894531
Top1 Train accuracy: 61.27545166015625
Top5 Train accuracy: 95.35317993164062
```
Attack Model:
```
Max AUC: 0.843258295
Max ACC: 0.75125
TPR at 0.1% FPR: 0.0%
```
4\. Open questions and directions for improvement
Q1: How should quantization be taken into account?
A1: It mainly depends on the attack scenario we assume.
If the attacker does not know whether the target model is full precision or quantized, they would use a full-precision Shadow Model, Distill Target Model, and Distill Shadow Model. Since distillation only requires the Target Model's outputs, a quantized Target Model is already covered by this MIA.
If the attacker knows that the Target Model is quantized, the Shadow Model can also be quantized for alignment, which may yield a better attack (a minimal sketch follows).
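A minimal sketch of that alignment, assuming PyTorch post-training dynamic quantization, which only covers `nn.Linear` layers; it is a stand-in for whatever scheme the quantized target actually uses:
```python
import torch
import torch.nn as nn

def quantize_shadow_model(shadow_model):
    # Post-training dynamic quantization: nn.Linear weights -> int8.
    # A stand-in only; if the target uses static quantization or QAT,
    # the shadow model should mirror that scheme instead.
    return torch.quantization.quantize_dynamic(
        shadow_model.cpu(), {nn.Linear}, dtype=torch.qint8)
```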
Q2: How can this be turned into a predictor?
A2: One option is to treat the loss trajectory as a property: compute the similarity between the loss trajectories of in and out data (member_status can serve as the in/out label) and predict from it. Pairs of the Distill Target Model's loss trajectory and the attack accuracy (AUC, etc.) can form the data points, optionally supplemented with the Distill Shadow Model's (see the sketch after this answer).
The expected effect is that the lower the similarity, the higher the attack success rate.
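A minimal sketch of the proposed similarity measure, assuming `trajectories` is shaped `(num_samples, num_epochs)` and `member_status` holds the 0/1 in/out labels; cosine similarity between the mean in/out trajectories is one plausible choice, not the only one:
```python
import torch
import torch.nn.functional as F

def in_out_trajectory_similarity(trajectories, member_status):
    # Mean trajectory of members vs. non-members, compared by cosine similarity.
    member = member_status.bool()
    mean_in = trajectories[member].mean(dim=0)
    mean_out = trajectories[~member].mean(dim=0)
    return F.cosine_similarity(mean_in, mean_out, dim=0).item()
```
Each (similarity, attack AUC) pair from one model/dataset combination would then form a data point for the predictor.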
#!/bin/bash
#- Job parameters
# (TODO)
# Please modify job name
#SBATCH -J Tra_152 # The job name
#SBATCH -o ./info/ret-%j.out # Write the standard output to file named 'ret-<job_number>.out'
#SBATCH -e ./info/ret-%j.err # Write the standard error to file named 'ret-<job_number>.err'
#- Resources
# (TODO)
# Please modify your requirements
#SBATCH -p nv-gpu # Submit to 'nv-gpu' Partition
#SBATCH -t 0-12:00:00 # Run for a maximum time of 0 days, 12 hours, 00 mins, 00 secs
#SBATCH --nodes=1 # Request N nodes
#SBATCH --gres=gpu:1 # Request M GPU per node
#SBATCH --gres-flags=enforce-binding # CPU-GPU Affinity
#SBATCH --qos=gpu-normal # Request QOS Type
###
### The system will alloc 8 or 16 cores per gpu by default.
### If you need more or less, use following:
### #SBATCH --cpus-per-task=K # Request K cores
###
###
### Without specifying the constraint, any available nodes that meet the requirement will be allocated
### You can specify the characteristics of the compute nodes, and even the names of the compute nodes
###
### #SBATCH --nodelist=gpu-v00 # Request a specific list of hosts
### #SBATCH --constraint="Volta|RTX8000" # Request GPU Type: Volta(V100 or V100S) or RTX8000
###
#- Log information
echo "Job start at $(date "+%Y-%m-%d %H:%M:%S")"
echo "Job run at:"
echo "$(hostnamectl)"
#- Load environments
source /tools/module_env.sh
source ~/pyt1.5/bin/activate
module list # list modules loaded
##- Tools
module load cluster-tools/v1.0
module load slurm-tools/v1.0
module load cmake/3.15.7
module load git/2.17.1
module load vim/8.1.2424
##- language
module load python3/3.6.8
##- CUDA
module load cuda-cudnn/11.1-8.1.1
##- virtualenv
# source xxxxx/activate
echo $(module list) # list modules loaded
echo $(which gcc)
echo $(which python)
echo $(which python3)
cluster-quota # nas quota
nvidia-smi --format=csv --query-gpu=name,driver_version,power.limit # gpu info
#- Warning! Please do not change your CUDA_VISIBLE_DEVICES
#- in `.bashrc`, `env.sh`, or your job script
echo "Use GPU ${CUDA_VISIBLE_DEVICES}" # which gpus
#- The CUDA_VISIBLE_DEVICES variable is assigned and specified by SLURM
#- Job step
# [EDIT HERE(TODO)]
sleep 2s
hostname
echo "python main.py --mode target --model resnet152 --data cifar10"
python main.py --mode target --model resnet152 --data cifar10
echo "python main.py --mode shadow --model resnet152 --data cifar10"
python main.py --mode shadow --model resnet152 --data cifar10
echo "python main.py --mode distill_target --model resnet152 --data cifar10"
python main.py --mode distill_target --model resnet152 --data cifar10
echo "python main.py --mode distill_shadow --model resnet152 --data cifar10"
python main.py --mode distill_shadow --model resnet152 --data cifar10
echo "python main.py --action 1 --mode shadow --mia_type build-dataset --model resnet152 --model_distill resnet152 --data cifar10"
python main.py --action 1 --mode shadow --mia_type build-dataset --model resnet152 --model_distill resnet152 --data cifar10
echo "python main.py --action 1 --mode target --mia_type build-dataset --model resnet152 --model_distill resnet152 --data cifar10"
python main.py --action 1 --mode target --mia_type build-dataset --model resnet152 --model_distill resnet152 --data cifar10
echo "python main.py --action 1 --mia_type black-box --model resnet152 --model_distill resnet152 --data cifar10"
python main.py --action 1 --mia_type black-box --model resnet152 --model_distill resnet152 --data cifar10
#- End
echo "Job end at $(date "+%Y-%m-%d %H:%M:%S")"
#!/bin/bash
#- Job parameters
# (TODO)
# Please modify job name
#SBATCH -J Tra_152_10 # The job name
#SBATCH -o ./info/ret-%j.out # Write the standard output to file named 'ret-<job_number>.out'
#SBATCH -e ./info/ret-%j.err # Write the standard error to file named 'ret-<job_number>.err'
#- Resources
# (TODO)
# Please modify your requirements
#SBATCH -p nv-gpu # Submit to 'nv-gpu' Partition
#SBATCH -t 0-12:00:00 # Run for a maximum time of 0 days, 12 hours, 00 mins, 00 secs
#SBATCH --nodes=1 # Request N nodes
#SBATCH --gres=gpu:1 # Request M GPU per node
#SBATCH --gres-flags=enforce-binding # CPU-GPU Affinity
#SBATCH --qos=gpu-normal # Request QOS Type
###
### The system will alloc 8 or 16 cores per gpu by default.
### If you need more or less, use following:
### #SBATCH --cpus-per-task=K # Request K cores
###
###
### Without specifying the constraint, any available nodes that meet the requirement will be allocated
### You can specify the characteristics of the compute nodes, and even the names of the compute nodes
###
### #SBATCH --nodelist=gpu-v00 # Request a specific list of hosts
### #SBATCH --constraint="Volta|RTX8000" # Request GPU Type: Volta(V100 or V100S) or RTX8000
###
#- Log information
echo "Job start at $(date "+%Y-%m-%d %H:%M:%S")"
echo "Job run at:"
echo "$(hostnamectl)"
#- Load environments
source /tools/module_env.sh
source ~/pyt1.5/bin/activate
module list # list modules loaded
##- Tools
module load cluster-tools/v1.0
module load slurm-tools/v1.0
module load cmake/3.15.7
module load git/2.17.1
module load vim/8.1.2424
##- language
module load python3/3.6.8
##- CUDA
module load cuda-cudnn/11.1-8.1.1
##- virtualenv
# source xxxxx/activate
echo $(module list) # list modules loaded
echo $(which gcc)
echo $(which python)
echo $(which python3)
cluster-quota # nas quota
nvidia-smi --format=csv --query-gpu=name,driver_version,power.limit # gpu info
#- Warning! Please do not change your CUDA_VISIBLE_DEVICES
#- in `.bashrc`, `env.sh`, or your job script
echo "Use GPU ${CUDA_VISIBLE_DEVICES}" # which gpus
#- The CUDA_VISIBLE_DEVICES variable is assigned and specified by SLURM
#- Job step
# [EDIT HERE(TODO)]
sleep 2s
hostname
echo "python main.py --mode target --model resnet152 --data cifar100"
python main.py --mode target --model resnet152 --data cifar100
echo "python main.py --mode shadow --model resnet152 --data cifar100"
python main.py --mode shadow --model resnet152 --data cifar100
echo "python main.py --mode distill_target --model resnet152 --data cifar100"
python main.py --mode distill_target --model resnet152 --data cifar100
echo "python main.py --mode distill_shadow --model resnet152 --data cifar100"
python main.py --mode distill_shadow --model resnet152 --data cifar100
echo "python main.py --action 1 --mode shadow --mia_type build-dataset --model resnet152 --model_distill resnet152 --data cifar100"
python main.py --action 1 --mode shadow --mia_type build-dataset --model resnet152 --model_distill resnet152 --data cifar100
echo "python main.py --action 1 --mode target --mia_type build-dataset --model resnet152 --model_distill resnet152 --data cifar100"
python main.py --action 1 --mode target --mia_type build-dataset --model resnet152 --model_distill resnet152 --data cifar100
echo "python main.py --action 1 --mia_type black-box --model resnet152 --model_distill resnet152 --data cifar100"
python main.py --action 1 --mia_type black-box --model resnet152 --model_distill resnet152 --data cifar100
#- End
echo "Job end at $(date "+%Y-%m-%d %H:%M:%S")"
#!/bin/bash
#- Job parameters
# (TODO)
# Please modify job name
#SBATCH -J Tra_CIN_10 # The job name
#SBATCH -o ./info/ret-%j.out # Write the standard output to file named 'ret-<job_number>.out'
#SBATCH -e ./info/ret-%j.err # Write the standard error to file named 'ret-<job_number>.err'
#- Resources
# (TODO)
# Please modify your requirements
#SBATCH -p nv-gpu # Submit to 'nv-gpu' Partition
#SBATCH -t 3-00:00:00 # Run for a maximum time of 3 days, 00 hours, 00 mins, 00 secs
#SBATCH --nodes=1 # Request N nodes
#SBATCH --gres=gpu:1 # Request M GPU per node
#SBATCH --gres-flags=enforce-binding # CPU-GPU Affinity
#SBATCH --qos=gpu-long # Request QOS Type
###
### The system will alloc 8 or 16 cores per gpu by default.
### If you need more or less, use following:
### #SBATCH --cpus-per-task=K # Request K cores
###
###
### Without specifying the constraint, any available nodes that meet the requirement will be allocated
### You can specify the characteristics of the compute nodes, and even the names of the compute nodes
###
### #SBATCH --nodelist=gpu-v00 # Request a specific list of hosts
### #SBATCH --constraint="Volta|RTX8000" # Request GPU Type: Volta(V100 or V100S) or RTX8000
###
#- Log information
echo "Job start at $(date "+%Y-%m-%d %H:%M:%S")"
echo "Job run at:"
echo "$(hostnamectl)"
#- Load environments
source /tools/module_env.sh
source ~/pyt1.5/bin/activate
module list # list modules loaded
##- Tools
module load cluster-tools/v1.0
module load slurm-tools/v1.0
module load cmake/3.15.7
module load git/2.17.1
module load vim/8.1.2424
##- language
module load python3/3.6.8
##- CUDA
module load cuda-cudnn/11.1-8.1.1
##- virtualenv
# source xxxxx/activate
echo $(module list) # list modules loaded
echo $(which gcc)
echo $(which python)
echo $(which python3)
cluster-quota # nas quota
nvidia-smi --format=csv --query-gpu=name,driver_version,power.limit # gpu info
#- Warning! Please do not change your CUDA_VISIBLE_DEVICES
#- in `.bashrc`, `env.sh`, or your job script
echo "Use GPU ${CUDA_VISIBLE_DEVICES}" # which gpus
#- The CUDA_VISIBLE_DEVICES variable is assigned and specified by SLURM
#- Job step
# [EDIT HERE(TODO)]
sleep 2s
hostname
echo "python main.py --mode target --model resnet152 --data cinic10"
python main.py --mode target --model resnet152 --data cinic10
echo "python main.py --mode shadow --model resnet152 --data cinic10"
python main.py --mode shadow --model resnet152 --data cinic10
echo "python main.py --mode distill_target --model resnet152 --data cinic10"
python main.py --mode distill_target --model resnet152 --data cinic10
echo "python main.py --mode distill_shadow --model resnet152 --data cinic10"
python main.py --mode distill_shadow --model resnet152 --data cinic10
echo "python main.py --action 1 --mode shadow --mia_type build-dataset --model resnet152 --model_distill resnet152 --data cinic10"
python main.py --action 1 --mode shadow --mia_type build-dataset --model resnet152 --model_distill resnet152 --data cinic10
echo "python main.py --action 1 --mode target --mia_type build-dataset --model resnet152 --model_distill resnet152 --data cinic10"
python main.py --action 1 --mode target --mia_type build-dataset --model resnet152 --model_distill resnet152 --data cinic10
echo "python main.py --action 1 --mia_type black-box --model resnet152 --model_distill resnet152 --data cinic10"
python main.py --action 1 --mia_type black-box --model resnet152 --model_distill resnet152 --data cinic10
#- End
echo "Job end at $(date "+%Y-%m-%d %H:%M:%S")"
#!/bin/bash
#- Job parameters
# (TODO)
# Please modify job name
#SBATCH -J Tra_10_18 # The job name
#SBATCH -o ./info/ret-%j.out # Write the standard output to file named 'ret-<job_number>.out'
#SBATCH -e ./info/ret-%j.err # Write the standard error to file named 'ret-<job_number>.err'
#- Resources
# (TODO)
# Please modify your requirements
#SBATCH -p nv-gpu # Submit to 'nv-gpu' Partition
#SBATCH -t 1-00:00:00 # Run for a maximum time of 1 day, 00 hours, 00 mins, 00 secs
#SBATCH --nodes=1 # Request N nodes
#SBATCH --gres=gpu:1 # Request M GPU per node
#SBATCH --gres-flags=enforce-binding # CPU-GPU Affinity
#SBATCH --qos=gpu-normal # Request QOS Type
###
### The system will alloc 8 or 16 cores per gpu by default.
### If you need more or less, use following:
### #SBATCH --cpus-per-task=K # Request K cores
###
###
### Without specifying the constraint, any available nodes that meet the requirement will be allocated
### You can specify the characteristics of the compute nodes, and even the names of the compute nodes
###
### #SBATCH --nodelist=gpu-v00 # Request a specific list of hosts
### #SBATCH --constraint="Volta|RTX8000" # Request GPU Type: Volta(V100 or V100S) or RTX8000
###
#- Log information
echo "Job start at $(date "+%Y-%m-%d %H:%M:%S")"
echo "Job run at:"
echo "$(hostnamectl)"
#- Load environments
source /tools/module_env.sh
source ~/pyt1.5/bin/activate
module list # list modules loaded
##- Tools
module load cluster-tools/v1.0
module load slurm-tools/v1.0
module load cmake/3.15.7
module load git/2.17.1
module load vim/8.1.2424
##- language
module load python3/3.6.8
##- CUDA
module load cuda-cudnn/11.1-8.1.1
##- virtualenv
# source xxxxx/activate
echo $(module list) # list modules loaded
echo $(which gcc)
echo $(which python)
echo $(which python3)
cluster-quota # nas quota
nvidia-smi --format=csv --query-gpu=name,driver_version,power.limit # gpu info
#- Warning! Please do not change your CUDA_VISIBLE_DEVICES
#- in `.bashrc`, `env.sh`, or your job script
echo "Use GPU ${CUDA_VISIBLE_DEVICES}" # which gpus
#- The CUDA_VISIBLE_DEVICES variable is assigned and specified by SLURM
#- Job step
# [EDIT HERE(TODO)]
sleep 2s
hostname
echo "python main.py --mode target --model resnet18 --data cifar10"
python main.py --mode target --model resnet18 --data cifar10
echo "python main.py --mode shadow --model resnet18 --data cifar10"
python main.py --mode shadow --model resnet18 --data cifar10
echo "python main.py --mode distill_target --model resnet18 --data cifar10"
python main.py --mode distill_target --model resnet18 --data cifar10
echo "python main.py --mode distill_shadow --model resnet18 --data cifar10"
python main.py --mode distill_shadow --model resnet18 --data cifar10
echo "python main.py --action 1 --mode shadow --mia_type build-dataset --model resnet18 --model_distill resnet18 --data cifar10"
python main.py --action 1 --mode shadow --mia_type build-dataset --model resnet18 --model_distill resnet18 --data cifar10
echo "python main.py --action 1 --mode target --mia_type build-dataset --model resnet18 --model_distill resnet18 --data cifar10"
python main.py --action 1 --mode target --mia_type build-dataset --model resnet18 --model_distill resnet18 --data cifar10
echo "python main.py --action 1 --mia_type black-box --model resnet18 --model_distill resnet18 --data cifar10"
python main.py --action 1 --mia_type black-box --model resnet18 --model_distill resnet18 --data cifar10
#- End
echo "Job end at $(date "+%Y-%m-%d %H:%M:%S")"
#!/bin/bash
#- Job parameters
# (TODO)
# Please modify job name
#SBATCH -J Tra_C100_18 # The job name
#SBATCH -o ./info/ret-%j.out # Write the standard output to file named 'ret-<job_number>.out'
#SBATCH -e ./info/ret-%j.err # Write the standard error to file named 'ret-<job_number>.err'
#- Resources
# (TODO)
# Please modify your requirements
#SBATCH -p nv-gpu # Submit to 'nv-gpu' Partition
#SBATCH -t 1-00:00:00 # Run for a maximum time of 1 day, 00 hours, 00 mins, 00 secs
#SBATCH --nodes=1 # Request N nodes
#SBATCH --gres=gpu:1 # Request M GPU per node
#SBATCH --gres-flags=enforce-binding # CPU-GPU Affinity
#SBATCH --qos=gpu-normal # Request QOS Type
###
### The system will alloc 8 or 16 cores per gpu by default.
### If you need more or less, use following:
### #SBATCH --cpus-per-task=K # Request K cores
###
###
### Without specifying the constraint, any available nodes that meet the requirement will be allocated
### You can specify the characteristics of the compute nodes, and even the names of the compute nodes
###
### #SBATCH --nodelist=gpu-v00 # Request a specific list of hosts
### #SBATCH --constraint="Volta|RTX8000" # Request GPU Type: Volta(V100 or V100S) or RTX8000
###
#- Log information
echo "Job start at $(date "+%Y-%m-%d %H:%M:%S")"
echo "Job run at:"
echo "$(hostnamectl)"
#- Load environments
source /tools/module_env.sh
source ~/pyt1.5/bin/activate
module list # list modules loaded
##- Tools
module load cluster-tools/v1.0
module load slurm-tools/v1.0
module load cmake/3.15.7
module load git/2.17.1
module load vim/8.1.2424
##- language
module load python3/3.6.8
##- CUDA
module load cuda-cudnn/11.1-8.1.1
##- virtualenv
# source xxxxx/activate
echo $(module list) # list modules loaded
echo $(which gcc)
echo $(which python)
echo $(which python3)
cluster-quota # nas quota
nvidia-smi --format=csv --query-gpu=name,driver_version,power.limit # gpu info
#- Warning! Please do not change your CUDA_VISIBLE_DEVICES
#- in `.bashrc`, `env.sh`, or your job script
echo "Use GPU ${CUDA_VISIBLE_DEVICES}" # which gpus
#- The CUDA_VISIBLE_DEVICES variable is assigned and specified by SLURM
#- Job step
# [EDIT HERE(TODO)]
sleep 2s
hostname
echo "python main.py --mode target --model resnet18 --data cifar100"
python main.py --mode target --model resnet18 --data cifar100
echo "python main.py --mode shadow --model resnet18 --data cifar100"
python main.py --mode shadow --model resnet18 --data cifar100
echo "python main.py --mode distill_target --model resnet18 --data cifar100"
python main.py --mode distill_target --model resnet18 --data cifar100
echo "python main.py --mode distill_shadow --model resnet18 --data cifar100"
python main.py --mode distill_shadow --model resnet18 --data cifar100
echo "python main.py --action 1 --mode shadow --mia_type build-dataset --model resnet18 --model_distill resnet18 --data cifar100"
python main.py --action 1 --mode shadow --mia_type build-dataset --model resnet18 --model_distill resnet18 --data cifar100
echo "python main.py --action 1 --mode target --mia_type build-dataset --model resnet18 --model_distill resnet18 --data cifar100"
python main.py --action 1 --mode target --mia_type build-dataset --model resnet18 --model_distill resnet18 --data cifar100
echo "python main.py --action 1 --mia_type black-box --model resnet18 --model_distill resnet18 --data cifar100"
python main.py --action 1 --mia_type black-box --model resnet18 --model_distill resnet18 --data cifar100
#- End
echo "Job end at $(date "+%Y-%m-%d %H:%M:%S")"
#!/bin/bash
#- Job parameters
# (TODO)
# Please modify job name
#SBATCH -J Tra_CIN_18 # The job name
#SBATCH -o ./info/ret-%j.out # Write the standard output to file named 'ret-<job_number>.out'
#SBATCH -e ./info/ret-%j.err # Write the standard error to file named 'ret-<job_number>.err'
#- Resources
# (TODO)
# Please modify your requirements
#SBATCH -p nv-gpu # Submit to 'nv-gpu' Partition
#SBATCH -t 1-12:00:00 # Run for a maximum time of 1 day, 12 hours, 00 mins, 00 secs
#SBATCH --nodes=1 # Request N nodes
#SBATCH --gres=gpu:1 # Request M GPU per node
#SBATCH --gres-flags=enforce-binding # CPU-GPU Affinity
#SBATCH --qos=gpu-long # Request QOS Type
###
### The system will alloc 8 or 16 cores per gpu by default.
### If you need more or less, use following:
### #SBATCH --cpus-per-task=K # Request K cores
###
###
### Without specifying the constraint, any available nodes that meet the requirement will be allocated
### You can specify the characteristics of the compute nodes, and even the names of the compute nodes
###
### #SBATCH --nodelist=gpu-v00 # Request a specific list of hosts
### #SBATCH --constraint="Volta|RTX8000" # Request GPU Type: Volta(V100 or V100S) or RTX8000
###
#- Log information
echo "Job start at $(date "+%Y-%m-%d %H:%M:%S")"
echo "Job run at:"
echo "$(hostnamectl)"
#- Load environments
source /tools/module_env.sh
source ~/pyt1.5/bin/activate
module list # list modules loaded
##- Tools
module load cluster-tools/v1.0
module load slurm-tools/v1.0
module load cmake/3.15.7
module load git/2.17.1
module load vim/8.1.2424
##- language
module load python3/3.6.8
##- CUDA
module load cuda-cudnn/11.1-8.1.1
##- virtualenv
# source xxxxx/activate
echo $(module list) # list modules loaded
echo $(which gcc)
echo $(which python)
echo $(which python3)
cluster-quota # nas quota
nvidia-smi --format=csv --query-gpu=name,driver_version,power.limit # gpu info
#- Warning! Please do not change your CUDA_VISIBLE_DEVICES
#- in `.bashrc`, `env.sh`, or your job script
echo "Use GPU ${CUDA_VISIBLE_DEVICES}" # which gpus
#- The CUDA_VISIBLE_DEVICES variable is assigned and specified by SLURM
#- Job step
# [EDIT HERE(TODO)]
sleep 2s
hostname
echo "python main.py --mode target --model resnet18 --data cinic10"
python main.py --mode target --model resnet18 --data cinic10
echo "python main.py --mode shadow --model resnet18 --data cinic10"
python main.py --mode shadow --model resnet18 --data cinic10
echo "python main.py --mode distill_target --model resnet18 --data cinic10"
python main.py --mode distill_target --model resnet18 --data cinic10
echo "python main.py --mode distill_shadow --model resnet18 --data cinic10"
python main.py --mode distill_shadow --model resnet18 --data cinic10
echo "python main.py --action 1 --mode shadow --mia_type build-dataset --model resnet18 --model_distill resnet18 --data cinic10"
python main.py --action 1 --mode shadow --mia_type build-dataset --model resnet18 --model_distill resnet18 --data cinic10
echo "python main.py --action 1 --mode target --mia_type build-dataset --model resnet18 --model_distill resnet18 --data cinic10"
python main.py --action 1 --mode target --mia_type build-dataset --model resnet18 --model_distill resnet18 --data cinic10
echo "python main.py --action 1 --mia_type black-box --model resnet18 --model_distill resnet18 --data cinic10"
python main.py --action 1 --mia_type black-box --model resnet18 --model_distill resnet18 --data cinic10
#- End
echo "Job end at $(date "+%Y-%m-%d %H:%M:%S")"
#!/bin/bash
#- Job parameters
# (TODO)
# Please modify job name
#SBATCH -J Tra_50 # The job name
#SBATCH -o ./info/ret-%j.out # Write the standard output to file named 'ret-<job_number>.out'
#SBATCH -e ./info/ret-%j.err # Write the standard error to file named 'ret-<job_number>.err'
#- Resources
# (TODO)
# Please modify your requirements
#SBATCH -p nv-gpu # Submit to 'nv-gpu' Partition
#SBATCH -t 0-12:00:00 # Run for a maximum time of 0 days, 12 hours, 00 mins, 00 secs
#SBATCH --nodes=1 # Request N nodes
#SBATCH --gres=gpu:1 # Request M GPU per node
#SBATCH --gres-flags=enforce-binding # CPU-GPU Affinity
#SBATCH --qos=gpu-normal # Request QOS Type
###
### The system will alloc 8 or 16 cores per gpu by default.
### If you need more or less, use following:
### #SBATCH --cpus-per-task=K # Request K cores
###
###
### Without specifying the constraint, any available nodes that meet the requirement will be allocated
### You can specify the characteristics of the compute nodes, and even the names of the compute nodes
###
### #SBATCH --nodelist=gpu-v00 # Request a specific list of hosts
### #SBATCH --constraint="Volta|RTX8000" # Request GPU Type: Volta(V100 or V100S) or RTX8000
###
#- Log information
echo "Job start at $(date "+%Y-%m-%d %H:%M:%S")"
echo "Job run at:"
echo "$(hostnamectl)"
#- Load environments
source /tools/module_env.sh
source ~/pyt1.5/bin/activate
module list # list modules loaded
##- Tools
module load cluster-tools/v1.0
module load slurm-tools/v1.0
module load cmake/3.15.7
module load git/2.17.1
module load vim/8.1.2424
##- language
module load python3/3.6.8
##- CUDA
module load cuda-cudnn/11.1-8.1.1
##- virtualenv
# source xxxxx/activate
echo $(module list) # list modules loaded
echo $(which gcc)
echo $(which python)
echo $(which python3)
cluster-quota # nas quota
nvidia-smi --format=csv --query-gpu=name,driver_version,power.limit # gpu info
#- Warning! Please do not change your CUDA_VISIBLE_DEVICES
#- in `.bashrc`, `env.sh`, or your job script
echo "Use GPU ${CUDA_VISIBLE_DEVICES}" # which gpus
#- The CUDA_VISIBLE_DEVICES variable is assigned and specified by SLURM
#- Job step
# [EDIT HERE(TODO)]
sleep 2s
hostname
echo "python main.py --mode target --model resnet50 "
python main.py --mode target --model resnet50
echo "python main.py --mode shadow --model resnet50 "
python main.py --mode shadow --model resnet50
echo "python main.py --mode distill_target --model resnet50 "
python main.py --mode distill_target --model resnet50
echo "python main.py --mode distill_shadow --model resnet50 "
python main.py --mode distill_shadow --model resnet50
echo "python main.py --action 1 --mode shadow --mia_type build-dataset --model resnet50 --model_distill resnet50 "
python main.py --action 1 --mode shadow --mia_type build-dataset --model resnet50 --model_distill resnet50
echo "python main.py --action 1 --mode target --mia_type build-dataset --model resnet50 --model_distill resnet50 "
python main.py --action 1 --mode target --mia_type build-dataset --model resnet50 --model_distill resnet50
echo "python main.py --action 1 --mia_type black-box --model resnet50 --model_distill resnet50 "
python main.py --action 1 --mia_type black-box --model resnet50 --model_distill resnet50
#- End
echo "Job end at $(date "+%Y-%m-%d %H:%M:%S")"
#!/bin/bash
#- Job parameters
# (TODO)
# Please modify job name
#SBATCH -J Tra_50_10 # The job name
#SBATCH -o ./info/ret-%j.out # Write the standard output to file named 'ret-<job_number>.out'
#SBATCH -e ./info/ret-%j.err # Write the standard error to file named 'ret-<job_number>.err'
#- Resources
# (TODO)
# Please modify your requirements
#SBATCH -p nv-gpu # Submit to 'nv-gpu' Partition
#SBATCH -t 0-12:00:00 # Run for a maximum time of 0 days, 12 hours, 00 mins, 00 secs
#SBATCH --nodes=1 # Request N nodes
#SBATCH --gres=gpu:1 # Request M GPU per node
#SBATCH --gres-flags=enforce-binding # CPU-GPU Affinity
#SBATCH --qos=gpu-normal # Request QOS Type
###
### The system will alloc 8 or 16 cores per gpu by default.
### If you need more or less, use following:
### #SBATCH --cpus-per-task=K # Request K cores
###
###
### Without specifying the constraint, any available nodes that meet the requirement will be allocated
### You can specify the characteristics of the compute nodes, and even the names of the compute nodes
###
### #SBATCH --nodelist=gpu-v00 # Request a specific list of hosts
### #SBATCH --constraint="Volta|RTX8000" # Request GPU Type: Volta(V100 or V100S) or RTX8000
###
#- Log information
echo "Job start at $(date "+%Y-%m-%d %H:%M:%S")"
echo "Job run at:"
echo "$(hostnamectl)"
#- Load environments
source /tools/module_env.sh
source ~/pyt1.5/bin/activate
module list # list modules loaded
##- Tools
module load cluster-tools/v1.0
module load slurm-tools/v1.0
module load cmake/3.15.7
module load git/2.17.1
module load vim/8.1.2424
##- language
module load python3/3.6.8
##- CUDA
module load cuda-cudnn/11.1-8.1.1
##- virtualenv
# source xxxxx/activate
echo $(module list) # list modules loaded
echo $(which gcc)
echo $(which python)
echo $(which python3)
cluster-quota # nas quota
nvidia-smi --format=csv --query-gpu=name,driver_version,power.limit # gpu info
#- Warning! Please do not change your CUDA_VISIBLE_DEVICES
#- in `.bashrc`, `env.sh`, or your job script
echo "Use GPU ${CUDA_VISIBLE_DEVICES}" # which gpus
#- The CUDA_VISIBLE_DEVICES variable is assigned and specified by SLURM
#- Job step
# [EDIT HERE(TODO)]
sleep 2s
hostname
echo "python main.py --mode target --model resnet50 --data cifar100"
python main.py --mode target --model resnet50 --data cifar100
echo "python main.py --mode shadow --model resnet50 --data cifar100"
python main.py --mode shadow --model resnet50 --data cifar100
echo "python main.py --mode distill_target --model resnet50 --data cifar100"
python main.py --mode distill_target --model resnet50 --data cifar100
echo "python main.py --mode distill_shadow --model resnet50 --data cifar100"
python main.py --mode distill_shadow --model resnet50 --data cifar100
echo "python main.py --action 1 --mode shadow --mia_type build-dataset --model resnet50 --model_distill resnet50 --data cifar100"
python main.py --action 1 --mode shadow --mia_type build-dataset --model resnet50 --model_distill resnet50 --data cifar100
echo "python main.py --action 1 --mode target --mia_type build-dataset --model resnet50 --model_distill resnet50 --data cifar100"
python main.py --action 1 --mode target --mia_type build-dataset --model resnet50 --model_distill resnet50 --data cifar100
echo "python main.py --action 1 --mia_type black-box --model resnet50 --model_distill resnet50 --data cifar100"
python main.py --action 1 --mia_type black-box --model resnet50 --model_distill resnet50 --data cifar100
#- End
echo "Job end at $(date "+%Y-%m-%d %H:%M:%S")"
#!/bin/bash
#- Job parameters
# (TODO)
# Please modify job name
#SBATCH -J Tra_CIN_50 # The job name
#SBATCH -o ./info/ret-%j.out # Write the standard output to file named 'ret-<job_number>.out'
#SBATCH -e ./info/ret-%j.err # Write the standard error to file named 'ret-<job_number>.err'
#- Resources
# (TODO)
# Please modify your requirements
#SBATCH -p nv-gpu # Submit to 'nv-gpu' Partition
#SBATCH -t 1-12:00:00 # Run for a maximum time of 1 day, 12 hours, 00 mins, 00 secs
#SBATCH --nodes=1 # Request N nodes
#SBATCH --gres=gpu:1 # Request M GPU per node
#SBATCH --gres-flags=enforce-binding # CPU-GPU Affinity
#SBATCH --qos=gpu-long # Request QOS Type
###
### The system will alloc 8 or 16 cores per gpu by default.
### If you need more or less, use following:
### #SBATCH --cpus-per-task=K # Request K cores
###
###
### Without specifying the constraint, any available nodes that meet the requirement will be allocated
### You can specify the characteristics of the compute nodes, and even the names of the compute nodes
###
### #SBATCH --nodelist=gpu-v00 # Request a specific list of hosts
### #SBATCH --constraint="Volta|RTX8000" # Request GPU Type: Volta(V100 or V100S) or RTX8000
###
#- Log information
echo "Job start at $(date "+%Y-%m-%d %H:%M:%S")"
echo "Job run at:"
echo "$(hostnamectl)"
#- Load environments
source /tools/module_env.sh
source ~/pyt1.5/bin/activate
module list # list modules loaded
##- Tools
module load cluster-tools/v1.0
module load slurm-tools/v1.0
module load cmake/3.15.7
module load git/2.17.1
module load vim/8.1.2424
##- language
module load python3/3.6.8
##- CUDA
module load cuda-cudnn/11.1-8.1.1
##- virtualenv
# source xxxxx/activate
echo $(module list) # list modules loaded
echo $(which gcc)
echo $(which python)
echo $(which python3)
cluster-quota # nas quota
nvidia-smi --format=csv --query-gpu=name,driver_version,power.limit # gpu info
#- Warning! Please do not change your CUDA_VISIBLE_DEVICES
#- in `.bashrc`, `env.sh`, or your job script
echo "Use GPU ${CUDA_VISIBLE_DEVICES}" # which gpus
#- The CUDA_VISIBLE_DEVICES variable is assigned and specified by SLURM
#- Job step
# [EDIT HERE(TODO)]
sleep 2s
hostname
echo "python main.py --mode target --model resnet50 --data cinic10"
python main.py --mode target --model resnet50 --data cinic10
echo "python main.py --mode shadow --model resnet50 --data cinic10"
python main.py --mode shadow --model resnet50 --data cinic10
echo "python main.py --mode distill_target --model resnet50 --data cinic10"
python main.py --mode distill_target --model resnet50 --data cinic10
echo "python main.py --mode distill_shadow --model resnet50 --data cinic10"
python main.py --mode distill_shadow --model resnet50 --data cinic10
echo "python main.py --action 1 --mode shadow --mia_type build-dataset --model resnet50 --model_distill resnet50 --data cinic10"
python main.py --action 1 --mode shadow --mia_type build-dataset --model resnet50 --model_distill resnet50 --data cinic10
echo "python main.py --action 1 --mode target --mia_type build-dataset --model resnet50 --model_distill resnet50 --data cinic10"
python main.py --action 1 --mode target --mia_type build-dataset --model resnet50 --model_distill resnet50 --data cinic10
echo "python main.py --action 1 --mia_type black-box --model resnet50 --model_distill resnet50 --data cinic10"
python main.py --action 1 --mia_type black-box --model resnet50 --model_distill resnet50 --data cinic10
#- End
echo "Job end at $(date "+%Y-%m-%d %H:%M:%S")"
#!/bin/bash
#- Job parameters
# (TODO)
# Please modify job name
#SBATCH -J Tra_Mobile # The job name
#SBATCH -o ./info/ret-%j.out # Write the standard output to file named 'ret-<job_number>.out'
#SBATCH -e ./info/ret-%j.err # Write the standard error to file named 'ret-<job_number>.err'
#- Resources
# (TODO)
# Please modify your requirements
#SBATCH -p nv-gpu # Submit to 'nv-gpu' Partition
#SBATCH -t 0-12:00:00 # Run for a maximum time of 0 days, 12 hours, 00 mins, 00 secs
#SBATCH --nodes=1 # Request N nodes
#SBATCH --gres=gpu:1 # Request M GPU per node
#SBATCH --gres-flags=enforce-binding # CPU-GPU Affinity
#SBATCH --qos=gpu-normal # Request QOS Type
###
### The system will alloc 8 or 16 cores per gpu by default.
### If you need more or less, use following:
### #SBATCH --cpus-per-task=K # Request K cores
###
###
### Without specifying the constraint, any available nodes that meet the requirement will be allocated
### You can specify the characteristics of the compute nodes, and even the names of the compute nodes
###
### #SBATCH --nodelist=gpu-v00 # Request a specific list of hosts
### #SBATCH --constraint="Volta|RTX8000" # Request GPU Type: Volta(V100 or V100S) or RTX8000
###
#- Log information
echo "Job start at $(date "+%Y-%m-%d %H:%M:%S")"
echo "Job run at:"
echo "$(hostnamectl)"
#- Load environments
source /tools/module_env.sh
source ~/pyt1.5/bin/activate
module list # list modules loaded
##- Tools
module load cluster-tools/v1.0
module load slurm-tools/v1.0
module load cmake/3.15.7
module load git/2.17.1
module load vim/8.1.2424
##- language
module load python3/3.6.8
##- CUDA
module load cuda-cudnn/11.1-8.1.1
##- virtualenv
# source xxxxx/activate
echo $(module list) # list modules loaded
echo $(which gcc)
echo $(which python)
echo $(which python3)
cluster-quota # nas quota
nvidia-smi --format=csv --query-gpu=name,driver_version,power.limit # gpu info
#- Warning! Please do not change your CUDA_VISIBLE_DEVICES
#- in `.bashrc`, `env.sh`, or your job script
echo "Use GPU ${CUDA_VISIBLE_DEVICES}" # which gpus
#- The CUDA_VISIBLE_DEVICES variable is assigned and specified by SLURM
#- Job step
# [EDIT HERE(TODO)]
sleep 2s
hostname
echo "python main.py --mode target --model mobilenetv2"
python main.py --mode target --model mobilenetv2
echo "python main.py --mode shadow --model mobilenetv2"
python main.py --mode shadow --model mobilenetv2
echo "python main.py --mode distill_target --model mobilenetv2"
python main.py --mode distill_target --model mobilenetv2
echo "python main.py --mode distill_shadow --model mobilenetv2"
python main.py --mode distill_shadow --model mobilenetv2
echo "python main.py --action 1 --mode shadow --mia_type build-dataset --model mobilenetv2 --model_distill mobilenetv2 "
python main.py --action 1 --mode shadow --mia_type build-dataset --model mobilenetv2 --model_distill mobilenetv2
echo "python main.py --action 1 --mode target --mia_type build-dataset --model mobilenetv2 --model_distill mobilenetv2"
python main.py --action 1 --mode target --mia_type build-dataset --model mobilenetv2 --model_distill mobilenetv2
echo "python main.py --action 1 --mia_type black-box --model mobilenetv2 --model_distill mobilenetv2"
python main.py --action 1 --mia_type black-box --model mobilenetv2 --model_distill mobilenetv2
#- End
echo "Job end at $(date "+%Y-%m-%d %H:%M:%S")"
#!/bin/bash
#- Job parameters
# (TODO)
# Please modify job name
#SBATCH -J Tra_100_Mobile # The job name
#SBATCH -o ./info/ret-%j.out # Write the standard output to file named 'ret-<job_number>.out'
#SBATCH -e ./info/ret-%j.err # Write the standard error to file named 'ret-<job_number>.err'
#- Resources
# (TODO)
# Please modify your requirements
#SBATCH -p nv-gpu # Submit to 'nv-gpu' Partition
#SBATCH -t 0-12:00:00 # Run for a maximum time of 0 days, 12 hours, 00 mins, 00 secs
#SBATCH --nodes=1 # Request N nodes
#SBATCH --gres=gpu:1 # Request M GPU per node
#SBATCH --gres-flags=enforce-binding # CPU-GPU Affinity
#SBATCH --qos=gpu-normal # Request QOS Type
###
### The system will alloc 8 or 16 cores per gpu by default.
### If you need more or less, use following:
### #SBATCH --cpus-per-task=K # Request K cores
###
###
### Without specifying the constraint, any available nodes that meet the requirement will be allocated
### You can specify the characteristics of the compute nodes, and even the names of the compute nodes
###
### #SBATCH --nodelist=gpu-v00 # Request a specific list of hosts
### #SBATCH --constraint="Volta|RTX8000" # Request GPU Type: Volta(V100 or V100S) or RTX8000
###
#- Log information
echo "Job start at $(date "+%Y-%m-%d %H:%M:%S")"
echo "Job run at:"
echo "$(hostnamectl)"
#- Load environments
source /tools/module_env.sh
source ~/pyt1.5/bin/activate
module list # list modules loaded
##- Tools
module load cluster-tools/v1.0
module load slurm-tools/v1.0
module load cmake/3.15.7
module load git/2.17.1
module load vim/8.1.2424
##- language
module load python3/3.6.8
##- CUDA
module load cuda-cudnn/11.1-8.1.1
##- virtualenv
# source xxxxx/activate
echo $(module list) # list modules loaded
echo $(which gcc)
echo $(which python)
echo $(which python3)
cluster-quota # nas quota
nvidia-smi --format=csv --query-gpu=name,driver_version,power.limit # gpu info
#- Warning! Please do not change your CUDA_VISIBLE_DEVICES
#- in `.bashrc`, `env.sh`, or your job script
echo "Use GPU ${CUDA_VISIBLE_DEVICES}" # which gpus
#- The CUDA_VISIBLE_DEVICES variable is assigned and specified by SLURM
#- Job step
# [EDIT HERE(TODO)]
sleep 2s
hostname
echo "python main.py --mode target --model mobilenetv2 --data cifar100"
python main.py --mode target --model mobilenetv2 --data cifar100
echo "python main.py --mode shadow --model mobilenetv2 --data cifar100"
python main.py --mode shadow --model mobilenetv2 --data cifar100
echo "python main.py --mode distill_target --model mobilenetv2 --data cifar100"
python main.py --mode distill_target --model mobilenetv2 --data cifar100
echo "python main.py --mode distill_shadow --model mobilenetv2 --data cifar100"
python main.py --mode distill_shadow --model mobilenetv2 --data cifar100
echo "python main.py --action 1 --mode shadow --mia_type build-dataset --model mobilenetv2 --data cifar100 --model_distill mobilenetv2"
python main.py --action 1 --mode shadow --mia_type build-dataset --model mobilenetv2 --model_distill mobilenetv2 --data cifar100
echo "python main.py --action 1 --mode target --mia_type build-dataset --model mobilenetv2 --data cifar100 --model_distill mobilenetv2"
python main.py --action 1 --mode target --mia_type build-dataset --model mobilenetv2 --model_distill mobilenetv2 --data cifar100
echo "python main.py --action 1 --mia_type black-box --model mobilenetv2 --model_distill mobilenetv2 --data cifar100"
python main.py --action 1 --mia_type black-box --model mobilenetv2 --model_distill mobilenetv2 --data cifar100
#- End
echo "Job end at $(date "+%Y-%m-%d %H:%M:%S")"
#!/bin/bash
#- Job parameters
# (TODO)
# Please modify job name
#SBATCH -J Tra_CIN_Mobile # The job name
#SBATCH -o ./info/ret-%j.out # Write the standard output to file named 'ret-<job_number>.out'
#SBATCH -e ./info/ret-%j.err # Write the standard error to file named 'ret-<job_number>.err'
#- Resources
# (TODO)
# Please modify your requirements
#SBATCH -p nv-gpu # Submit to 'nv-gpu' Partition
#SBATCH -t 3-00:00:00 # Run for a maximum time of 3 days, 00 hours, 00 mins, 00 secs
#SBATCH --nodes=1 # Request N nodes
#SBATCH --gres=gpu:1 # Request M GPU per node
#SBATCH --gres-flags=enforce-binding # CPU-GPU Affinity
#SBATCH --qos=gpu-long # Request QOS Type
###
### The system will alloc 8 or 16 cores per gpu by default.
### If you need more or less, use following:
### #SBATCH --cpus-per-task=K # Request K cores
###
###
### Without specifying the constraint, any available nodes that meet the requirement will be allocated
### You can specify the characteristics of the compute nodes, and even the names of the compute nodes
###
### #SBATCH --nodelist=gpu-v00 # Request a specific list of hosts
### #SBATCH --constraint="Volta|RTX8000" # Request GPU Type: Volta(V100 or V100S) or RTX8000
###
#- Log information
echo "Job start at $(date "+%Y-%m-%d %H:%M:%S")"
echo "Job run at:"
echo "$(hostnamectl)"
#- Load environments
source /tools/module_env.sh
source ~/pyt1.5/bin/activate
module list # list modules loaded
##- Tools
module load cluster-tools/v1.0
module load slurm-tools/v1.0
module load cmake/3.15.7
module load git/2.17.1
module load vim/8.1.2424
##- language
module load python3/3.6.8
##- CUDA
module load cuda-cudnn/11.1-8.1.1
##- virtualenv
# source xxxxx/activate
echo $(module list) # list modules loaded
echo $(which gcc)
echo $(which python)
echo $(which python3)
cluster-quota # nas quota
nvidia-smi --format=csv --query-gpu=name,driver_version,power.limit # gpu info
#- Warning! Please do not change your CUDA_VISIBLE_DEVICES
#- in `.bashrc`, `env.sh`, or your job script
echo "Use GPU ${CUDA_VISIBLE_DEVICES}" # which gpus
#- The CUDA_VISIBLE_DEVICES variable is assigned and specified by SLURM
#- Job step
# [EDIT HERE(TODO)]
sleep 2s
hostname
echo "python main.py --mode target --model mobilenetv2 --data cinic10"
python main.py --mode target --model mobilenetv2 --data cinic10
echo "python main.py --mode shadow --model mobilenetv2 --data cinic10"
python main.py --mode shadow --model mobilenetv2 --data cinic10
echo "python main.py --mode distill_target --model mobilenetv2 --data cinic10"
python main.py --mode distill_target --model mobilenetv2 --data cinic10
echo "python main.py --mode distill_shadow --model mobilenetv2 --data cinic10"
python main.py --mode distill_shadow --model mobilenetv2 --data cinic10
echo "python main.py --action 1 --mode shadow --mia_type build-dataset --model mobilenetv2 --data cinic10 --model_distill mobilenetv2"
python main.py --action 1 --mode shadow --mia_type build-dataset --model mobilenetv2 --model_distill mobilenetv2 --data cinic10
echo "python main.py --action 1 --mode target --mia_type build-dataset --model mobilenetv2 --data cinic10 --model_distill mobilenetv2"
python main.py --action 1 --mode target --mia_type build-dataset --model mobilenetv2 --model_distill mobilenetv2 --data cinic10
echo "python main.py --action 1 --mia_type black-box --model mobilenetv2 --model_distill mobilenetv2 --data cinic10"
python main.py --action 1 --mia_type black-box --model mobilenetv2 --model_distill mobilenetv2 --data cinic10
#- End
echo "Job end at $(date "+%Y-%m-%d %H:%M:%S")"
#!/bin/bash
#- Job parameters
# (TODO)
# Please modify job name
#SBATCH -J PLOT # The job name
#SBATCH -o ./info/ret-%j.out # Write the standard output to file named 'ret-<job_number>.out'
#SBATCH -e ./info/ret-%j.err # Write the standard error to file named 'ret-<job_number>.err'
#- Resources
# (TODO)
# Please modify your requirements
#SBATCH -p nv-gpu # Submit to 'nv-gpu' Partition
#SBATCH -t 0-12:00:00 # Run for a maximum time of 0 days, 12 hours, 00 mins, 00 secs
#SBATCH --nodes=1 # Request N nodes
#SBATCH --gres=gpu:1 # Request M GPU per node
#SBATCH --gres-flags=enforce-binding # CPU-GPU Affinity
#SBATCH --qos=gpu-normal # Request QOS Type
###
### The system will allocate 8 or 16 cores per GPU by default.
### If you need more or fewer, use the following:
### #SBATCH --cpus-per-task=K # Request K cores
###
###
### Without specifying the constraint, any available nodes that meet the requirement will be allocated
### You can specify the characteristics of the compute nodes, and even the names of the compute nodes
###
### #SBATCH --nodelist=gpu-v00 # Request a specific list of hosts
### #SBATCH --constraint="Volta|RTX8000" # Request GPU Type: Volta(V100 or V100S) or RTX8000
###
#- Log information
echo "Job start at $(date "+%Y-%m-%d %H:%M:%S")"
echo "Job run at:"
echo "$(hostnamectl)"
#- Load environments
source /tools/module_env.sh
source ~/pyt1.5/bin/activate
module list # list modules loaded
##- Tools
module load cluster-tools/v1.0
module load slurm-tools/v1.0
module load cmake/3.15.7
module load git/2.17.1
module load vim/8.1.2424
##- language
module load python3/3.6.8
##- CUDA
module load cuda-cudnn/11.1-8.1.1
##- virtualenv
# source xxxxx/activate
echo $(module list) # list modules loaded
echo $(which gcc)
echo $(which python)
echo $(which python3)
cluster-quota # nas quota
nvidia-smi --format=csv --query-gpu=name,driver_version,power.limit # gpu info
#- Warning! Please do not change your CUDA_VISIBLE_DEVICES
#- in `.bashrc`, `env.sh`, or your job script
echo "Use GPU ${CUDA_VISIBLE_DEVICES}" # which gpus
#- The CUDA_VISIBLE_DEVICES variable is assigned and specified by SLURM
#- Job step
# [EDIT HERE(TODO)]
sleep 2s
hostname
echo "python plot.py --model resnet50 --model_distill resnet50 --data cifar100"
python plot.py --model resnet50 --model_distill resnet50 --data cifar100
#- End
echo "Job end at $(date "+%Y-%m-%d %H:%M:%S")"
import torch
import numpy as np
import random
import sys
import time
import os
import dataset
import torch.nn as nn
import torch.nn.functional as F
from torch.nn import CrossEntropyLoss
from torch.optim import SGD, Adam
from torch.optim.lr_scheduler import _LRScheduler, CosineAnnealingLR
from bisect import bisect_right
from normal import save_model
def set_random_seeds(seed):
np.random.seed(seed)
torch.manual_seed(seed)
random.seed(seed)
torch.cuda.manual_seed(seed)
torch.cuda.manual_seed_all(seed)
    torch.backends.cudnn.benchmark = False
torch.backends.cudnn.deterministic = True
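# Illustrative sketch (not part of the original pipeline; the seed value is an
# arbitrary example): seeding must happen before model construction and data
# loading for runs to be repeatable, and cudnn.deterministic=True trades some
# convolution speed for determinism.
def _demo_reproducible_init(seed=42):
    set_random_seeds(seed)
    return torch.randn(2, 2)  # same tensor on every call with the same seed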
def get_pytorch_device():
device = 'cpu'
cuda = torch.cuda.is_available()
    print('Using PyTorch version:', torch.__version__, 'CUDA:', cuda)
if cuda:
device = 'cuda'
return device
class MultiStepMultiLR(_LRScheduler):
def __init__(self, optimizer, milestones, gammas, last_epoch=-1):
        if not list(milestones) == sorted(milestones):
            raise ValueError('Milestones should be a list of'
                             ' increasing integers. Got {}'.format(milestones))
self.milestones = milestones
self.gammas = gammas
super(MultiStepMultiLR, self).__init__(optimizer, last_epoch)
def get_lr(self):
lrs = []
for base_lr in self.base_lrs:
cur_milestone = bisect_right(self.milestones, self.last_epoch)
new_lr = base_lr * np.prod(self.gammas[:cur_milestone])
new_lr = round(new_lr,8)
lrs.append(new_lr)
return lrs
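# Illustrative sketch (hypothetical milestones/gammas, toy model): the
# scheduler scales the base lr by the cumulative product of the gammas whose
# milestones have passed, e.g. lr -> 0.1*lr after epoch 10 and 0.1*0.5*lr
# after epoch 20.
def _demo_multistep_multilr():
    toy = nn.Linear(2, 2)  # toy model, for illustration only
    opt = SGD(toy.parameters(), lr=0.1)
    sched = MultiStepMultiLR(opt, milestones=[10, 20], gammas=[0.1, 0.5])
    for _ in range(25):
        opt.step()
        sched.step()
    return opt.param_groups[0]['lr']  # 0.1 * 0.1 * 0.5 == 0.005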
class Logger(object):
def __init__(self, log_file, mode='out'):
# write to terminal
if mode == 'out':
self.terminal = sys.stdout
else:
self.terminal = sys.stderr
        self.log = open('{}.{}'.format(log_file, mode), "a")
def write(self, message):
self.terminal.write(message)
self.terminal.flush()
self.log.write(message)
self.log.flush()
def flush(self):
self.terminal.flush()
self.log.flush()
def __del__(self):
self.log.close()
def set_logger(log_file):
sys.stdout = Logger(log_file, 'out')
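# Usage sketch (the log path is a hypothetical example; it assumes the target
# directory already exists, e.g. created via create_path below): set_logger
# tees everything printed to stdout into '<log_file>.out' while still echoing
# it to the terminal, which is how the scripts keep per-run logs.
def _demo_logging():
    set_logger('./outputs/demo_run')  # prints also go to ./outputs/demo_run.out
    print('this line appears on the terminal and in the log file')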
def create_path(path):
if not os.path.exists(path):
os.makedirs(path, exist_ok=True)
def get_lr(optimizers):
if isinstance(optimizers, dict):
return optimizers[list(optimizers.keys())[-1]].param_groups[-1]['lr']
else:
return optimizers.param_groups[-1]['lr']
def get_loss_criterion():
return CrossEntropyLoss()
class Flatten(nn.Module):
def forward(self, input):
return input.view(input.size(0), -1)
def cnn_test(model, loader, device='cpu'):
model.eval()
top1 = dataset.AverageMeter()
top5 = dataset.AverageMeter()
with torch.no_grad():
for batch in loader:
b_x = batch[0].to(device)
b_y = batch[1].to(device)
output = model(b_x)
prec1, prec5 = dataset.accuracy(output, b_y, topk=(1, 5))
top1.update(prec1[0], b_x.size(0))
top5.update(prec5[0], b_x.size(0))
top1_acc = top1.avg.data.cpu().numpy()[()]
top5_acc = top5.avg.data.cpu().numpy()[()]
return top1_acc, top5_acc
# single training step: forward pass, loss, backward pass, parameter update
def cnn_training_step(model, optimizer, data, labels, device='cpu'):
b_x = data.to(device)
b_y = labels.to(device)
output = model(b_x)
criterion = get_loss_criterion()
loss = criterion(output, b_y)
optimizer.zero_grad()
loss.backward()
optimizer.step()
# generic training loop, shared by target and shadow model training
def cnn_train(args, model, data, epochs, optimizer, scheduler, model_params, model_path, trained_model_name, device='cpu'):
metrics = {'epoch_times':[], 'test_top1_acc':[], 'test_top5_acc':[], 'train_top1_acc':[], 'train_top5_acc':[], 'lrs':[]}
for epoch in range(1, epochs+1):
cur_lr = get_lr(optimizer)
if not hasattr(model, 'augment_training') or model.augment_training:
if args.mode == 'target':
print('load aug_target_dataset ... ')
train_loader = data.aug_target_train_loader
test_loader = data.aug_target_test_loader
elif args.mode == 'shadow':
print('load aug_shadow_dataset ...')
train_loader = data.aug_shadow_train_loader
test_loader = data.aug_shadow_test_loader
else:
if args.mode == 'target':
print('load target_dataset ... ')
train_loader = data.target_train_loader
test_loader = data.target_test_loader
elif args.mode == 'shadow':
print('load shadow_dataset ...')
train_loader = data.shadow_train_loader
test_loader = data.shadow_test_loader
start_time = time.time()
model.train()
print('Epoch: {}/{}'.format(epoch, epochs))
print('Cur lr: {}'.format(cur_lr))
for x, y, idx in train_loader:
cnn_training_step(model, optimizer, x, y, device)
end_time = time.time()
        # no separate validation set is used (the test set serves as validation)
top1_test, top5_test = cnn_test(model, test_loader, device)
print('Top1 Test accuracy: {}'.format(top1_test))
print('Top5 Test accuracy: {}'.format(top5_test))
metrics['test_top1_acc'].append(top1_test)
metrics['test_top5_acc'].append(top5_test)
top1_train, top5_train = cnn_test(model, train_loader, device)
print('Top1 Train accuracy: {}'.format(top1_train))
print('Top5 Train accuracy: {}'.format(top5_train))
metrics['train_top1_acc'].append(top1_train)
metrics['train_top5_acc'].append(top5_train)
epoch_time = int(end_time-start_time)
print('Epoch took {} seconds.'.format(epoch_time))
metrics['epoch_times'].append(epoch_time)
metrics['lrs'].append(cur_lr)
scheduler.step()
model_params['train_top1_acc'] = metrics['train_top1_acc']
model_params['test_top1_acc'] = metrics['test_top1_acc']
model_params['train_top5_acc'] = metrics['train_top5_acc']
model_params['test_top5_acc'] = metrics['test_top5_acc']
model_params['epoch_times'] = metrics['epoch_times']
model_params['lrs'] = metrics['lrs']
total_training_time = sum(model_params['epoch_times'])
model_params['total_time'] = total_training_time
print('Training took {} seconds...'.format(total_training_time))
return metrics
# single distillation step (distilling a trained target/shadow model via KL divergence)
def cnn_training_step_dis(model, model_dis, optimizer, data, labels, device='cpu'):
b_x = data.to(device)
    # the ground-truth labels are not used by the distillation loss
b_y_1 = labels.to(device)
output = model_dis(b_x)
    # distill the target/shadow model into model_dis: the loss compares the student's
    # output with the teacher's output rather than with the labels; the teacher is the
    # already-trained target/shadow model, and the optimizer only updates model_dis's weights
b_y = model(b_x)
loss = nn.KLDivLoss(reduction='batchmean')(F.log_softmax(output, dim=1), F.softmax(b_y, dim=1))
optimizer.zero_grad()
loss.backward()
optimizer.step()
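# A minimal variant sketch (not the function used above): since the teacher's
# parameters are never stepped, its forward pass can run under torch.no_grad()
# to skip building an autograd graph for the teacher; the loss is unchanged.
def _distill_step_no_teacher_grad(model, model_dis, optimizer, data, device='cpu'):
    b_x = data.to(device)
    with torch.no_grad():
        teacher_logits = model(b_x)  # teacher output, detached from autograd
    student_logits = model_dis(b_x)
    loss = nn.KLDivLoss(reduction='batchmean')(
        F.log_softmax(student_logits, dim=1), F.softmax(teacher_logits, dim=1))
    optimizer.zero_grad()
    loss.backward()
    optimizer.step()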
# training loop for the distilled model
def cnn_train_dis(args, model, model_dis, data, epochs, optimizer, scheduler, model_params, model_path, trained_model_name, device='cpu'):
metrics = {'epoch_times':[], 'test_top1_acc':[], 'test_top5_acc':[], 'train_top1_acc':[], 'train_top5_acc':[], 'lrs':[]}
for epoch in range(1, epochs+1):
cur_lr = get_lr(optimizer)
if not hasattr(model, 'augment_training') or model.augment_training:
print(f'load aug_{args.mode}_dataset ...')
train_loader = data.aug_distill_train_loader
test_loader = data.aug_distill_test_loader
else:
print(f'load {args.mode}_dataset ...')
train_loader = data.distill_train_loader
test_loader = data.distill_test_loader
start_time = time.time()
model = model.to(device)
model_dis = model_dis.to(device)
        model_dis.train()  # only the distilled model's weights are updated
        model.eval()  # the target/shadow model (teacher) is never updated
print('Epoch: {}/{}'.format(epoch, epochs))
print('Cur lr: {}'.format(cur_lr))
for i, (x, y, idx) in enumerate(train_loader):
cnn_training_step_dis(model, model_dis, optimizer, x, y, device)
end_time = time.time()
top1_test, top5_test = cnn_test(model_dis, test_loader, device)
print('Top1 Test accuracy: {}'.format(top1_test))
print('Top5 Test accuracy: {}'.format(top5_test))
metrics['test_top1_acc'].append(top1_test)
metrics['test_top5_acc'].append(top5_test)
top1_train, top5_train = cnn_test(model_dis, train_loader, device)
print('Top1 Train accuracy: {}'.format(top1_train))
print('Top5 Train accuracy: {}'.format(top5_train))
metrics['train_top1_acc'].append(top1_train)
metrics['train_top5_acc'].append(top5_train)
epoch_time = int(end_time-start_time)
print('Epoch took {} seconds.'.format(epoch_time))
metrics['epoch_times'].append(epoch_time)
metrics['lrs'].append(cur_lr)
scheduler.step()
model_params['train_top1_acc'] = metrics['train_top1_acc']
model_params['test_top1_acc'] = metrics['test_top1_acc']
model_params['train_top5_acc'] = metrics['train_top5_acc']
model_params['test_top5_acc'] = metrics['test_top5_acc']
model_params['epoch_times'] = metrics['epoch_times']
model_params['lrs'] = metrics['lrs']
total_training_time = sum(model_params['epoch_times'])
model_params['total_time'] = total_training_time
print('Training took {} seconds...'.format(total_training_time))
save_model(model_dis, model_params, model_path, trained_model_name, epoch=epoch)
return metrics
def get_dataset(dataset, mode, aug=False, batch_size=512, add_trigger=False):
if dataset == 'cifar10':
return load_cifar10(mode, aug, batch_size, add_trigger)
elif dataset == 'gtsrb':
return load_gtsrb(mode, aug, batch_size, add_trigger)
elif dataset == 'cinic10':
return load_cinic10(mode, aug, batch_size, add_trigger)
elif dataset == 'cifar100':
return load_cifar100(mode, aug, batch_size)
def load_gtsrb(mode, aug, batch_size, add_trigger=False):
gtsrb_data = dataset.GTSRB(mode, aug, batch_size=batch_size)
return gtsrb_data
def load_cinic10(mode, aug, batch_size, add_trigger=False):
cinic10_data = dataset.CINIC10(mode, aug, batch_size=batch_size, add_trigger=add_trigger)
return cinic10_data
def load_cifar10(mode, aug, batch_size, add_trigger=False):
cifar10_data = dataset.CIFAR10(mode, aug, batch_size=batch_size, add_trigger=add_trigger)
return cifar10_data
def load_cifar100(mode, aug, batch_size):
cifar100_data = dataset.CIFAR100(mode, aug, batch_size=batch_size)
return cifar100_data
# TODO: revise the training strategy
def get_full_optimizer(model, lr_params, args):
    lr = lr_params[0]
    weight_decay = lr_params[1]
    momentum = lr_params[2]
optimizer = SGD(filter(lambda p: p.requires_grad, model.parameters()), lr=lr, momentum=momentum, weight_decay=weight_decay)
# optimizer = Adam(filter(lambda p: p.requires_grad, model.parameters()), lr=lr)
scheduler = CosineAnnealingLR(optimizer, args.epochs)
return optimizer, scheduler
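# Usage sketch (the lr_params values are assumptions for illustration):
# lr_params is an (lr, weight_decay, momentum) tuple, and CosineAnnealingLR
# with T_max = args.epochs and the default eta_min = 0 decays the lr as
#   lr_t = 0.5 * lr * (1 + cos(pi * t / args.epochs)),
# reaching ~0 at the final epoch.
def _demo_full_optimizer(model, args):
    lr_params = (0.1, 5e-4, 0.9)  # hypothetical (lr, weight_decay, momentum)
    optimizer, scheduler = get_full_optimizer(model, lr_params, args)
    return optimizer, scheduler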