Commit 759cfc47 by YuxuanGuo

init commit

*.npy
data
data-bin
*.pkl
# Byte-compiled / optimized / DLL files
__pycache__/
*.py[cod]
*$py.class
# C extensions
*.so
# Distribution / packaging
.Python
build/
develop-eggs/
dist/
downloads/
eggs/
.eggs/
lib/
lib64/
parts/
sdist/
var/
wheels/
share/python-wheels/
*.egg-info/
.installed.cfg
*.egg
MANIFEST
# PyInstaller
# Usually these files are written by a python script from a template
# before PyInstaller builds the exe, so as to inject date/other infos into it.
*.manifest
*.spec
# Installer logs
pip-log.txt
pip-delete-this-directory.txt
# Unit test / coverage reports
htmlcov/
.tox/
.nox/
.coverage
.coverage.*
.cache
nosetests.xml
coverage.xml
*.cover
*.py,cover
.hypothesis/
.pytest_cache/
cover/
# Translations
*.mo
*.pot
# Django stuff:
*.log
local_settings.py
db.sqlite3
db.sqlite3-journal
# Flask stuff:
instance/
.webassets-cache
# Scrapy stuff:
.scrapy
# Sphinx documentation
docs/_build/
# PyBuilder
.pybuilder/
target/
# Jupyter Notebook
.ipynb_checkpoints
# IPython
profile_default/
ipython_config.py
# pyenv
# For a library or package, you might want to ignore these files since the code is
# intended to run in multiple environments; otherwise, check them in:
# .python-version
# pipenv
# According to pypa/pipenv#598, it is recommended to include Pipfile.lock in version control.
# However, in case of collaboration, if having platform-specific dependencies or dependencies
# having no cross-platform support, pipenv may install dependencies that don't work, or not
# install all needed dependencies.
#Pipfile.lock
# poetry
# Similar to Pipfile.lock, it is generally recommended to include poetry.lock in version control.
# This is especially recommended for binary packages to ensure reproducibility, and is more
# commonly ignored for libraries.
# https://python-poetry.org/docs/basic-usage/#commit-your-poetrylock-file-to-version-control
#poetry.lock
# pdm
# Similar to Pipfile.lock, it is generally recommended to include pdm.lock in version control.
#pdm.lock
# pdm stores project-wide configurations in .pdm.toml, but it is recommended to not include it
# in version control.
# https://pdm.fming.dev/#use-with-ide
.pdm.toml
# PEP 582; used by e.g. github.com/David-OConnor/pyflow and github.com/pdm-project/pdm
__pypackages__/
# Celery stuff
celerybeat-schedule
celerybeat.pid
# SageMath parsed files
*.sage.py
# Environments
.env
.venv
env/
venv/
ENV/
env.bak/
venv.bak/
# Spyder project settings
.spyderproject
.spyproject
# Rope project settings
.ropeproject
# mkdocs documentation
/site
# mypy
.mypy_cache/
.dmypy.json
dmypy.json
# Pyre type checker
.pyre/
# pytype static type analyzer
.pytype/
# Cython debug symbols
cython_debug/
# PyCharm
# JetBrains specific template is maintained in a separate JetBrains.gitignore that can
# be found at https://github.com/github/gitignore/blob/main/Global/JetBrains.gitignore
# and can be added to the global gitignore or merged into this file. For a more nuclear
# option (not recommended) you can uncomment the following to ignore the entire idea folder.
#.idea/
# from __future__ import print_function
import argparse
import torch
import torch.nn as nn
import torch.optim as optim
from torch.autograd import Variable
from torch.utils.data import TensorDataset, DataLoader
import numpy as np
from net import Net
# Training settings
parser = argparse.ArgumentParser(description='Classification')
parser.add_argument('--batch-size', type=int, default=32, metavar='N',
help='input batch size for training (default: 32)')
parser.add_argument('--test-batch-size', type=int, default=1000, metavar='N',
help='input batch size for testing (default: 1000)')
parser.add_argument('--epochs', type=int, default=100, metavar='N',
help='number of epochs to train (default: 100)')
parser.add_argument('--lr', type=float, default=0.001, metavar='LR',
help='learning rate (default: 0.001)')
parser.add_argument('--momentum', type=float, default=0.5, metavar='M',
help='SGD momentum (default: 0.5)')
parser.add_argument('--no-cuda', action='store_true', default=False,
help='disables CUDA training')
parser.add_argument('--seed', type=int, default=1, metavar='S',
help='random seed (default: 1)')
parser.add_argument('--gpus', default=0,
help='gpus used for training - e.g. 0,1,3')
parser.add_argument('--log-interval', type=int, default=10, metavar='N',
help='how many batches to wait before logging training status')
args = parser.parse_args()
# print(args)
# exit(0)
args.cuda = not args.no_cuda and torch.cuda.is_available()
torch.manual_seed(args.seed)
if args.cuda:
torch.cuda.manual_seed(args.seed)
val_x = np.load("val_x.npy")
val_y = np.load("val_y.npy")
train_x = np.load("train_x.npy")
train_y = np.load("train_y.npy")
batch_size = args.batch_size
train_loader = TensorDataset(torch.from_numpy(train_x), torch.from_numpy(train_y))
train_loader = DataLoader(train_loader, shuffle=True, batch_size=batch_size, drop_last=True)
test_loader = TensorDataset(torch.from_numpy(val_x), torch.from_numpy(val_y))
test_loader = DataLoader(test_loader, shuffle=False, batch_size=1000)
model = Net(10)
from torchinfo import summary
summary(model, input_size=(batch_size, 32, 32))
if args.cuda:
torch.cuda.set_device(0)
model.cuda()
criterion = nn.CrossEntropyLoss()
optimizer = optim.Adam(model.parameters(), lr=args.lr)
def train(epoch):
model.train()
for batch_idx, (data, target) in enumerate(train_loader):
data = torch.tensor(data, dtype=torch.float)
target = target.long()
if args.cuda:
data, target = data.cuda(), target.cuda()
# target = target.cuda()
data, target = Variable(data), Variable(target)
# print(data)
# print(data.shape)
# print(target.shape)
# exit(0)
optimizer.zero_grad()
output = model(data)
# print(output.shape)
loss = criterion(output, target)
loss.backward()
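# The two loops below restore/clamp full-precision copies (p.org) kept by
# quantized layers; in this fp32 net no parameter has an .org attribute,
# so they are no-ops here and only matter for the quantized variants.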
for p in list(model.parameters()):
if hasattr(p,'org'):
p.data.copy_(p.org)
optimizer.step()
for p in list(model.parameters()):
if hasattr(p,'org'):
p.org.copy_(p.data.clamp_(-1,1))
if batch_idx % args.log_interval == 0:
print('Train Epoch: {} [{}/{} ({:.0f}%)]\tLoss: {:.6f}'.format(
epoch, batch_idx * len(data), len(train_loader.dataset),
100. * batch_idx / len(train_loader), loss.item()))
def accuracy(output, target, k=5):
"""Computes the precision@k for the specified values of k"""
batch_size = target.size(0)
_, pred = output.topk(k, 1, True, True)
pred = pred.t()
correct = pred.eq(target.view(1, -1).expand_as(pred))
return correct.view(k, batch_size).float().sum()
def test():
model.eval()
test_loss = 0
correct_1 = 0
correct_5 = 0
with torch.no_grad():
for data, target in test_loader:
data = torch.tensor(data, dtype=torch.float)
target = target.long()
if args.cuda:
data, target = data.cuda(), target.cuda()
data, target = Variable(data), Variable(target)
output = model(data)
test_loss += criterion(output, target).item() # accumulate each batch's mean loss
# pred = output.data.max(1, keepdim=True)[1] # get the index of the max log-probability
correct_1 += accuracy(output, target, k=1)
correct_5 += accuracy(output, target, k=5)
test_loss /= len(test_loader.dataset)
test_acc = 100. * correct_1 / len(test_loader.dataset)
print('\nTest set: Average loss: {:.4f}, Accuracy@1: {}/{} ({:.0f}%), Accuracy@5: {}/{} ({:.0f}%)\n'.format(
test_loss,
int(correct_1), len(test_loader.dataset), 100. * correct_1 / len(test_loader.dataset),
int(correct_5), len(test_loader.dataset), 100. * correct_5 / len(test_loader.dataset),
))
return test_acc
max_test_acc = 0
for epoch in range(1, args.epochs + 1):
train(epoch)
test_acc = test()
if test_acc > max_test_acc:
max_test_acc = test_acc
torch.save(model.state_dict(), "model_parameter_best.pkl")
print("Best test acc: %.4f.\n" % max_test_acc)
if epoch%40==0:
optimizer.param_groups[0]['lr']=optimizer.param_groups[0]['lr']*0.1
# for name, parameters in model.named_parameters():  # print the size of each layer's parameters
# print(name, ':', parameters.size())
# print(parameters)
torch.save(model.state_dict(), "model_parameter_fp32.pkl")
import torch
import torch.nn as nn
class Net(nn.Module):
def __init__(self, num_classes=1000):
super(Net, self).__init__()
# self.ratioInfl=1
self.features = nn.Sequential(
nn.Conv2d(1, 10, kernel_size=11, stride=1, padding=1),
# BinarizeConv2d(1, int(64*self.ratioInfl), kernel_size=5, stride=2, padding=1),
nn.BatchNorm2d(10, affine=False),
nn.ReLU(inplace=True),
nn.MaxPool2d(kernel_size=3, stride=2),
nn.Conv2d(10, 10, kernel_size=5, padding=2),
# BinarizeConv2d(int(64*self.ratioInfl), int(192*self.ratioInfl), kernel_size=3, padding=1),
nn.BatchNorm2d(10, affine=False),
nn.ReLU(inplace=True),
nn.MaxPool2d(kernel_size=3, stride=2),
nn.Conv2d(10, 32, kernel_size=3, padding=1),
nn.BatchNorm2d(32, affine=False),
nn.ReLU(inplace=True),
nn.Conv2d(32, 32, kernel_size=3, padding=1),
nn.BatchNorm2d(32, affine=False),
nn.ReLU(inplace=True),
nn.Conv2d(32, 32, kernel_size=3, padding=1),
nn.BatchNorm2d(32, affine=False),
nn.ReLU(inplace=True),
nn.MaxPool2d(kernel_size=3, stride=2),
)
self.classifier_1 = nn.Sequential(
nn.Linear(32 * 2 * 2, 1024),
# BinarizeLinear(256 * 1 * 1, 4096),
nn.BatchNorm1d(1024),
nn.ReLU(inplace=True),
)
self.classifier_2 = nn.Sequential(
nn.Linear(256, 256),
nn.BatchNorm1d(256),
nn.ReLU(inplace=True),
nn.Linear(256, 64),
nn.BatchNorm1d(64),
nn.ReLU(inplace=True),
)
self.classifier_3 = nn.Sequential(
nn.Linear(256, num_classes),
nn.BatchNorm1d(num_classes),
nn.Softmax(dim=1)
)
def forward(self, x):
x = x.unsqueeze(1)
# print(x.shape)
x = self.features(x)
# print(x.shape)
# exit()
# exit(0)
# x = x.view(-1, 256 * 1 * 1)
x = x.view(-1, 32 * 2 * 2)
# x = self.classifier(x)
x = self.classifier_1(x)
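# classifier_1 yields a 1024-dim vector per sample; treat it as four 256-dim
# chunks that share classifier_2 (256 -> 64), then regroup the four 64-dim
# outputs into one 256-dim vector per sample for classifier_3.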
B, C = x.size()
x = x.view(4 * B, 256)
x = self.classifier_2(x)
x = x.view(B, 256)
x = self.classifier_3(x)
return x
import os
from sklearn.model_selection import train_test_split
# import cv2
import numpy as np
from tqdm import tqdm
from PIL import Image
path = ["../data-bin/TUGraz_bike/", "../data-bin/TUGraz_cars/", "../data-bin/TUGraz_person/"]
label = []
data = []
num = 0
img_size = 32
for k in range(3):
for file in os.listdir(path[k]):
num += 1
shape = (num, img_size, img_size)
with tqdm(total=num) as bar:
for k in range(3):
for file in os.listdir(path[k]):
img = Image.open(path[k]+file)
img = img.resize((img_size, img_size), Image.Resampling.LANCZOS)
# img = img.resize((28, 28), Image.ANTIALIAS)
arr = np.array(img)
# print(arr.size)
# exit()
res = list(arr.reshape(-1))
res = [-1 if i == 0 else 1 for i in res]
data.append(res)
label.append(k)
bar.update(1)
X = np.array(data).reshape(shape)
Y = np.array(label)
train_data, val_data = train_test_split([i for i in range(len(X))], test_size=0.2, random_state=2022)
np.save("train_x.npy", X[train_data])
np.save("train_y.npy", Y[train_data])
np.save("val_x.npy", X[val_data])
np.save("val_y.npy", Y[val_data])
# Classification-fp32
- `python preprocess.py`
- `python main.py`
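- Optional flags (defined in `main.py`'s argparse): e.g. `python main.py --batch-size 32 --epochs 100 --lr 0.001`; pass `--no-cuda` to force CPU training.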
# from __future__ import print_function
import argparse
import torch
import torch.nn as nn
import torch.optim as optim
from torch.autograd import Variable
from torch.utils.data import TensorDataset, DataLoader
import numpy as np
from net import Net
# Training settings
parser = argparse.ArgumentParser(description='Classification')
parser.add_argument('--batch-size', type=int, default=32, metavar='N',
help='input batch size for training (default: 32)')
parser.add_argument('--test-batch-size', type=int, default=1000, metavar='N',
help='input batch size for testing (default: 1000)')
parser.add_argument('--epochs', type=int, default=100, metavar='N',
help='number of epochs to train (default: 100)')
parser.add_argument('--lr', type=float, default=0.001, metavar='LR',
help='learning rate (default: 0.001)')
parser.add_argument('--momentum', type=float, default=0.5, metavar='M',
help='SGD momentum (default: 0.5)')
parser.add_argument('--no-cuda', action='store_true', default=False,
help='disables CUDA training')
parser.add_argument('--seed', type=int, default=1, metavar='S',
help='random seed (default: 1)')
parser.add_argument('--gpus', default=0,
help='gpus used for training - e.g. 0,1,3')
parser.add_argument('--log-interval', type=int, default=10, metavar='N',
help='how many batches to wait before logging training status')
args = parser.parse_args()
# print(args)
# exit(0)
args.cuda = not args.no_cuda and torch.cuda.is_available()
torch.manual_seed(args.seed)
if args.cuda:
torch.cuda.manual_seed(args.seed)
val_x = np.load("val_x.npy")
val_y = np.load("val_y.npy")
train_x = np.load("train_x.npy")
train_y = np.load("train_y.npy")
batch_size = args.batch_size
train_loader = TensorDataset(torch.from_numpy(train_x), torch.from_numpy(train_y))
train_loader = DataLoader(train_loader, shuffle=True, batch_size=batch_size, drop_last=True)
test_loader = TensorDataset(torch.from_numpy(val_x), torch.from_numpy(val_y))
test_loader = DataLoader(test_loader, shuffle=False, batch_size=1000)
model = Net(10)
from torchinfo import summary
summary(model, input_size=(batch_size, 3, 32, 32))
if args.cuda:
torch.cuda.set_device(0)
model.cuda()
criterion = nn.CrossEntropyLoss()
optimizer = optim.Adam(model.parameters(), lr=args.lr)
def train(epoch):
model.train()
for batch_idx, (data, target) in enumerate(train_loader):
data = torch.tensor(data, dtype=torch.float)
target = target.long()
if args.cuda:
data, target = data.cuda(), target.cuda()
# target = target.cuda()
data, target = Variable(data), Variable(target)
# print(data)
# print(data.shape)
# print(target.shape)
# exit(0)
optimizer.zero_grad()
output = model(data)
# print(output.shape)
loss = criterion(output, target)
loss.backward()
for p in list(model.parameters()):
if hasattr(p,'org'):
p.data.copy_(p.org)
optimizer.step()
for p in list(model.parameters()):
if hasattr(p,'org'):
p.org.copy_(p.data.clamp_(-1,1))
if batch_idx % args.log_interval == 0:
print('Train Epoch: {} [{}/{} ({:.0f}%)]\tLoss: {:.6f}'.format(
epoch, batch_idx * len(data), len(train_loader.dataset),
100. * batch_idx / len(train_loader), loss.item()))
def accuracy(output, target, k=5):
"""Computes the precision@k for the specified values of k"""
batch_size = target.size(0)
_, pred = output.topk(k, 1, True, True)
pred = pred.t()
correct = pred.eq(target.view(1, -1).expand_as(pred))
return correct.view(k, batch_size).float().sum()
def test():
model.eval()
test_loss = 0
correct_1 = 0
correct_5 = 0
with torch.no_grad():
for data, target in test_loader:
data = torch.tensor(data, dtype=torch.float)
target = target.long()
if args.cuda:
data, target = data.cuda(), target.cuda()
data, target = Variable(data), Variable(target)
output = model(data)
test_loss += criterion(output, target).item() # accumulate each batch's mean loss
# pred = output.data.max(1, keepdim=True)[1] # get the index of the max log-probability
correct_1 += accuracy(output, target, k=1)
correct_5 += accuracy(output, target, k=5)
test_loss /= len(test_loader.dataset)
test_acc = 100. * correct_1 / len(test_loader.dataset)
print('\nTest set: Average loss: {:.4f}, Accuracy@1: {}/{} ({:.0f}%), Accuracy@5: {}/{} ({:.0f}%)\n'.format(
test_loss,
int(correct_1), len(test_loader.dataset), 100. * correct_1 / len(test_loader.dataset),
int(correct_5), len(test_loader.dataset), 100. * correct_5 / len(test_loader.dataset),
))
return test_acc
max_test_acc = 0
for epoch in range(1, args.epochs + 1):
train(epoch)
test_acc = test()
if test_acc > max_test_acc:
max_test_acc = test_acc
torch.save(model.state_dict(), "model_parameter_best.pkl")
print("Best test acc: %.4f.\n" % max_test_acc)
if epoch%40==0:
optimizer.param_groups[0]['lr']=optimizer.param_groups[0]['lr']*0.1
# for name, parameters in model.named_parameters():  # print the size of each layer's parameters
# print(name, ':', parameters.size())
# print(parameters)
torch.save(model.state_dict(), "model_parameter_fp32.pkl")
import torch
import torch.nn as nn
class Net(nn.Module):
def __init__(self, num_classes=1000):
super(Net, self).__init__()
# self.ratioInfl=1
self.features = nn.Sequential(
nn.Conv2d(3, 10, kernel_size=11, stride=1, padding=1),
# BinarizeConv2d(1, int(64*self.ratioInfl), kernel_size=5, stride=2, padding=1),
nn.BatchNorm2d(10, affine=False),
nn.ReLU(inplace=True),
nn.MaxPool2d(kernel_size=3, stride=2),
nn.Conv2d(10, 10, kernel_size=5, padding=2),
# BinarizeConv2d(int(64*self.ratioInfl), int(192*self.ratioInfl), kernel_size=3, padding=1),
nn.BatchNorm2d(10, affine=False),
nn.ReLU(inplace=True),
nn.MaxPool2d(kernel_size=3, stride=2),
nn.Conv2d(10, 32, kernel_size=3, padding=1),
nn.BatchNorm2d(32, affine=False),
nn.ReLU(inplace=True),
nn.Conv2d(32, 32, kernel_size=3, padding=1),
nn.BatchNorm2d(32, affine=False),
nn.ReLU(inplace=True),
nn.Conv2d(32, 32, kernel_size=3, padding=1),
nn.BatchNorm2d(32, affine=False),
nn.ReLU(inplace=True),
nn.MaxPool2d(kernel_size=3, stride=2),
)
self.classifier_1 = nn.Sequential(
nn.Linear(32 * 2 * 2, 1024),
# BinarizeLinear(256 * 1 * 1, 4096),
nn.BatchNorm1d(1024),
nn.ReLU(inplace=True),
)
self.classifier_2 = nn.Sequential(
nn.Linear(256, 256),
nn.BatchNorm1d(256),
nn.ReLU(inplace=True),
nn.Linear(256, 64),
nn.BatchNorm1d(64),
nn.ReLU(inplace=True),
)
self.classifier_3 = nn.Sequential(
nn.Linear(256, num_classes),
nn.BatchNorm1d(num_classes),
nn.Softmax(dim=1)
)
def forward(self, x):
# print(x.shape)
x = self.features(x)
# print(x.shape)
# exit()
# exit(0)
# x = x.view(-1, 256 * 1 * 1)
x = x.view(-1, 32 * 2 * 2)
# x = self.classifier(x)
x = self.classifier_1(x)
B, C = x.size()
x = x.view(4 * B, 256)
x = self.classifier_2(x)
x = x.view(B, 256)
x = self.classifier_3(x)
return x
import os
from sklearn.model_selection import train_test_split
# import cv2
import numpy as np
from tqdm import tqdm
from PIL import Image
path = ["../data/TUGraz_bike/", "../data/TUGraz_cars/", "../data/TUGraz_person/"]
label = []
data = []
num = 0
img_size = 32
for k in range(3):
for file in os.listdir(path[k]):
num += 1
shape = (num, 3, img_size, img_size)
with tqdm(total=num) as bar:
for k in range(3):
for file in os.listdir(path[k]):
img = Image.open(path[k]+file)
img = img.resize((img_size, img_size), Image.Resampling.LANCZOS)
# img = img.resize((28, 28), Image.ANTIALIAS)
arr = np.array(img)
arr = np.transpose(arr, (2, 0, 1))
res = list(arr.reshape(-1))
data.append(res)
label.append(k)
bar.update(1)
X = np.array(data).reshape(shape)
Y = np.array(label)
train_data, val_data = train_test_split([i for i in range(len(X))], test_size=0.2, random_state=2022)
np.save("train_x.npy", X[train_data])
np.save("train_y.npy", Y[train_data])
np.save("val_x.npy", X[val_data])
np.save("val_y.npy", Y[val_data])
# Classification-fp32
- `python preprocess.py`
- `python main.py`
import torch
import torch.nn as nn
from qtorch.quant import fixed_point_quantize as fpq
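# qtorch's fixed_point_quantize(x, wl, fl) simulates fixed-point numbers with a
# total word length of wl bits, fl of them fractional; (wl=8, fl=4) therefore
# quantizes to a step of 2**-4 = 0.0625 over roughly [-8, 8) (assuming the
# library's default clamping and rounding behaviour).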
def to_int8(tensor):
return fpq(tensor, 8, 4)
class Int8_Linear(nn.Linear):
def __init__(self, *kargs, **kwargs):
super(Int8_Linear, self).__init__(*kargs, **kwargs)
def forward(self, input):
input.data=to_int8(input.data)
if not hasattr(self.weight,'org'):
self.weight.org=self.weight.data.clone()
self.weight.data=to_int8(self.weight.org)
out = nn.functional.linear(input, self.weight)
if self.bias is not None:
self.bias.org=self.bias.data.clone()
out += self.bias.view(1, -1).expand_as(out)
return out
class Int8_Conv2d(nn.Conv2d):
def __init__(self, *kargs, **kwargs):
super(Int8_Conv2d, self).__init__(*kargs, **kwargs)
def forward(self, input):
input.data = to_int8(input.data)
if not hasattr(self.weight,'org'):
self.weight.org=self.weight.data.clone()
self.weight.data=to_int8(self.weight.org)
out = nn.functional.conv2d(input, self.weight, None, self.stride,
self.padding, self.dilation, self.groups)
if self.bias is not None:
self.bias.org=self.bias.data.clone()
out += self.bias.view(1, -1, 1, 1).expand_as(out)
return out
class Int8_MaxPool2d(nn.MaxPool2d):
def __init__(self, *kargs, **kwargs):
super(Int8_MaxPool2d, self).__init__(*kargs, **kwargs)
def forward(self, input):
input.data = to_int8(input.data)
out = nn.functional.max_pool2d(input, self.kernel_size, self.stride, self.padding, self.dilation)
return out
# from __future__ import print_function
import argparse
import torch
import torch.nn as nn
import torch.optim as optim
from torch.autograd import Variable
from torch.utils.data import TensorDataset, DataLoader
import numpy as np
from net import Net
# Training settings
parser = argparse.ArgumentParser(description='Classification')
parser.add_argument('--batch-size', type=int, default=32, metavar='N',
help='input batch size for training (default: 32)')
parser.add_argument('--test-batch-size', type=int, default=1000, metavar='N',
help='input batch size for testing (default: 1000)')
parser.add_argument('--epochs', type=int, default=100, metavar='N',
help='number of epochs to train (default: 100)')
parser.add_argument('--lr', type=float, default=0.001, metavar='LR',
help='learning rate (default: 0.001)')
parser.add_argument('--momentum', type=float, default=0.5, metavar='M',
help='SGD momentum (default: 0.5)')
parser.add_argument('--no-cuda', action='store_true', default=False,
help='disables CUDA training')
parser.add_argument('--seed', type=int, default=1, metavar='S',
help='random seed (default: 1)')
parser.add_argument('--gpus', default=0,
help='gpus used for training - e.g. 0,1,3')
parser.add_argument('--log-interval', type=int, default=10, metavar='N',
help='how many batches to wait before logging training status')
args = parser.parse_args()
# print(args)
# exit(0)
args.cuda = not args.no_cuda and torch.cuda.is_available()
torch.manual_seed(args.seed)
if args.cuda:
torch.cuda.manual_seed(args.seed)
val_x = np.load("val_x.npy")
val_y = np.load("val_y.npy")
train_x = np.load("train_x.npy")
train_y = np.load("train_y.npy")
batch_size = args.batch_size
train_loader = TensorDataset(torch.from_numpy(train_x), torch.from_numpy(train_y))
train_loader = DataLoader(train_loader, shuffle=True, batch_size=batch_size, drop_last=True)
test_loader = TensorDataset(torch.from_numpy(val_x), torch.from_numpy(val_y))
test_loader = DataLoader(test_loader, shuffle=False, batch_size=1000)
model = Net(10)
from torchinfo import summary
summary(model, input_size=(batch_size, 32, 32))
if args.cuda:
torch.cuda.set_device(0)
model.cuda()
criterion = nn.CrossEntropyLoss()
optimizer = optim.Adam(model.parameters(), lr=args.lr)
def train(epoch):
model.train()
for batch_idx, (data, target) in enumerate(train_loader):
data = torch.tensor(data, dtype=torch.float)
target = target.long()
if args.cuda:
data, target = data.cuda(), target.cuda()
# target = target.cuda()
data, target = Variable(data), Variable(target)
# print(data)
# print(data.shape)
# print(target.shape)
# exit(0)
optimizer.zero_grad()
output = model(data)
# print(output.shape)
loss = criterion(output, target)
loss.backward()
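# Quantization-aware step: the Int8 layers stash full-precision weights in
# p.org before quantizing. Restore p.org so the optimizer updates the
# full-precision copy, step, then write the result (clamped to [-1, 1])
# back into p.org for the next forward pass.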
for p in list(model.parameters()):
if hasattr(p,'org'):
p.data.copy_(p.org)
optimizer.step()
for p in list(model.parameters()):
if hasattr(p,'org'):
p.org.copy_(p.data.clamp_(-1,1))
if batch_idx % args.log_interval == 0:
print('Train Epoch: {} [{}/{} ({:.0f}%)]\tLoss: {:.6f}'.format(
epoch, batch_idx * len(data), len(train_loader.dataset),
100. * batch_idx / len(train_loader), loss.item()))
def accuracy(output, target, k=5):
"""Computes the precision@k for the specified values of k"""
batch_size = target.size(0)
_, pred = output.topk(k, 1, True, True)
pred = pred.t()
correct = pred.eq(target.view(1, -1).expand_as(pred))
return correct.view(k, batch_size).float().sum()
def test():
model.eval()
test_loss = 0
correct_1 = 0
correct_5 = 0
with torch.no_grad():
for data, target in test_loader:
data = torch.tensor(data, dtype=torch.float)
target = target.long()
if args.cuda:
data, target = data.cuda(), target.cuda()
data, target = Variable(data), Variable(target)
output = model(data)
test_loss += criterion(output, target).item() # accumulate each batch's mean loss
# pred = output.data.max(1, keepdim=True)[1] # get the index of the max log-probability
correct_1 += accuracy(output, target, k=1)
correct_5 += accuracy(output, target, k=5)
test_loss /= len(test_loader.dataset)
test_acc = 100. * correct_1 / len(test_loader.dataset)
print('\nTest set: Average loss: {:.4f}, Accuracy@1: {}/{} ({:.0f}%), Accuracy@5: {}/{} ({:.0f}%)\n'.format(
test_loss,
int(correct_1), len(test_loader.dataset), 100. * correct_1 / len(test_loader.dataset),
int(correct_5), len(test_loader.dataset), 100. * correct_5 / len(test_loader.dataset),
))
return test_acc
max_test_acc = 0
for epoch in range(1, args.epochs + 1):
train(epoch)
test_acc = test()
if test_acc > max_test_acc:
max_test_acc = test_acc
torch.save(model.state_dict(), "model_parameter_best.pkl")
print("Best test acc: %.4f.\n" % max_test_acc)
if epoch%40==0:
optimizer.param_groups[0]['lr']=optimizer.param_groups[0]['lr']*0.1
# for name, parameters in model.named_parameters():  # print the size of each layer's parameters
# print(name, ':', parameters.size())
# print(parameters)
torch.save(model.state_dict(), "model_parameter_int8.pkl")
import torch
import torch.nn as nn
from int8_modules import *
class Net(nn.Module):
def __init__(self, num_classes=1000):
super(Net, self).__init__()
# self.ratioInfl=1
self.features = nn.Sequential(
Int8_Conv2d(1, 10, kernel_size=11, stride=1, padding=1),
# BinarizeConv2d(1, int(64*self.ratioInfl), kernel_size=5, stride=2, padding=1),
nn.BatchNorm2d(10, affine=False),
nn.ReLU(inplace=True),
Int8_MaxPool2d(kernel_size=3, stride=2),
Int8_Conv2d(10, 10, kernel_size=5, padding=2),
# BinarizeConv2d(int(64*self.ratioInfl), int(192*self.ratioInfl), kernel_size=3, padding=1),
nn.BatchNorm2d(10, affine=False),
nn.ReLU(inplace=True),
Int8_MaxPool2d(kernel_size=3, stride=2),
Int8_Conv2d(10, 32, kernel_size=3, padding=1),
nn.BatchNorm2d(32, affine=False),
nn.ReLU(inplace=True),
Int8_Conv2d(32, 32, kernel_size=3, padding=1),
nn.BatchNorm2d(32, affine=False),
nn.ReLU(inplace=True),
Int8_Conv2d(32, 32, kernel_size=3, padding=1),
nn.BatchNorm2d(32, affine=False),
nn.ReLU(inplace=True),
Int8_MaxPool2d(kernel_size=3, stride=2),
)
self.classifier_1 = nn.Sequential(
Int8_Linear(32 * 2 * 2, 1024),
# BinarizeLinear(256 * 1 * 1, 4096),
nn.BatchNorm1d(1024),
nn.ReLU(inplace=True),
)
self.classifier_2 = nn.Sequential(
Int8_Linear(256, 256),
nn.BatchNorm1d(256),
nn.ReLU(inplace=True),
Int8_Linear(256, 64),
nn.BatchNorm1d(64),
nn.ReLU(inplace=True),
)
self.classifier_3 = nn.Sequential(
Int8_Linear(256, num_classes),
nn.BatchNorm1d(num_classes),
nn.Softmax(dim=1)
)
def forward(self, x):
# print(x.shape)
x = x.unsqueeze(1)
x = self.features(x)
# print(x.shape)
# exit()
# exit(0)
# x = x.view(-1, 256 * 1 * 1)
x = x.view(-1, 32 * 2 * 2)
# x = self.classifier(x)
x = self.classifier_1(x)
B, C = x.size()
x = x.view(4 * B, 256)
x = self.classifier_2(x)
x = x.view(B, 256)
x = self.classifier_3(x)
return x
import os
from sklearn.model_selection import train_test_split
# import cv2
import numpy as np
from tqdm import tqdm
from PIL import Image
path = ["../data-bin/TUGraz_bike/", "../data-bin/TUGraz_cars/", "../data-bin/TUGraz_person/"]
label = []
data = []
num = 0
img_size = 32
for k in range(3):
for file in os.listdir(path[k]):
num += 1
shape = (num, img_size, img_size)
with tqdm(total=num) as bar:
for k in range(3):
for file in os.listdir(path[k]):
img = Image.open(path[k]+file)
img = img.resize((img_size, img_size), Image.Resampling.LANCZOS)
# img = img.resize((28, 28), Image.ANTIALIAS)
arr = np.array(img)
# print(arr.size)
# exit()
res = list(arr.reshape(-1))
res = [-1 if i == 0 else 1 for i in res]
data.append(res)
label.append(k)
bar.update(1)
X = np.array(data).reshape(shape)
Y = np.array(label)
train_data, val_data = train_test_split([i for i in range(len(X))], test_size=0.2, random_state=2022)
np.save("train_x.npy", X[train_data])
np.save("train_y.npy", Y[train_data])
np.save("val_x.npy", X[val_data])
np.save("val_y.npy", Y[val_data])
# Classification-int8
- `python preprocess.py`
- `python main.py`
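- The int8 modules require `qtorch` for `fixed_point_quantize`; typically installed with `pip install qtorch`.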
import torch
import torch.nn as nn
from qtorch.quant import fixed_point_quantize as fpq
def to_int8(tensor):
return fpq(tensor, 8, 4)
class Int8_Linear(nn.Linear):
def __init__(self, *kargs, **kwargs):
super(Int8_Linear, self).__init__(*kargs, **kwargs)
def forward(self, input):
input.data=to_int8(input.data)
if not hasattr(self.weight,'org'):
self.weight.org=self.weight.data.clone()
self.weight.data=to_int8(self.weight.org)
out = nn.functional.linear(input, self.weight)
if self.bias is not None:
self.bias.org=self.bias.data.clone()
out += self.bias.view(1, -1).expand_as(out)
return out
class Int8_Conv2d(nn.Conv2d):
def __init__(self, *kargs, **kwargs):
super(Int8_Conv2d, self).__init__(*kargs, **kwargs)
def forward(self, input):
input.data = to_int8(input.data)
if not hasattr(self.weight,'org'):
self.weight.org=self.weight.data.clone()
self.weight.data=to_int8(self.weight.org)
out = nn.functional.conv2d(input, self.weight, None, self.stride,
self.padding, self.dilation, self.groups)
if self.bias is not None:
self.bias.org=self.bias.data.clone()
out += self.bias.view(1, -1, 1, 1).expand_as(out)
return out
class Int8_MaxPool2d(nn.MaxPool2d):
def __init__(self, *kargs, **kwargs):
super(Int8_MaxPool2d, self).__init__(*kargs, **kwargs)
def forward(self, input):
input.data = to_int8(input.data)
out = nn.functional.max_pool2d(input, self.kernel_size, self.stride, self.padding, self.dilation)
return out
# from __future__ import print_function
import argparse
import torch
import torch.nn as nn
import torch.optim as optim
from torch.autograd import Variable
from torch.utils.data import TensorDataset, DataLoader
import numpy as np
from net import Net
# Training settings
parser = argparse.ArgumentParser(description='Classification')
parser.add_argument('--batch-size', type=int, default=32, metavar='N',
help='input batch size for training (default: 32)')
parser.add_argument('--test-batch-size', type=int, default=1000, metavar='N',
help='input batch size for testing (default: 1000)')
parser.add_argument('--epochs', type=int, default=100, metavar='N',
help='number of epochs to train (default: 100)')
parser.add_argument('--lr', type=float, default=0.001, metavar='LR',
help='learning rate (default: 0.001)')
parser.add_argument('--momentum', type=float, default=0.5, metavar='M',
help='SGD momentum (default: 0.5)')
parser.add_argument('--no-cuda', action='store_true', default=False,
help='disables CUDA training')
parser.add_argument('--seed', type=int, default=1, metavar='S',
help='random seed (default: 1)')
parser.add_argument('--gpus', default=0,
help='gpus used for training - e.g. 0,1,3')
parser.add_argument('--log-interval', type=int, default=10, metavar='N',
help='how many batches to wait before logging training status')
args = parser.parse_args()
# print(args)
# exit(0)
args.cuda = not args.no_cuda and torch.cuda.is_available()
torch.manual_seed(args.seed)
if args.cuda:
torch.cuda.manual_seed(args.seed)
val_x = np.load("val_x.npy")
val_y = np.load("val_y.npy")
train_x = np.load("train_x.npy")
train_y = np.load("train_y.npy")
batch_size = args.batch_size
train_loader = TensorDataset(torch.from_numpy(train_x), torch.from_numpy(train_y))
train_loader = DataLoader(train_loader, shuffle=True, batch_size=batch_size, drop_last=True)
test_loader = TensorDataset(torch.from_numpy(val_x), torch.from_numpy(val_y))
test_loader = DataLoader(test_loader, shuffle=False, batch_size=1000)
model = Net(10)
from torchinfo import summary
summary(model, input_size=(batch_size, 3, 32, 32))
if args.cuda:
torch.cuda.set_device(0)
model.cuda()
criterion = nn.CrossEntropyLoss()
optimizer = optim.Adam(model.parameters(), lr=args.lr)
def train(epoch):
model.train()
for batch_idx, (data, target) in enumerate(train_loader):
data = torch.tensor(data, dtype=torch.float)
target = target.long()
if args.cuda:
data, target = data.cuda(), target.cuda()
# target = target.cuda()
data, target = Variable(data), Variable(target)
# print(data)
# print(data.shape)
# print(target.shape)
# exit(0)
optimizer.zero_grad()
output = model(data)
# print(output.shape)
loss = criterion(output, target)
loss.backward()
for p in list(model.parameters()):
if hasattr(p,'org'):
p.data.copy_(p.org)
optimizer.step()
for p in list(model.parameters()):
if hasattr(p,'org'):
p.org.copy_(p.data.clamp_(-1,1))
if batch_idx % args.log_interval == 0:
print('Train Epoch: {} [{}/{} ({:.0f}%)]\tLoss: {:.6f}'.format(
epoch, batch_idx * len(data), len(train_loader.dataset),
100. * batch_idx / len(train_loader), loss.item()))
def accuracy(output, target, k=5):
"""Computes the precision@k for the specified values of k"""
batch_size = target.size(0)
_, pred = output.topk(k, 1, True, True)
pred = pred.t()
correct = pred.eq(target.view(1, -1).expand_as(pred))
return correct.view(k, batch_size).float().sum()
def test():
model.eval()
test_loss = 0
correct_1 = 0
correct_5 = 0
with torch.no_grad():
for data, target in test_loader:
data = torch.tensor(data, dtype=torch.float)
target = target.long()
if args.cuda:
data, target = data.cuda(), target.cuda()
data, target = Variable(data), Variable(target)
output = model(data)
test_loss += criterion(output, target).item() # accumulate each batch's mean loss
# pred = output.data.max(1, keepdim=True)[1] # get the index of the max log-probability
correct_1 += accuracy(output, target, k=1)
correct_5 += accuracy(output, target, k=5)
test_loss /= len(test_loader.dataset)
test_acc = 100. * correct_1 / len(test_loader.dataset)
print('\nTest set: Average loss: {:.4f}, Accuracy@1: {}/{} ({:.0f}%), Accuracy@5: {}/{} ({:.0f}%)\n'.format(
test_loss,
int(correct_1), len(test_loader.dataset), 100. * correct_1 / len(test_loader.dataset),
int(correct_5), len(test_loader.dataset), 100. * correct_5 / len(test_loader.dataset),
))
return test_acc
max_test_acc = 0
for epoch in range(1, args.epochs + 1):
train(epoch)
test_acc = test()
if test_acc > max_test_acc:
max_test_acc = test_acc
torch.save(model.state_dict(), "model_parameter_best.pkl")
print("Best test acc: %.4f.\n" % max_test_acc)
if epoch%40==0:
optimizer.param_groups[0]['lr']=optimizer.param_groups[0]['lr']*0.1
# for name, parameters in model.named_parameters():  # print the size of each layer's parameters
# print(name, ':', parameters.size())
# print(parameters)
torch.save(model.state_dict(), "model_parameter_int8.pkl")
import torch
import torch.nn as nn
from int8_modules import *
class Net(nn.Module):
def __init__(self, num_classes=1000):
super(Net, self).__init__()
# self.ratioInfl=1
self.features = nn.Sequential(
Int8_Conv2d(3, 10, kernel_size=11, stride=1, padding=1),
# BinarizeConv2d(1, int(64*self.ratioInfl), kernel_size=5, stride=2, padding=1),
nn.BatchNorm2d(10, affine=False),
nn.ReLU(inplace=True),
Int8_MaxPool2d(kernel_size=3, stride=2),
Int8_Conv2d(10, 10, kernel_size=5, padding=2),
# BinarizeConv2d(int(64*self.ratioInfl), int(192*self.ratioInfl), kernel_size=3, padding=1),
nn.BatchNorm2d(10, affine=False),
nn.ReLU(inplace=True),
Int8_MaxPool2d(kernel_size=3, stride=2),
Int8_Conv2d(10, 32, kernel_size=3, padding=1),
nn.BatchNorm2d(32, affine=False),
nn.ReLU(inplace=True),
Int8_Conv2d(32, 32, kernel_size=3, padding=1),
nn.BatchNorm2d(32, affine=False),
nn.ReLU(inplace=True),
Int8_Conv2d(32, 32, kernel_size=3, padding=1),
nn.BatchNorm2d(32, affine=False),
nn.ReLU(inplace=True),
Int8_MaxPool2d(kernel_size=3, stride=2),
)
self.classifier_1 = nn.Sequential(
Int8_Linear(32 * 2 * 2, 1024),
# BinarizeLinear(256 * 1 * 1, 4096),
nn.BatchNorm1d(1024),
nn.ReLU(inplace=True),
)
self.classifier_2 = nn.Sequential(
Int8_Linear(256, 256),
nn.BatchNorm1d(256),
nn.ReLU(inplace=True),
Int8_Linear(256, 64),
nn.BatchNorm1d(64),
nn.ReLU(inplace=True),
)
self.classifier_3 = nn.Sequential(
Int8_Linear(256, num_classes),
nn.BatchNorm1d(num_classes),
nn.Softmax(dim=1)
)
def forward(self, x):
# print(x.shape)
x = self.features(x)
# print(x.shape)
# exit()
# exit(0)
# x = x.view(-1, 256 * 1 * 1)
x = x.view(-1, 32 * 2 * 2)
# x = self.classifier(x)
x = self.classifier_1(x)
B, C = x.size()
x = x.view(4 * B, 256)
x = self.classifier_2(x)
x = x.view(B, 256)
x = self.classifier_3(x)
return x
import os
from sklearn.model_selection import train_test_split
# import cv2
import numpy as np
from tqdm import tqdm
from PIL import Image
path = ["../data/TUGraz_bike/", "../data/TUGraz_cars/", "../data/TUGraz_person/"]
label = []
data = []
num = 0
img_size = 32
for k in range(3):
for file in os.listdir(path[k]):
num += 1
shape = (num, 3, img_size, img_size)
with tqdm(total=num) as bar:
for k in range(3):
for file in os.listdir(path[k]):
img = Image.open(path[k]+file)
img = img.resize((img_size, img_size), Image.Resampling.LANCZOS)
# img = img.resize((28, 28), Image.ANTIALIAS)
arr = np.array(img)
arr = np.transpose(arr, (2, 0, 1))
res = list(arr.reshape(-1))
data.append(res)
label.append(k)
bar.update(1)
X = np.array(data).reshape(shape)
Y = np.array(label)
train_data, val_data = train_test_split([i for i in range(len(X))], test_size=0.2, random_state=2022)
np.save("train_x.npy", X[train_data])
np.save("train_y.npy", Y[train_data])
np.save("val_x.npy", X[val_data])
np.save("val_y.npy", Y[val_data])
# Classification-int8
- `python preprocess.py`
- `python main.py`
# from __future__ import print_function
import argparse
import torch
import torch.nn as nn
import torch.nn.functional as F
import torch.optim as optim
from torchvision import datasets, transforms
from torch.autograd import Variable
from net import Net
from torch.utils.data import TensorDataset, DataLoader
import numpy as np
# Training settings
parser = argparse.ArgumentParser(description='Classification')
parser.add_argument('--batch-size', type=int, default=64, metavar='N',
help='input batch size for training (default: 64)')
parser.add_argument('--test-batch-size', type=int, default=1000, metavar='N',
help='input batch size for testing (default: 1000)')
parser.add_argument('--epochs', type=int, default=25, metavar='N',
help='number of epochs to train (default: 25)')
parser.add_argument('--lr', type=float, default=0.001, metavar='LR',
help='learning rate (default: 0.001)')
parser.add_argument('--momentum', type=float, default=0.5, metavar='M',
help='SGD momentum (default: 0.5)')
parser.add_argument('--no-cuda', action='store_true', default=False,
help='disables CUDA training')
parser.add_argument('--seed', type=int, default=1, metavar='S',
help='random seed (default: 1)')
parser.add_argument('--gpus', default=0,
help='gpus used for training - e.g. 0,1,3')
parser.add_argument('--log-interval', type=int, default=10, metavar='N',
help='how many batches to wait before logging training status')
args = parser.parse_args()
# print(args)
# exit(0)
args.cuda = not args.no_cuda and torch.cuda.is_available()
torch.manual_seed(args.seed)
if args.cuda:
torch.cuda.manual_seed(args.seed)
val_x = np.load("val_x.npy")
val_y = np.load("val_y.npy")
train_x = np.load("train_x.npy")
train_y = np.load("train_y.npy")
batch_size = args.batch_size
train_loader = TensorDataset(torch.from_numpy(train_x), torch.from_numpy(train_y))
train_loader = DataLoader(train_loader, shuffle=True, batch_size=batch_size, drop_last=True)
test_loader = TensorDataset(torch.from_numpy(val_x), torch.from_numpy(val_y))
test_loader = DataLoader(test_loader, shuffle=False, batch_size=args.test_batch_size)
model = Net(2)
# print(model)
from torchinfo import summary
summary(model, input_size=(batch_size, 1, 32, 32))
# exit(0)
if args.cuda:
torch.cuda.set_device(0)
model.cuda()
criterion = nn.CrossEntropyLoss()
optimizer = optim.Adam(model.parameters(), lr=args.lr)
def train(epoch):
model.train()
for batch_idx, (data, target) in enumerate(train_loader):
data = torch.tensor(data, dtype=torch.float)
target = target.long()
if args.cuda:
data, target = data.cuda(), target.cuda()
# target = target.cuda()
data, target = Variable(data), Variable(target)
# print(data)
# print(data.shape)
# print(target)
# exit(0)
optimizer.zero_grad()
output = model(data)
# print(output.shape)
loss = criterion(output, target)
loss.backward()
for p in list(model.parameters()):
if hasattr(p,'org'):
p.data.copy_(p.org)
optimizer.step()
for p in list(model.parameters()):
if hasattr(p,'org'):
p.org.copy_(p.data.clamp_(-1,1))
if batch_idx % args.log_interval == 0:
print('Train Epoch: {} [{}/{} ({:.0f}%)]\tLoss: {:.6f}'.format(
epoch, batch_idx * len(data), len(train_loader.dataset),
100. * batch_idx / len(train_loader), loss.item()))
def test():
model.eval()
test_loss = 0
correct = 0
with torch.no_grad():
for data, target in test_loader:
data = torch.tensor(data, dtype=torch.float)
target = target.long()
if args.cuda:
data, target = data.cuda(), target.cuda()
data, target = Variable(data), Variable(target)
output = model(data)
test_loss += criterion(output, target).item() # accumulate each batch's mean loss
pred = output.data.max(1, keepdim=True)[1] # index of the highest-scoring class
correct += pred.eq(target.data.view_as(pred)).cpu().sum()
test_loss /= len(test_loader.dataset)
test_acc = 100. * correct / len(test_loader.dataset)
print('\nTest set: Average loss: {:.4f}, Accuracy: {}/{} ({:.0f}%)\n'.format(
test_loss, correct, len(test_loader.dataset),
100. * correct / len(test_loader.dataset)))
return test_acc
max_test_acc = 0
for epoch in range(1, args.epochs + 1):
train(epoch)
test_acc = test()
if test_acc > max_test_acc:
max_test_acc = test_acc
torch.save(model.state_dict(), "model_parameter_best_int8.pkl")
print("Best test acc: %.4f.\n" % max_test_acc)
if epoch%40==0:
optimizer.param_groups[0]['lr']=optimizer.param_groups[0]['lr']*0.1
from xml.dom import minidom
import os
import numpy as np
import cv2
from tqdm import tqdm
import random
import torch
from torchvision import transforms
from net import Net
random.seed(2022)
path = ["data/Annotations/", "data/JPEGImages/"]
img_size = 32
# encoding: utf-8
def IOU(rec1,rec2):
left_column_max = max(rec1[0],rec2[0])
right_column_min = min(rec1[2],rec2[2])
up_row_max = max(rec1[1],rec2[1])
down_row_min = min(rec1[3],rec2[3])
# case 1: the two rectangles do not overlap
if left_column_max>=right_column_min or down_row_min<=up_row_max:
return 0
# case 2: the two rectangles overlap
else:
S1 = (rec1[2]-rec1[0])*(rec1[3]-rec1[1])
S2 = (rec2[2]-rec2[0])*(rec2[3]-rec2[1])
S_cross = (down_row_min-up_row_max)*(right_column_min-left_column_max)
return S_cross/(S1+S2-S_cross)
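# e.g. IOU((0, 0, 10, 10), (5, 5, 15, 15)) = 25 / (100 + 100 - 25) ≈ 0.143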
model = Net(2)
model.load_state_dict(torch.load("model_parameter.pkl"))
model.eval()
transform=transforms.Compose([
transforms.ToTensor(),
transforms.Normalize((0.5, 0.5, 0.5), (0.5, 0.5, 0.5))
])
transform_gray=transforms.Compose([
transforms.ToTensor(),
transforms.Normalize((0.5, ), (0.5, ))
])
files = os.listdir(path[0])
logThreshold = np.log(0.95)
cnt = 0
for i in range(3300, 5000):
f = files[i]
doc = minidom.parse(path[0]+f)
fileName = doc.getElementsByTagName("filename")[0].firstChild.data
# img = Image.open(path[1]+fileName)
im = cv2.imread(path[1]+fileName)
objects = doc.getElementsByTagName("object")
detectionList = []
for o in objects:
if o.getElementsByTagName("name")[0].firstChild.data == "car":
bndbox = o.getElementsByTagName("bndbox")[0]
xmin = int(bndbox.getElementsByTagName("xmin")[0].firstChild.data)
xmax = int(bndbox.getElementsByTagName("xmax")[0].firstChild.data)
ymin = int(bndbox.getElementsByTagName("ymin")[0].firstChild.data)
ymax = int(bndbox.getElementsByTagName("ymax")[0].firstChild.data)
detectionList.append((xmin, ymin, xmax, ymax))
if len(detectionList):
ss = cv2.ximgproc.segmentation.createSelectiveSearchSegmentation()
ss.setBaseImage(im)
method = 'f' # f=fast, q=quality
if method == 'f': # fast but low recall
ss.switchToSelectiveSearchFast()
elif method == 'q': # high recall but slow
ss.switchToSelectiveSearchQuality()
rects = ss.process() # f:453, q:1354
rects = [(i[0], i[1], i[0]+i[2], i[1]+i[3]) for i in rects]
X = []
for rect in rects:
# res = cv2.resize(im[rect[1]:rect[3], :][:, rect[0]:rect[2]], (100, 100)).reshape(-1)
res = cv2.resize(im[rect[1]:rect[3], :][:, rect[0]:rect[2]], (img_size, img_size))
res = cv2.cvtColor(res, cv2.COLOR_BGR2GRAY)
res = (transform_gray(res).reshape(-1) > 0).tolist()
# X = [res for i in range(1)]
X.append(res)
X = np.array(X, dtype=np.int8).reshape(-1, 1, img_size, img_size)
X[X == 0] = -1  # match the {-1, +1} pixel encoding the model was trained on
X = torch.tensor(X, dtype=torch.float)
Y = model(X)
# print(Y[:, 0])
# print(Y[:, 1])
predictList = []
for j in range(len(rects)):
# if Y[j][1] > logThreshold:
if Y[j][1] > 0.8:
predictList.append((rects[j], float(Y[j][1])))
predictList.sort(key=lambda item: item[1], reverse=True)
del X, Y, rects
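# Greedy non-maximum suppression: keep the highest-scoring remaining box and
# drop every candidate whose IoU with it exceeds 0.05, then repeat.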
M = []
H = predictList
while len(H):
rect = H[0][0]
Hnew = []
for h in H:
if IOU(rect, h[0]) <= 0.05:
Hnew.append(h)
H = Hnew
M.append(rect)
for rect in M:
cv2.rectangle(im, (rect[0], rect[1]), (rect[2], rect[3]), (0, 0, 255), 1, cv2.LINE_AA)
if len(M):
cv2.imwrite('pic'+str(cnt)+'.png', im)
cnt = cnt + 1
if cnt > 20:
exit(0)
import torch
import torch.nn as nn
class Net(nn.Module):
def __init__(self, num_classes=1000):
super(Net, self).__init__()
# self.ratioInfl=1
self.features = nn.Sequential(
nn.Conv2d(1, 10, kernel_size=11, stride=1, padding=1),
# BinarizeConv2d(1, int(64*self.ratioInfl), kernel_size=5, stride=2, padding=1),
nn.BatchNorm2d(10, affine=False),
nn.ReLU(inplace=True),
nn.MaxPool2d(kernel_size=3, stride=2),
nn.Conv2d(10, 10, kernel_size=5, padding=2),
# BinarizeConv2d(int(64*self.ratioInfl), int(192*self.ratioInfl), kernel_size=3, padding=1),
nn.BatchNorm2d(10, affine=False),
nn.ReLU(inplace=True),
nn.MaxPool2d(kernel_size=3, stride=2),
nn.Conv2d(10, 32, kernel_size=3, padding=1),
nn.BatchNorm2d(32, affine=False),
nn.ReLU(inplace=True),
nn.Conv2d(32, 32, kernel_size=3, padding=1),
nn.BatchNorm2d(32, affine=False),
nn.ReLU(inplace=True),
nn.Conv2d(32, 32, kernel_size=3, padding=1),
nn.BatchNorm2d(32, affine=False),
nn.ReLU(inplace=True),
nn.MaxPool2d(kernel_size=3, stride=2),
)
self.classifier_1 = nn.Sequential(
nn.Linear(32 * 2 * 2, 1024),
# BinarizeLinear(256 * 1 * 1, 4096),
nn.BatchNorm1d(1024),
nn.ReLU(inplace=True),
)
self.classifier_2 = nn.Sequential(
nn.Linear(256, 256),
nn.BatchNorm1d(256),
nn.ReLU(inplace=True),
nn.Linear(256, 64),
nn.BatchNorm1d(64),
nn.ReLU(inplace=True),
)
self.classifier_3 = nn.Sequential(
nn.Linear(256, num_classes),
nn.BatchNorm1d(num_classes),
nn.Softmax(dim=1)
)
def forward(self, x):
# print(x.shape)
x = x.reshape(-1, 1, 32, 32)
x = self.features(x)
# print(x.shape)
# exit()
# exit(0)
# x = x.view(-1, 256 * 1 * 1)
x = x.view(-1, 32 * 2 * 2)
# x = self.classifier(x)
x = self.classifier_1(x)
B, C = x.size()
x = x.view(4 * B, 256)
x = self.classifier_2(x)
x = x.view(B, 256)
x = self.classifier_3(x)
return x
from xml.dom import minidom
import os
import numpy as np
import cv2
from tqdm import tqdm
import random
import torchvision.transforms as transforms
import psutil, sys
random.seed(2022)
path = ["../data/Annotations/", "../data/JPEGImages/"]
img_size = 32
# encoding: utf-8
def IOU(rec1,rec2):
left_column_max = max(rec1[0],rec2[0])
right_column_min = min(rec1[2],rec2[2])
up_row_max = max(rec1[1],rec2[1])
down_row_min = min(rec1[3],rec2[3])
# case 1: the two rectangles do not overlap
if left_column_max>=right_column_min or down_row_min<=up_row_max:
return 0
# case 2: the two rectangles overlap
else:
S1 = (rec1[2]-rec1[0])*(rec1[3]-rec1[1])
S2 = (rec2[2]-rec2[0])*(rec2[3]-rec2[1])
S_cross = (down_row_min-up_row_max)*(right_column_min-left_column_max)
return S_cross/(S1+S2-S_cross)
X = []
Y = []
cnt = 0
transform=transforms.Compose([
transforms.ToTensor(),
transforms.Normalize((0.5, 0.5, 0.5), (0.5, 0.5, 0.5))
])
transform_gray=transforms.Compose([
transforms.ToTensor(),
transforms.Normalize((0.5,), (0.5,))
])
with tqdm(total=len(os.listdir(path[0]))) as bar:
for file in os.listdir(path[0]):
doc = minidom.parse(path[0]+file)
fileName = doc.getElementsByTagName("filename")[0].firstChild.data
# img = Image.open(path[1]+fileName)
im = cv2.imread(path[1]+fileName)
objects = doc.getElementsByTagName("object")
detectionList = []
for o in objects:
if o.getElementsByTagName("name")[0].firstChild.data == "car":
bndbox = o.getElementsByTagName("bndbox")[0]
xmin = int(bndbox.getElementsByTagName("xmin")[0].firstChild.data)
xmax = int(bndbox.getElementsByTagName("xmax")[0].firstChild.data)
ymin = int(bndbox.getElementsByTagName("ymin")[0].firstChild.data)
ymax = int(bndbox.getElementsByTagName("ymax")[0].firstChild.data)
detectionList.append((xmin, ymin, xmax, ymax))
if len(detectionList):
ss = cv2.ximgproc.segmentation.createSelectiveSearchSegmentation()
ss.setBaseImage(im)
method = 'f' # f=fast, q=quality
if method == 'f': # fast but low recall
ss.switchToSelectiveSearchFast()
elif method == 'q': # high recall but slow
ss.switchToSelectiveSearchQuality()
rects = ss.process() # f:453, q:1354
rects = [(i[0], i[1], i[0]+i[2], i[1]+i[3]) for i in rects]
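# Label each proposal against the ground-truth car boxes: IoU > 0.5 with
# exactly one box -> positive; no overlap above 0.5 -> negative, kept with
# only ~10% probability; overlap with more than one box -> skipped.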
for rect in rects:
positive = 0
# negative = 1
for i in detectionList:
iou = IOU(rect, i)
if iou > 0.5:
positive = positive + 1
# if iou:
# negative = 0
if positive > 1 or (not positive and random.random() > 0.1):
continue
# res = cv2.resize(im[rect[1]:rect[3], :][:, rect[0]:rect[2]], (100, 100)).reshape(-1)
res = cv2.resize(im[rect[1]:rect[3], :][:, rect[0]:rect[2]], (img_size, img_size))
res = cv2.cvtColor(res, cv2.COLOR_BGR2GRAY)
res = (transform_gray(res).reshape(-1) > 0).tolist()
# res = (transform(res).reshape(-1) > 0).tolist()
# print(len(X), sys.getsizeof(X)/(1024*1024))
# res = np.array(res)
# print(len(res), res)
# exit(0)
X.append(res)
# del res
# memory = psutil.virtual_memory()
# print(float(memory.total)/(1024*1024*1024), float(memory.used)/(1024*1024*1024))
if positive:
Y.append(1)
else:
Y.append(0)
bar.update(1)
if len(X) > 5000:
X = np.array(X, dtype=np.int8).reshape((-1, 1, img_size, img_size))
Y = np.array(Y)
np.save("X"+str(cnt)+".npy", X)
np.save("Y"+str(cnt)+".npy", Y)
cnt = cnt + 1
if cnt == 5:
break
del X,Y,rects
X, Y = [], []
# X = np.array(X).reshape((-1, 100, 100, 3))
# Y = np.array(Y)
# train_data, val_data = train_test_split([i for i in range(len(X))], test_size=0.2, random_state=2022)
# np.save("train_x.npy", X[train_data])
# np.save("train_y.npy", Y[train_data])
# np.save("val_x.npy", X[val_data])
# np.save("val_y.npy", Y[val_data])
import numpy as np
from sklearn.model_selection import train_test_split
X = np.load("X0.npy")
Y = np.load("Y0.npy")
for i in range(1, 5):
tmpX = np.load("X"+str(i)+".npy")
tmpY = np.load("Y"+str(i)+".npy")
X = np.concatenate([X, tmpX],axis=0)
Y = np.concatenate([Y, tmpY],axis=0)
# print(X.size, X.itemsize, X.size * X.itemsize)
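# Proposals were stored as {0, 1} masks; remap 0 -> -1 so pixels use the
# same {-1, +1} encoding as the classification data.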
X[X == 0] = -1
# np.save("X.npy", X)
# np.save("Y.npy", Y)
train_data, val_data = train_test_split([i for i in range(len(X))], test_size=0.2, random_state=2022)
np.save("train_x.npy", X[train_data])
np.save("train_y.npy", Y[train_data])
np.save("val_x.npy", X[val_data])
np.save("val_y.npy", Y[val_data])
# Detection-fp32
- `python preprocess.py`
- `python main.py`
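- `preprocess.py` writes the region proposals in chunks (`X0.npy`/`Y0.npy` … `X4.npy`/`Y4.npy`); run the chunk-merging script included in this commit to produce `train_x.npy`, `train_y.npy`, `val_x.npy`, `val_y.npy` before `python main.py`.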
# from __future__ import print_function
import argparse
import torch
import torch.nn as nn
import torch.nn.functional as F
import torch.optim as optim
from torchvision import datasets, transforms
from torch.autograd import Variable
from net import Net
from torch.utils.data import TensorDataset, DataLoader
import numpy as np
# Training settings
parser = argparse.ArgumentParser(description='Classification')
parser.add_argument('--batch-size', type=int, default=64, metavar='N',
help='input batch size for training (default: 64)')
parser.add_argument('--test-batch-size', type=int, default=1000, metavar='N',
help='input batch size for testing (default: 1000)')
parser.add_argument('--epochs', type=int, default=25, metavar='N',
help='number of epochs to train (default: 25)')
parser.add_argument('--lr', type=float, default=0.001, metavar='LR',
help='learning rate (default: 0.001)')
parser.add_argument('--momentum', type=float, default=0.5, metavar='M',
help='SGD momentum (default: 0.5)')
parser.add_argument('--no-cuda', action='store_true', default=False,
help='disables CUDA training')
parser.add_argument('--seed', type=int, default=1, metavar='S',
help='random seed (default: 1)')
parser.add_argument('--gpus', default=0,
help='gpus used for training - e.g. 0,1,3')
parser.add_argument('--log-interval', type=int, default=10, metavar='N',
help='how many batches to wait before logging training status')
args = parser.parse_args()
# print(args)
# exit(0)
args.cuda = not args.no_cuda and torch.cuda.is_available()
torch.manual_seed(args.seed)
if args.cuda:
torch.cuda.manual_seed(args.seed)
val_x = np.load("val_x.npy")
val_y = np.load("val_y.npy")
train_x = np.load("train_x.npy")
train_y = np.load("train_y.npy")
batch_size = args.batch_size
train_loader = TensorDataset(torch.from_numpy(train_x), torch.from_numpy(train_y))
train_loader = DataLoader(train_loader, shuffle=True, batch_size=batch_size, drop_last=True)
test_loader = TensorDataset(torch.from_numpy(val_x), torch.from_numpy(val_y))
test_loader = DataLoader(test_loader, shuffle=False, batch_size=args.test_batch_size)
model = Net(2)
# print(model)
from torchinfo import summary
summary(model, input_size=(batch_size, 3, 32, 32))
# exit(0)
if args.cuda:
torch.cuda.set_device(0)
model.cuda()
criterion = nn.CrossEntropyLoss()
optimizer = optim.Adam(model.parameters(), lr=args.lr)
def train(epoch):
model.train()
for batch_idx, (data, target) in enumerate(train_loader):
data = torch.tensor(data, dtype=torch.float)
target = target.long()
if args.cuda:
data, target = data.cuda(), target.cuda()
# target = target.cuda()
data, target = Variable(data), Variable(target)
# print(data)
# print(data.shape)
# print(target)
# exit(0)
optimizer.zero_grad()
output = model(data)
# print(output.shape)
loss = criterion(output, target)
loss.backward()
for p in list(model.parameters()):
if hasattr(p,'org'):
p.data.copy_(p.org)
optimizer.step()
for p in list(model.parameters()):
if hasattr(p,'org'):
p.org.copy_(p.data.clamp_(-1,1))
if batch_idx % args.log_interval == 0:
print('Train Epoch: {} [{}/{} ({:.0f}%)]\tLoss: {:.6f}'.format(
epoch, batch_idx * len(data), len(train_loader.dataset),
100. * batch_idx / len(train_loader), loss.item()))
def test():
model.eval()
test_loss = 0
correct = 0
with torch.no_grad():
for data, target in test_loader:
data = torch.tensor(data, dtype=torch.float)
target = target.long()
if args.cuda:
data, target = data.cuda(), target.cuda()
data, target = Variable(data), Variable(target)
output = model(data)
test_loss += criterion(output, target).item() # accumulate each batch's mean loss
pred = output.data.max(1, keepdim=True)[1] # index of the highest-scoring class
correct += pred.eq(target.data.view_as(pred)).cpu().sum()
test_loss /= len(test_loader.dataset)
test_acc = 100. * correct / len(test_loader.dataset)
print('\nTest set: Average loss: {:.4f}, Accuracy: {}/{} ({:.0f}%)\n'.format(
test_loss, correct, len(test_loader.dataset),
100. * correct / len(test_loader.dataset)))
return test_acc
max_test_acc = 0
for epoch in range(1, args.epochs + 1):
train(epoch)
test_acc = test()
if test_acc > max_test_acc:
max_test_acc = test_acc
torch.save(model.state_dict(), "model_parameter_best_fp32.pkl")
print("Best test acc: %.4f.\n" % max_test_acc)
if epoch%40==0:
optimizer.param_groups[0]['lr']=optimizer.param_groups[0]['lr']*0.1
from xml.dom import minidom
import os
import numpy as np
import cv2
from tqdm import tqdm
import random
import torch
from torchvision import transforms
from net import Net
random.seed(2022)
path = ["data/Annotations/", "data/JPEGImages/"]
img_size = 32
# encoding: utf-8
def IOU(rec1,rec2):
left_column_max = max(rec1[0],rec2[0])
right_column_min = min(rec1[2],rec2[2])
up_row_max = max(rec1[1],rec2[1])
down_row_min = min(rec1[3],rec2[3])
# case 1: the two rectangles do not overlap
if left_column_max>=right_column_min or down_row_min<=up_row_max:
return 0
# case 2: the two rectangles overlap
else:
S1 = (rec1[2]-rec1[0])*(rec1[3]-rec1[1])
S2 = (rec2[2]-rec2[0])*(rec2[3]-rec2[1])
S_cross = (down_row_min-up_row_max)*(right_column_min-left_column_max)
return S_cross/(S1+S2-S_cross)
model = Net(2)
model.load_state_dict(torch.load("model_parameter.pkl"))
model.eval()
transform=transforms.Compose([
transforms.ToTensor(),
transforms.Normalize((0.5, 0.5, 0.5), (0.5, 0.5, 0.5))
])
transform_gray=transforms.Compose([
transforms.ToTensor(),
transforms.Normalize((0.5, ), (0.5, ))
])
files = os.listdir(path[0])
logThreshold = np.log(0.95)
cnt = 0
for i in range(3300, 5000):
f = files[i]
doc = minidom.parse(path[0]+f)
fileName = doc.getElementsByTagName("filename")[0].firstChild.data
# img = Image.open(path[1]+fileName)
im = cv2.imread(path[1]+fileName)
objects = doc.getElementsByTagName("object")
detectionList = []
for o in objects:
if o.getElementsByTagName("name")[0].firstChild.data == "car":
bndbox = o.getElementsByTagName("bndbox")[0]
xmin = int(bndbox.getElementsByTagName("xmin")[0].firstChild.data)
xmax = int(bndbox.getElementsByTagName("xmax")[0].firstChild.data)
ymin = int(bndbox.getElementsByTagName("ymin")[0].firstChild.data)
ymax = int(bndbox.getElementsByTagName("ymax")[0].firstChild.data)
detectionList.append((xmin, ymin, xmax, ymax))
if len(detectionList):
ss = cv2.ximgproc.segmentation.createSelectiveSearchSegmentation()
ss.setBaseImage(im)
method = 'f' # f=fast, q=quality
if method == 'f': # fast but low recall
ss.switchToSelectiveSearchFast()
elif method == 'q': # high recall but slow
ss.switchToSelectiveSearchQuality()
rects = ss.process() # f:453, q:1354
rects = [(i[0], i[1], i[0]+i[2], i[1]+i[3]) for i in rects]
X = []
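        # For each proposal: crop, resize to 32x32, convert to grayscale, and
        # binarize the pixels to {0, 1} before batching them through the classifier.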
for rect in rects:
# res = cv2.resize(im[rect[1]:rect[3], :][:, rect[0]:rect[2]], (100, 100)).reshape(-1)
res = cv2.resize(im[rect[1]:rect[3], :][:, rect[0]:rect[2]], (img_size, img_size))
res = cv2.cvtColor(res, cv2.COLOR_BGR2GRAY)
res = (transform_gray(res).reshape(-1) > 0).tolist()
# X = [res for i in range(1)]
X.append(res)
X = np.array(X, dtype=np.int8).reshape(-1, 1, img_size, img_size)
X = torch.tensor(X, dtype=torch.float)
        with torch.no_grad():  # inference only; no autograd graph over hundreds of crops
            Y = model(X)
predictList = []
for j in range(len(rects)):
# if Y[j][1] > logThreshold:
if Y[j][1] > 0.8:
predictList.append((rects[j], float(Y[j][1])))
predictList.sort(key=lambda item: item[1], reverse=True)
del X, Y, rects
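        # Greedy non-maximum suppression: keep the highest-scoring box, drop every
        # remaining proposal whose IoU with it exceeds 0.05, and repeat.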
M = []
H = predictList
while len(H):
rect = H[0][0]
Hnew = []
for h in H:
if IOU(rect, h[0]) <= 0.05:
Hnew.append(h)
H = Hnew
M.append(rect)
for rect in M:
cv2.rectangle(im, (rect[0], rect[1]), (rect[2], rect[3]), (0, 0, 255), 1, cv2.LINE_AA)
if len(M):
cv2.imwrite('pic'+str(cnt)+'.png', im)
cnt = cnt + 1
if cnt > 20:
exit(0)
\ No newline at end of file
import torch
import torch.nn as nn
class Net(nn.Module):
def __init__(self, num_classes=1000):
super(Net, self).__init__()
# self.ratioInfl=1
self.features = nn.Sequential(
nn.Conv2d(3, 10, kernel_size=11, stride=1, padding=1),
# BinarizeConv2d(1, int(64*self.ratioInfl), kernel_size=5, stride=2, padding=1),
nn.BatchNorm2d(10, affine=False),
nn.ReLU(inplace=True),
nn.MaxPool2d(kernel_size=3, stride=2),
nn.Conv2d(10, 10, kernel_size=5, padding=2),
# BinarizeConv2d(int(64*self.ratioInfl), int(192*self.ratioInfl), kernel_size=3, padding=1),
nn.BatchNorm2d(10, affine=False),
nn.ReLU(inplace=True),
nn.MaxPool2d(kernel_size=3, stride=2),
nn.Conv2d(10, 32, kernel_size=3, padding=1),
nn.BatchNorm2d(32, affine=False),
nn.ReLU(inplace=True),
nn.Conv2d(32, 32, kernel_size=3, padding=1),
nn.BatchNorm2d(32, affine=False),
nn.ReLU(inplace=True),
nn.Conv2d(32, 32, kernel_size=3, padding=1),
nn.BatchNorm2d(32, affine=False),
nn.ReLU(inplace=True),
nn.MaxPool2d(kernel_size=3, stride=2),
)
self.classifier_1 = nn.Sequential(
nn.Linear(32 * 2 * 2, 1024),
# BinarizeLinear(256 * 1 * 1, 4096),
nn.BatchNorm1d(1024),
nn.ReLU(inplace=True),
)
self.classifier_2 = nn.Sequential(
nn.Linear(256, 256),
nn.BatchNorm1d(256),
nn.ReLU(inplace=True),
nn.Linear(256, 64),
nn.BatchNorm1d(64),
nn.ReLU(inplace=True),
)
self.classifier_3 = nn.Sequential(
nn.Linear(256, num_classes),
nn.BatchNorm1d(num_classes),
            nn.Softmax(dim=1)  # explicit dim avoids the deprecation warning; note CrossEntropyLoss in main.py applies log-softmax on top of this
)
    def forward(self, x):
        x = x.reshape(-1, 3, 32, 32)
        x = self.features(x)
        x = x.view(-1, 32 * 2 * 2)
        x = self.classifier_1(x)
        B, C = x.size()
        # classifier_1 emits 1024 features; split them into four independent 256-dim
        # chunks, map each to 64 dims with classifier_2, then re-stack the four
        # outputs into one 256-dim vector per sample (4 * 64 = 256).
        x = x.view(4 * B, 256)
        x = self.classifier_2(x)
        x = x.view(B, 256)
        x = self.classifier_3(x)
        return x
\ No newline at end of file
from xml.dom import minidom
import os
import numpy as np
import cv2
from tqdm import tqdm
import random
import torchvision.transforms as transforms
random.seed(2022)
path = ["../data/Annotations/", "../data/JPEGImages/"]
img_size = 32
def IOU(rec1, rec2):
    # rec = (xmin, ymin, xmax, ymax); returns intersection-over-union of the two boxes
    left_column_max = max(rec1[0], rec2[0])
    right_column_min = min(rec1[2], rec2[2])
    up_row_max = max(rec1[1], rec2[1])
    down_row_min = min(rec1[3], rec2[3])
    # the two rectangles do not overlap
    if left_column_max >= right_column_min or down_row_min <= up_row_max:
        return 0
    # the two rectangles overlap
    else:
        S1 = (rec1[2] - rec1[0]) * (rec1[3] - rec1[1])
        S2 = (rec2[2] - rec2[0]) * (rec2[3] - rec2[1])
        S_cross = (down_row_min - up_row_max) * (right_column_min - left_column_max)
        return S_cross / (S1 + S2 - S_cross)
X = []
Y = []
cnt = 0
transform=transforms.Compose([
transforms.ToTensor(),
transforms.Normalize((0.5, 0.5, 0.5), (0.5, 0.5, 0.5))
])
transform_gray=transforms.Compose([
transforms.ToTensor(),
transforms.Normalize((0.5,), (0.5,))
])
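# ToTensor scales pixels to [0, 1] and Normalize maps them to [-1, 1], so the
# `> 0` test below binarizes each channel at the mid-gray level (pixel > 127.5).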
with tqdm(total=len(os.listdir(path[0]))) as bar:
for file in os.listdir(path[0]):
doc = minidom.parse(path[0]+file)
fileName = doc.getElementsByTagName("filename")[0].firstChild.data
# img = Image.open(path[1]+fileName)
im = cv2.imread(path[1]+fileName)
objects = doc.getElementsByTagName("object")
detectionList = []
for o in objects:
if o.getElementsByTagName("name")[0].firstChild.data == "car":
bndbox = o.getElementsByTagName("bndbox")[0]
xmin = int(bndbox.getElementsByTagName("xmin")[0].firstChild.data)
xmax = int(bndbox.getElementsByTagName("xmax")[0].firstChild.data)
ymin = int(bndbox.getElementsByTagName("ymin")[0].firstChild.data)
ymax = int(bndbox.getElementsByTagName("ymax")[0].firstChild.data)
detectionList.append((xmin, ymin, xmax, ymax))
if len(detectionList):
ss = cv2.ximgproc.segmentation.createSelectiveSearchSegmentation()
ss.setBaseImage(im)
method = 'f' # f=fast, q=quality
if method == 'f': # fast but low recall
ss.switchToSelectiveSearchFast()
elif method == 'q': # high recall but slow
ss.switchToSelectiveSearchQuality()
rects = ss.process() # f:453, q:1354
rects = [(i[0], i[1], i[0]+i[2], i[1]+i[3]) for i in rects]
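            # Keep a crop as a positive if it has IoU > 0.5 with exactly one ground-truth
            # box; skip crops matching several boxes; keep ~10% of the non-matching
            # crops as negatives.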
for rect in rects:
positive = 0
# negative = 1
for i in detectionList:
iou = IOU(rect, i)
if iou > 0.5:
positive = positive + 1
# if iou:
# negative = 0
if positive > 1 or (not positive and random.random() > 0.1):
continue
# res = cv2.resize(im[rect[1]:rect[3], :][:, rect[0]:rect[2]], (100, 100)).reshape(-1)
res = cv2.resize(im[rect[1]:rect[3], :][:, rect[0]:rect[2]], (img_size, img_size))
# res = cv2.cvtColor(res, cv2.COLOR_BGR2GRAY)
# res = (transform_gray(res).reshape(-1) > 0).tolist()
res = (transform(res).reshape(-1) > 0).tolist()
                X.append(res)
if positive:
Y.append(1)
else:
Y.append(0)
bar.update(1)
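        # Write crops to disk in shards of ~5000; stop after five shards (X0..X4.npy).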
if len(X) > 5000:
X = np.array(X, dtype=np.int8).reshape((-1, 3, img_size, img_size))
Y = np.array(Y)
np.save("X"+str(cnt)+".npy", X)
np.save("Y"+str(cnt)+".npy", Y)
cnt = cnt + 1
if cnt == 5:
break
del X,Y,rects
X, Y = [], []
\ No newline at end of file
import numpy as np
from sklearn.model_selection import train_test_split
X = np.load("X0.npy")
Y = np.load("Y0.npy")
for i in range(1, 5):
tmpX = np.load("X"+str(i)+".npy")
tmpY = np.load("Y"+str(i)+".npy")
X = np.concatenate([X, tmpX],axis=0)
Y = np.concatenate([Y, tmpY],axis=0)
# print(X.size, X.itemsize, X.size * X.itemsize)
# np.save("X.npy", X)
# np.save("Y.npy", Y)
train_data, val_data = train_test_split([i for i in range(len(X))], test_size=0.2, random_state=2022)
np.save("train_x.npy", X[train_data])
np.save("train_y.npy", Y[train_data])
np.save("val_x.npy", X[val_data])
np.save("val_y.npy", Y[val_data])
\ No newline at end of file
# Detection-fp32
- `python preprocess.py`
- `python process.py`
- `python main.py`
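
A quick shape check on the arrays the steps above produce (a minimal sketch; the
file names follow what `process.py` saves):

```python
import numpy as np

X = np.load("train_x.npy")  # int8, shape (N, 3, 32, 32), binarized pixels in {0, 1}
Y = np.load("train_y.npy")  # labels: 1 = car crop, 0 = background
print(X.shape, Y.shape)
```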
\ No newline at end of file
# from __future__ import print_function
import argparse
import torch
import torch.nn as nn
import torch.nn.functional as F
import torch.optim as optim
from torchvision import datasets, transforms
from torch.autograd import Variable
from net import Net
from torch.utils.data import TensorDataset, DataLoader
import numpy as np
# Training settings
parser = argparse.ArgumentParser(description='Classification')
parser.add_argument('--batch-size', type=int, default=64, metavar='N',
                    help='input batch size for training (default: 64)')
parser.add_argument('--test-batch-size', type=int, default=1000, metavar='N',
                    help='input batch size for testing (default: 1000)')
parser.add_argument('--epochs', type=int, default=25, metavar='N',
                    help='number of epochs to train (default: 25)')
parser.add_argument('--lr', type=float, default=0.001, metavar='LR',
help='learning rate (default: 0.001)')
parser.add_argument('--momentum', type=float, default=0.5, metavar='M',
help='SGD momentum (default: 0.5)')
parser.add_argument('--no-cuda', action='store_true', default=False,
help='disables CUDA training')
parser.add_argument('--seed', type=int, default=1, metavar='S',
help='random seed (default: 1)')
parser.add_argument('--gpus', default=0,
help='gpus used for training - e.g 0,1,3')
parser.add_argument('--log-interval', type=int, default=10, metavar='N',
help='how many batches to wait before logging training status')
args = parser.parse_args()
args.cuda = not args.no_cuda and torch.cuda.is_available()
torch.manual_seed(args.seed)
if args.cuda:
torch.cuda.manual_seed(args.seed)
val_x = np.load("val_x.npy")
val_y = np.load("val_y.npy")
train_x = np.load("train_x.npy")
train_y = np.load("train_y.npy")
batch_size = args.batch_size
train_loader = TensorDataset(torch.from_numpy(train_x), torch.from_numpy(train_y))
train_loader = DataLoader(train_loader, shuffle=True, batch_size=batch_size, drop_last=True)
test_loader = TensorDataset(torch.from_numpy(val_x), torch.from_numpy(val_y))
test_loader = DataLoader(test_loader, shuffle=False, batch_size=args.test_batch_size)
model = Net(2)
from torchinfo import summary
summary(model, input_size=(batch_size, 1, 32, 32))
if args.cuda:
torch.cuda.set_device(0)
model.cuda()
criterion = nn.CrossEntropyLoss()
optimizer = optim.Adam(model.parameters(), lr=args.lr)
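# Quantized layers (see int8_modules.py) keep a full-precision master copy of each
# weight in `p.org`. train() restores that copy before optimizer.step(), then clamps
# it to [-1, 1]; the layer re-quantizes from the copy on the next forward pass.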
def train(epoch):
model.train()
for batch_idx, (data, target) in enumerate(train_loader):
        data = data.float()
        target = target.long()
        if args.cuda:
            data, target = data.cuda(), target.cuda()
        optimizer.zero_grad()
        output = model(data)
        loss = criterion(output, target)
        loss.backward()
for p in list(model.parameters()):
if hasattr(p,'org'):
p.data.copy_(p.org)
optimizer.step()
for p in list(model.parameters()):
if hasattr(p,'org'):
p.org.copy_(p.data.clamp_(-1,1))
if batch_idx % args.log_interval == 0:
print('Train Epoch: {} [{}/{} ({:.0f}%)]\tLoss: {:.6f}'.format(
epoch, batch_idx * len(data), len(train_loader.dataset),
100. * batch_idx / len(train_loader), loss.item()))
def test():
model.eval()
test_loss = 0
correct = 0
with torch.no_grad():
for data, target in test_loader:
            data = data.float()
            target = target.long()
            if args.cuda:
                data, target = data.cuda(), target.cuda()
            output = model(data)
            test_loss += criterion(output, target).item() * data.size(0)  # criterion returns the batch mean, so scale back to a sum
            pred = output.argmax(dim=1, keepdim=True)  # index of the max class score
            correct += pred.eq(target.view_as(pred)).cpu().sum()
test_loss /= len(test_loader.dataset)
test_acc = 100. * correct / len(test_loader.dataset)
print('\nTest set: Average loss: {:.4f}, Accuracy: {}/{} ({:.0f}%)\n'.format(
test_loss, correct, len(test_loader.dataset),
100. * correct / len(test_loader.dataset)))
return test_acc
max_test_acc = 0
for epoch in range(1, args.epochs + 1):
train(epoch)
test_acc = test()
if test_acc > max_test_acc:
max_test_acc = test_acc
torch.save(model.state_dict(), "model_parameter_best_fp32.pkl")
print("Best test acc: %.4f.\n" % max_test_acc)
    if epoch % 40 == 0:
        optimizer.param_groups[0]['lr'] *= 0.1
import torch
import torch.nn as nn
from qtorch.quant import fixed_point_quantize as fpq
def to_int8(tensor):
    # qtorch fixed-point quantization with wl=8, fl=4: 8-bit words, 4 fractional bits (Q4.4)
    return fpq(tensor, 8, 4)
class Int8_Linear(nn.Linear):
def __init__(self, *kargs, **kwargs):
super(Int8_Linear, self).__init__(*kargs, **kwargs)
    def forward(self, input):
        input.data = to_int8(input.data)  # quantize activations in place
        if not hasattr(self.weight, 'org'):
            self.weight.org = self.weight.data.clone()  # full-precision master copy
        self.weight.data = to_int8(self.weight.org)  # quantize weights from the master copy
        out = nn.functional.linear(input, self.weight)
        if self.bias is not None:
            self.bias.org = self.bias.data.clone()
            out += self.bias.view(1, -1).expand_as(out)  # bias stays full precision
        return out
class Int8_Conv2d(nn.Conv2d):
def __init__(self, *kargs, **kwargs):
super(Int8_Conv2d, self).__init__(*kargs, **kwargs)
    def forward(self, input):
        input.data = to_int8(input.data)  # quantize activations in place
        if not hasattr(self.weight, 'org'):
            self.weight.org = self.weight.data.clone()  # full-precision master copy
        self.weight.data = to_int8(self.weight.org)  # quantize weights from the master copy
        out = nn.functional.conv2d(input, self.weight, None, self.stride,
                                   self.padding, self.dilation, self.groups)
        if self.bias is not None:
            self.bias.org = self.bias.data.clone()
            out += self.bias.view(1, -1, 1, 1).expand_as(out)  # bias stays full precision
        return out
class Int8_MaxPool2d(nn.MaxPool2d):
def __init__(self, *kargs, **kwargs):
super(Int8_MaxPool2d, self).__init__(*kargs, **kwargs)
def forward(self, input):
input.data = to_int8(input.data)
out = nn.functional.max_pool2d(input, self.kernel_size, self.stride, self.padding, self.dilation)
return out
\ No newline at end of file
from xml.dom import minidom
import os
import numpy as np
import cv2
from tqdm import tqdm
import random
import torch
from torchvision import transforms
from net import Net
random.seed(2022)
path = ["data/Annotations/", "data/JPEGImages/"]
img_size = 32
def IOU(rec1, rec2):
    # rec = (xmin, ymin, xmax, ymax); returns intersection-over-union of the two boxes
    left_column_max = max(rec1[0], rec2[0])
    right_column_min = min(rec1[2], rec2[2])
    up_row_max = max(rec1[1], rec2[1])
    down_row_min = min(rec1[3], rec2[3])
    # the two rectangles do not overlap
    if left_column_max >= right_column_min or down_row_min <= up_row_max:
        return 0
    # the two rectangles overlap
    else:
        S1 = (rec1[2] - rec1[0]) * (rec1[3] - rec1[1])
        S2 = (rec2[2] - rec2[0]) * (rec2[3] - rec2[1])
        S_cross = (down_row_min - up_row_max) * (right_column_min - left_column_max)
        return S_cross / (S1 + S2 - S_cross)
model = Net(2)
model.load_state_dict(torch.load("model_parameter.pkl"))
model.eval()
transform=transforms.Compose([
transforms.ToTensor(),
transforms.Normalize((0.5, 0.5, 0.5), (0.5, 0.5, 0.5))
])
transform_gray=transforms.Compose([
transforms.ToTensor(),
transforms.Normalize((0.5, ), (0.5, ))
])
files = os.listdir(path[0])
logThreshold = np.log(0.95)
cnt = 0
for i in range(3300, 5000):
f = files[i]
doc = minidom.parse(path[0]+f)
fileName = doc.getElementsByTagName("filename")[0].firstChild.data
# img = Image.open(path[1]+fileName)
im = cv2.imread(path[1]+fileName)
objects = doc.getElementsByTagName("object")
detectionList = []
for o in objects:
if o.getElementsByTagName("name")[0].firstChild.data == "car":
bndbox = o.getElementsByTagName("bndbox")[0]
xmin = int(bndbox.getElementsByTagName("xmin")[0].firstChild.data)
xmax = int(bndbox.getElementsByTagName("xmax")[0].firstChild.data)
ymin = int(bndbox.getElementsByTagName("ymin")[0].firstChild.data)
ymax = int(bndbox.getElementsByTagName("ymax")[0].firstChild.data)
detectionList.append((xmin, ymin, xmax, ymax))
if len(detectionList):
ss = cv2.ximgproc.segmentation.createSelectiveSearchSegmentation()
ss.setBaseImage(im)
method = 'f' # f=fast, q=quality
if method == 'f': # fast but low recall
ss.switchToSelectiveSearchFast()
elif method == 'q': # high recall but slow
ss.switchToSelectiveSearchQuality()
rects = ss.process() # f:453, q:1354
rects = [(i[0], i[1], i[0]+i[2], i[1]+i[3]) for i in rects]
X = []
for rect in rects:
# res = cv2.resize(im[rect[1]:rect[3], :][:, rect[0]:rect[2]], (100, 100)).reshape(-1)
res = cv2.resize(im[rect[1]:rect[3], :][:, rect[0]:rect[2]], (img_size, img_size))
res = cv2.cvtColor(res, cv2.COLOR_BGR2GRAY)
res = (transform_gray(res).reshape(-1) > 0).tolist()
# X = [res for i in range(1)]
X.append(res)
X = np.array(X, dtype=np.int8).reshape(-1, 1, img_size, img_size)
X = torch.tensor(X, dtype=torch.float)
        with torch.no_grad():  # inference only; no autograd graph over hundreds of crops
            Y = model(X)
predictList = []
for j in range(len(rects)):
# if Y[j][1] > logThreshold:
if Y[j][1] > 0.8:
predictList.append((rects[j], float(Y[j][1])))
predictList.sort(key=lambda item: item[1], reverse=True)
del X, Y, rects
M = []
H = predictList
while len(H):
rect = H[0][0]
Hnew = []
for h in H:
if IOU(rect, h[0]) <= 0.05:
Hnew.append(h)
H = Hnew
M.append(rect)
for rect in M:
cv2.rectangle(im, (rect[0], rect[1]), (rect[2], rect[3]), (0, 0, 255), 1, cv2.LINE_AA)
if len(M):
cv2.imwrite('pic'+str(cnt)+'.png', im)
cnt = cnt + 1
if cnt > 20:
exit(0)
\ No newline at end of file
import torch
import torch.nn as nn
from int8_modules import *
class Net(nn.Module):
def __init__(self, num_classes=1000):
super(Net, self).__init__()
# self.ratioInfl=1
self.features = nn.Sequential(
Int8_Conv2d(1, 10, kernel_size=11, stride=1, padding=1),
# BinarizeConv2d(1, int(64*self.ratioInfl), kernel_size=5, stride=2, padding=1),
nn.BatchNorm2d(10, affine=False),
nn.ReLU(inplace=True),
Int8_MaxPool2d(kernel_size=3, stride=2),
Int8_Conv2d(10, 10, kernel_size=5, padding=2),
# BinarizeConv2d(int(64*self.ratioInfl), int(192*self.ratioInfl), kernel_size=3, padding=1),
nn.BatchNorm2d(10, affine=False),
nn.ReLU(inplace=True),
Int8_MaxPool2d(kernel_size=3, stride=2),
Int8_Conv2d(10, 32, kernel_size=3, padding=1),
nn.BatchNorm2d(32, affine=False),
nn.ReLU(inplace=True),
Int8_Conv2d(32, 32, kernel_size=3, padding=1),
nn.BatchNorm2d(32, affine=False),
nn.ReLU(inplace=True),
Int8_Conv2d(32, 32, kernel_size=3, padding=1),
nn.BatchNorm2d(32, affine=False),
nn.ReLU(inplace=True),
Int8_MaxPool2d(kernel_size=3, stride=2),
)
self.classifier_1 = nn.Sequential(
Int8_Linear(32 * 2 * 2, 1024),
# BinarizeLinear(256 * 1 * 1, 4096),
nn.BatchNorm1d(1024),
nn.ReLU(inplace=True),
)
self.classifier_2 = nn.Sequential(
Int8_Linear(256, 256),
nn.BatchNorm1d(256),
nn.ReLU(inplace=True),
Int8_Linear(256, 64),
nn.BatchNorm1d(64),
nn.ReLU(inplace=True),
)
self.classifier_3 = nn.Sequential(
Int8_Linear(256, num_classes),
nn.BatchNorm1d(num_classes),
            nn.Softmax(dim=1)  # explicit dim avoids the deprecation warning; note CrossEntropyLoss in main.py applies log-softmax on top of this
)
    def forward(self, x):
        x = x.reshape(-1, 1, 32, 32)
        x = self.features(x)
        x = x.view(-1, 32 * 2 * 2)
        x = self.classifier_1(x)
        B, C = x.size()
        # classifier_1 emits 1024 features; split them into four independent 256-dim
        # chunks, map each to 64 dims with classifier_2, then re-stack the four
        # outputs into one 256-dim vector per sample (4 * 64 = 256).
        x = x.view(4 * B, 256)
        x = self.classifier_2(x)
        x = x.view(B, 256)
        x = self.classifier_3(x)
        return x
\ No newline at end of file
from xml.dom import minidom
import os
import numpy as np
import cv2
from tqdm import tqdm
import random
import torchvision.transforms as transforms
random.seed(2022)
path = ["../data/Annotations/", "../data/JPEGImages/"]
img_size = 32
def IOU(rec1, rec2):
    # rec = (xmin, ymin, xmax, ymax); returns intersection-over-union of the two boxes
    left_column_max = max(rec1[0], rec2[0])
    right_column_min = min(rec1[2], rec2[2])
    up_row_max = max(rec1[1], rec2[1])
    down_row_min = min(rec1[3], rec2[3])
    # the two rectangles do not overlap
    if left_column_max >= right_column_min or down_row_min <= up_row_max:
        return 0
    # the two rectangles overlap
    else:
        S1 = (rec1[2] - rec1[0]) * (rec1[3] - rec1[1])
        S2 = (rec2[2] - rec2[0]) * (rec2[3] - rec2[1])
        S_cross = (down_row_min - up_row_max) * (right_column_min - left_column_max)
        return S_cross / (S1 + S2 - S_cross)
X = []
Y = []
cnt = 0
transform=transforms.Compose([
transforms.ToTensor(),
transforms.Normalize((0.5, 0.5, 0.5), (0.5, 0.5, 0.5))
])
transform_gray=transforms.Compose([
transforms.ToTensor(),
transforms.Normalize((0.5,), (0.5,))
])
with tqdm(total=len(os.listdir(path[0]))) as bar:
for file in os.listdir(path[0]):
doc = minidom.parse(path[0]+file)
fileName = doc.getElementsByTagName("filename")[0].firstChild.data
# img = Image.open(path[1]+fileName)
im = cv2.imread(path[1]+fileName)
objects = doc.getElementsByTagName("object")
detectionList = []
for o in objects:
if o.getElementsByTagName("name")[0].firstChild.data == "car":
bndbox = o.getElementsByTagName("bndbox")[0]
xmin = int(bndbox.getElementsByTagName("xmin")[0].firstChild.data)
xmax = int(bndbox.getElementsByTagName("xmax")[0].firstChild.data)
ymin = int(bndbox.getElementsByTagName("ymin")[0].firstChild.data)
ymax = int(bndbox.getElementsByTagName("ymax")[0].firstChild.data)
detectionList.append((xmin, ymin, xmax, ymax))
if len(detectionList):
ss = cv2.ximgproc.segmentation.createSelectiveSearchSegmentation()
ss.setBaseImage(im)
method = 'f' # f=fast, q=quality
if method == 'f': # fast but low recall
ss.switchToSelectiveSearchFast()
elif method == 'q': # high recall but slow
ss.switchToSelectiveSearchQuality()
rects = ss.process() # f:453, q:1354
rects = [(i[0], i[1], i[0]+i[2], i[1]+i[3]) for i in rects]
for rect in rects:
positive = 0
# negative = 1
for i in detectionList:
iou = IOU(rect, i)
if iou > 0.5:
positive = positive + 1
# if iou:
# negative = 0
if positive > 1 or (not positive and random.random() > 0.1):
continue
# res = cv2.resize(im[rect[1]:rect[3], :][:, rect[0]:rect[2]], (100, 100)).reshape(-1)
res = cv2.resize(im[rect[1]:rect[3], :][:, rect[0]:rect[2]], (img_size, img_size))
res = cv2.cvtColor(res, cv2.COLOR_BGR2GRAY)
res = (transform_gray(res).reshape(-1) > 0).tolist()
# res = (transform(res).reshape(-1) > 0).tolist()
                X.append(res)
if positive:
Y.append(1)
else:
Y.append(0)
bar.update(1)
if len(X) > 5000:
X = np.array(X, dtype=np.int8).reshape((-1, 1, img_size, img_size))
Y = np.array(Y)
np.save("X"+str(cnt)+".npy", X)
np.save("Y"+str(cnt)+".npy", Y)
cnt = cnt + 1
if cnt == 5:
break
del X,Y,rects
X, Y = [], []
\ No newline at end of file
import numpy as np
from sklearn.model_selection import train_test_split
X = np.load("X0.npy")
Y = np.load("Y0.npy")
for i in range(1, 5):
tmpX = np.load("X"+str(i)+".npy")
tmpY = np.load("Y"+str(i)+".npy")
X = np.concatenate([X, tmpX],axis=0)
Y = np.concatenate([Y, tmpY],axis=0)
# print(X.size, X.itemsize, X.size * X.itemsize)
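# Remap the binary pixel values {0, 1} to {-1, +1}.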
X[X == 0] = -1
# np.save("X.npy", X)
# np.save("Y.npy", Y)
train_data, val_data = train_test_split([i for i in range(len(X))], test_size=0.2, random_state=2022)
np.save("train_x.npy", X[train_data])
np.save("train_y.npy", Y[train_data])
np.save("val_x.npy", X[val_data])
np.save("val_y.npy", Y[val_data])
\ No newline at end of file
# Detection-int8
- `python preprocess.py`
- `python process.py`
- `python cnn.py`
- `python main.py`
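
A minimal smoke test for the fixed-point helper in `int8_modules.py` (a sketch,
assuming `qtorch` is installed):

```python
import torch
from int8_modules import to_int8

x = torch.randn(4)
# to_int8 quantizes to an 8-bit Q4.4 fixed-point grid:
# multiples of 1/16, clamped to roughly [-8, 8).
print(x)
print(to_int8(x))
```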
# from __future__ import print_function
import argparse
import torch
import torch.nn as nn
import torch.nn.functional as F
import torch.optim as optim
from torchvision import datasets, transforms
from torch.autograd import Variable
from net import Net
from torch.utils.data import TensorDataset, DataLoader
import numpy as np
# Training settings
parser = argparse.ArgumentParser(description='Classification')
parser.add_argument('--batch-size', type=int, default=64, metavar='N',
                    help='input batch size for training (default: 64)')
parser.add_argument('--test-batch-size', type=int, default=1000, metavar='N',
                    help='input batch size for testing (default: 1000)')
parser.add_argument('--epochs', type=int, default=25, metavar='N',
                    help='number of epochs to train (default: 25)')
parser.add_argument('--lr', type=float, default=0.001, metavar='LR',
help='learning rate (default: 0.001)')
parser.add_argument('--momentum', type=float, default=0.5, metavar='M',
help='SGD momentum (default: 0.5)')
parser.add_argument('--no-cuda', action='store_true', default=False,
help='disables CUDA training')
parser.add_argument('--seed', type=int, default=1, metavar='S',
help='random seed (default: 1)')
parser.add_argument('--gpus', default=0,
help='gpus used for training - e.g 0,1,3')
parser.add_argument('--log-interval', type=int, default=10, metavar='N',
help='how many batches to wait before logging training status')
args = parser.parse_args()
args.cuda = not args.no_cuda and torch.cuda.is_available()
torch.manual_seed(args.seed)
if args.cuda:
torch.cuda.manual_seed(args.seed)
val_x = np.load("val_x.npy")
val_y = np.load("val_y.npy")
train_x = np.load("train_x.npy")
train_y = np.load("train_y.npy")
batch_size = args.batch_size
train_loader = TensorDataset(torch.from_numpy(train_x), torch.from_numpy(train_y))
train_loader = DataLoader(train_loader, shuffle=True, batch_size=batch_size, drop_last=True)
test_loader = TensorDataset(torch.from_numpy(val_x), torch.from_numpy(val_y))
test_loader = DataLoader(test_loader, shuffle=False, batch_size=args.test_batch_size)
model = Net(2)
from torchinfo import summary
summary(model, input_size=(batch_size, 3, 32, 32))
if args.cuda:
torch.cuda.set_device(0)
model.cuda()
criterion = nn.CrossEntropyLoss()
optimizer = optim.Adam(model.parameters(), lr=args.lr)
def train(epoch):
model.train()
for batch_idx, (data, target) in enumerate(train_loader):
        data = data.float()
        target = target.long()
        if args.cuda:
            data, target = data.cuda(), target.cuda()
        optimizer.zero_grad()
        output = model(data)
        loss = criterion(output, target)
        loss.backward()
for p in list(model.parameters()):
if hasattr(p,'org'):
p.data.copy_(p.org)
optimizer.step()
for p in list(model.parameters()):
if hasattr(p,'org'):
p.org.copy_(p.data.clamp_(-1,1))
if batch_idx % args.log_interval == 0:
print('Train Epoch: {} [{}/{} ({:.0f}%)]\tLoss: {:.6f}'.format(
epoch, batch_idx * len(data), len(train_loader.dataset),
100. * batch_idx / len(train_loader), loss.item()))
def test():
model.eval()
test_loss = 0
correct = 0
with torch.no_grad():
for data, target in test_loader:
            data = data.float()
            target = target.long()
            if args.cuda:
                data, target = data.cuda(), target.cuda()
            output = model(data)
            test_loss += criterion(output, target).item() * data.size(0)  # criterion returns the batch mean, so scale back to a sum
            pred = output.argmax(dim=1, keepdim=True)  # index of the max class score
            correct += pred.eq(target.view_as(pred)).cpu().sum()
test_loss /= len(test_loader.dataset)
test_acc = 100. * correct / len(test_loader.dataset)
print('\nTest set: Average loss: {:.4f}, Accuracy: {}/{} ({:.0f}%)\n'.format(
test_loss, correct, len(test_loader.dataset),
100. * correct / len(test_loader.dataset)))
return test_acc
max_test_acc = 0
for epoch in range(1, args.epochs + 1):
train(epoch)
test_acc = test()
if test_acc > max_test_acc:
max_test_acc = test_acc
torch.save(model.state_dict(), "model_parameter_best_fp32.pkl")
print("Best test acc: %.4f.\n" % max_test_acc)
    if epoch % 40 == 0:
        optimizer.param_groups[0]['lr'] *= 0.1
import torch
import torch.nn as nn
from qtorch.quant import fixed_point_quantize as fpq
def to_int8(tensor):
    # qtorch fixed-point quantization with wl=8, fl=4: 8-bit words, 4 fractional bits (Q4.4)
    return fpq(tensor, 8, 4)
class Int8_Linear(nn.Linear):
def __init__(self, *kargs, **kwargs):
super(Int8_Linear, self).__init__(*kargs, **kwargs)
    def forward(self, input):
        input.data = to_int8(input.data)  # quantize activations in place
        if not hasattr(self.weight, 'org'):
            self.weight.org = self.weight.data.clone()  # full-precision master copy
        self.weight.data = to_int8(self.weight.org)  # quantize weights from the master copy
        out = nn.functional.linear(input, self.weight)
        if self.bias is not None:
            self.bias.org = self.bias.data.clone()
            out += self.bias.view(1, -1).expand_as(out)  # bias stays full precision
        return out
class Int8_Conv2d(nn.Conv2d):
def __init__(self, *kargs, **kwargs):
super(Int8_Conv2d, self).__init__(*kargs, **kwargs)
    def forward(self, input):
        input.data = to_int8(input.data)  # quantize activations in place
        if not hasattr(self.weight, 'org'):
            self.weight.org = self.weight.data.clone()  # full-precision master copy
        self.weight.data = to_int8(self.weight.org)  # quantize weights from the master copy
        out = nn.functional.conv2d(input, self.weight, None, self.stride,
                                   self.padding, self.dilation, self.groups)
        if self.bias is not None:
            self.bias.org = self.bias.data.clone()
            out += self.bias.view(1, -1, 1, 1).expand_as(out)  # bias stays full precision
        return out
class Int8_MaxPool2d(nn.MaxPool2d):
def __init__(self, *kargs, **kwargs):
super(Int8_MaxPool2d, self).__init__(*kargs, **kwargs)
def forward(self, input):
input.data = to_int8(input.data)
out = nn.functional.max_pool2d(input, self.kernel_size, self.stride, self.padding, self.dilation)
return out
\ No newline at end of file
from xml.dom import minidom
import os
import numpy as np
import cv2
from tqdm import tqdm
import random
import torch
from torchvision import transforms
from net import Net
random.seed(2022)
path = ["data/Annotations/", "data/JPEGImages/"]
img_size = 32
def IOU(rec1, rec2):
    # rec = (xmin, ymin, xmax, ymax); returns intersection-over-union of the two boxes
    left_column_max = max(rec1[0], rec2[0])
    right_column_min = min(rec1[2], rec2[2])
    up_row_max = max(rec1[1], rec2[1])
    down_row_min = min(rec1[3], rec2[3])
    # the two rectangles do not overlap
    if left_column_max >= right_column_min or down_row_min <= up_row_max:
        return 0
    # the two rectangles overlap
    else:
        S1 = (rec1[2] - rec1[0]) * (rec1[3] - rec1[1])
        S2 = (rec2[2] - rec2[0]) * (rec2[3] - rec2[1])
        S_cross = (down_row_min - up_row_max) * (right_column_min - left_column_max)
        return S_cross / (S1 + S2 - S_cross)
model = Net(2)
model.load_state_dict(torch.load("model_parameter.pkl"))
model.eval()
transform=transforms.Compose([
transforms.ToTensor(),
transforms.Normalize((0.5, 0.5, 0.5), (0.5, 0.5, 0.5))
])
transform_gray=transforms.Compose([
transforms.ToTensor(),
transforms.Normalize((0.5, ), (0.5, ))
])
files = os.listdir(path[0])
logThreshold = np.log(0.95)
cnt = 0
for i in range(3300, 5000):
f = files[i]
doc = minidom.parse(path[0]+f)
fileName = doc.getElementsByTagName("filename")[0].firstChild.data
# img = Image.open(path[1]+fileName)
im = cv2.imread(path[1]+fileName)
objects = doc.getElementsByTagName("object")
detectionList = []
for o in objects:
if o.getElementsByTagName("name")[0].firstChild.data == "car":
bndbox = o.getElementsByTagName("bndbox")[0]
xmin = int(bndbox.getElementsByTagName("xmin")[0].firstChild.data)
xmax = int(bndbox.getElementsByTagName("xmax")[0].firstChild.data)
ymin = int(bndbox.getElementsByTagName("ymin")[0].firstChild.data)
ymax = int(bndbox.getElementsByTagName("ymax")[0].firstChild.data)
detectionList.append((xmin, ymin, xmax, ymax))
if len(detectionList):
ss = cv2.ximgproc.segmentation.createSelectiveSearchSegmentation()
ss.setBaseImage(im)
method = 'f' # f=fast, q=quality
if method == 'f': # fast but low recall
ss.switchToSelectiveSearchFast()
elif method == 'q': # high recall but slow
ss.switchToSelectiveSearchQuality()
rects = ss.process() # f:453, q:1354
rects = [(i[0], i[1], i[0]+i[2], i[1]+i[3]) for i in rects]
X = []
for rect in rects:
# res = cv2.resize(im[rect[1]:rect[3], :][:, rect[0]:rect[2]], (100, 100)).reshape(-1)
res = cv2.resize(im[rect[1]:rect[3], :][:, rect[0]:rect[2]], (img_size, img_size))
res = cv2.cvtColor(res, cv2.COLOR_BGR2GRAY)
res = (transform_gray(res).reshape(-1) > 0).tolist()
# X = [res for i in range(1)]
X.append(res)
X = np.array(X, dtype=np.int8).reshape(-1, 1, img_size, img_size)
X = torch.tensor(X, dtype=torch.float)
        with torch.no_grad():  # inference only; no autograd graph over hundreds of crops
            Y = model(X)
predictList = []
for j in range(len(rects)):
# if Y[j][1] > logThreshold:
if Y[j][1] > 0.8:
predictList.append((rects[j], float(Y[j][1])))
predictList.sort(key=lambda item: item[1], reverse=True)
del X, Y, rects
M = []
H = predictList
while len(H):
rect = H[0][0]
Hnew = []
for h in H:
if IOU(rect, h[0]) <= 0.05:
Hnew.append(h)
H = Hnew
M.append(rect)
for rect in M:
cv2.rectangle(im, (rect[0], rect[1]), (rect[2], rect[3]), (0, 0, 255), 1, cv2.LINE_AA)
if len(M):
cv2.imwrite('pic'+str(cnt)+'.png', im)
cnt = cnt + 1
if cnt > 20:
exit(0)
\ No newline at end of file
import torch
import torch.nn as nn
from int8_modules import *
class Net(nn.Module):
def __init__(self, num_classes=1000):
super(Net, self).__init__()
# self.ratioInfl=1
self.features = nn.Sequential(
Int8_Conv2d(3, 10, kernel_size=11, stride=1, padding=1),
# BinarizeConv2d(1, int(64*self.ratioInfl), kernel_size=5, stride=2, padding=1),
nn.BatchNorm2d(10, affine=False),
nn.ReLU(inplace=True),
Int8_MaxPool2d(kernel_size=3, stride=2),
Int8_Conv2d(10, 10, kernel_size=5, padding=2),
# BinarizeConv2d(int(64*self.ratioInfl), int(192*self.ratioInfl), kernel_size=3, padding=1),
nn.BatchNorm2d(10, affine=False),
nn.ReLU(inplace=True),
Int8_MaxPool2d(kernel_size=3, stride=2),
Int8_Conv2d(10, 32, kernel_size=3, padding=1),
nn.BatchNorm2d(32, affine=False),
nn.ReLU(inplace=True),
Int8_Conv2d(32, 32, kernel_size=3, padding=1),
nn.BatchNorm2d(32, affine=False),
nn.ReLU(inplace=True),
Int8_Conv2d(32, 32, kernel_size=3, padding=1),
nn.BatchNorm2d(32, affine=False),
nn.ReLU(inplace=True),
Int8_MaxPool2d(kernel_size=3, stride=2),
)
self.classifier_1 = nn.Sequential(
Int8_Linear(32 * 2 * 2, 1024),
# BinarizeLinear(256 * 1 * 1, 4096),
nn.BatchNorm1d(1024),
nn.ReLU(inplace=True),
)
self.classifier_2 = nn.Sequential(
Int8_Linear(256, 256),
nn.BatchNorm1d(256),
nn.ReLU(inplace=True),
Int8_Linear(256, 64),
nn.BatchNorm1d(64),
nn.ReLU(inplace=True),
)
self.classifier_3 = nn.Sequential(
Int8_Linear(256, num_classes),
nn.BatchNorm1d(num_classes),
            nn.Softmax(dim=1)  # explicit dim avoids the deprecation warning; note CrossEntropyLoss in main.py applies log-softmax on top of this
)
    def forward(self, x):
        x = x.reshape(-1, 3, 32, 32)
        x = self.features(x)
        x = x.view(-1, 32 * 2 * 2)
        x = self.classifier_1(x)
        B, C = x.size()
        # classifier_1 emits 1024 features; split them into four independent 256-dim
        # chunks, map each to 64 dims with classifier_2, then re-stack the four
        # outputs into one 256-dim vector per sample (4 * 64 = 256).
        x = x.view(4 * B, 256)
        x = self.classifier_2(x)
        x = x.view(B, 256)
        x = self.classifier_3(x)
        return x
\ No newline at end of file
from xml.dom import minidom
import os
import numpy as np
import cv2
from tqdm import tqdm
import random
import torchvision.transforms as transforms
random.seed(2022)
path = ["../data/Annotations/", "../data/JPEGImages/"]
img_size = 32
def IOU(rec1, rec2):
    # rec = (xmin, ymin, xmax, ymax); returns intersection-over-union of the two boxes
    left_column_max = max(rec1[0], rec2[0])
    right_column_min = min(rec1[2], rec2[2])
    up_row_max = max(rec1[1], rec2[1])
    down_row_min = min(rec1[3], rec2[3])
    # the two rectangles do not overlap
    if left_column_max >= right_column_min or down_row_min <= up_row_max:
        return 0
    # the two rectangles overlap
    else:
        S1 = (rec1[2] - rec1[0]) * (rec1[3] - rec1[1])
        S2 = (rec2[2] - rec2[0]) * (rec2[3] - rec2[1])
        S_cross = (down_row_min - up_row_max) * (right_column_min - left_column_max)
        return S_cross / (S1 + S2 - S_cross)
X = []
Y = []
cnt = 0
transform=transforms.Compose([
transforms.ToTensor(),
transforms.Normalize((0.5, 0.5, 0.5), (0.5, 0.5, 0.5))
])
transform_gray=transforms.Compose([
transforms.ToTensor(),
transforms.Normalize((0.5,), (0.5,))
])
with tqdm(total=len(os.listdir(path[0]))) as bar:
for file in os.listdir(path[0]):
doc = minidom.parse(path[0]+file)
fileName = doc.getElementsByTagName("filename")[0].firstChild.data
# img = Image.open(path[1]+fileName)
im = cv2.imread(path[1]+fileName)
objects = doc.getElementsByTagName("object")
detectionList = []
for o in objects:
if o.getElementsByTagName("name")[0].firstChild.data == "car":
bndbox = o.getElementsByTagName("bndbox")[0]
xmin = int(bndbox.getElementsByTagName("xmin")[0].firstChild.data)
xmax = int(bndbox.getElementsByTagName("xmax")[0].firstChild.data)
ymin = int(bndbox.getElementsByTagName("ymin")[0].firstChild.data)
ymax = int(bndbox.getElementsByTagName("ymax")[0].firstChild.data)
detectionList.append((xmin, ymin, xmax, ymax))
if len(detectionList):
ss = cv2.ximgproc.segmentation.createSelectiveSearchSegmentation()
ss.setBaseImage(im)
method = 'f' # f=fast, q=quality
if method == 'f': # fast but low recall
ss.switchToSelectiveSearchFast()
elif method == 'q': # high recall but slow
ss.switchToSelectiveSearchQuality()
rects = ss.process() # f:453, q:1354
rects = [(i[0], i[1], i[0]+i[2], i[1]+i[3]) for i in rects]
for rect in rects:
positive = 0
# negative = 1
for i in detectionList:
iou = IOU(rect, i)
if iou > 0.5:
positive = positive + 1
# if iou:
# negative = 0
if positive > 1 or (not positive and random.random() > 0.1):
continue
# res = cv2.resize(im[rect[1]:rect[3], :][:, rect[0]:rect[2]], (100, 100)).reshape(-1)
res = cv2.resize(im[rect[1]:rect[3], :][:, rect[0]:rect[2]], (img_size, img_size))
# res = cv2.cvtColor(res, cv2.COLOR_BGR2GRAY)
# res = (transform_gray(res).reshape(-1) > 0).tolist()
res = (transform(res).reshape(-1) > 0).tolist()
                X.append(res)
if positive:
Y.append(1)
else:
Y.append(0)
bar.update(1)
if len(X) > 5000:
X = np.array(X, dtype=np.int8).reshape((-1, 3, img_size, img_size))
Y = np.array(Y)
np.save("X"+str(cnt)+".npy", X)
np.save("Y"+str(cnt)+".npy", Y)
cnt = cnt + 1
if cnt == 5:
break
del X,Y,rects
X, Y = [], []
\ No newline at end of file
import numpy as np
from sklearn.model_selection import train_test_split
X = np.load("X0.npy")
Y = np.load("Y0.npy")
for i in range(1, 5):
tmpX = np.load("X"+str(i)+".npy")
tmpY = np.load("Y"+str(i)+".npy")
X = np.concatenate([X, tmpX],axis=0)
Y = np.concatenate([Y, tmpY],axis=0)
# print(X.size, X.itemsize, X.size * X.itemsize)
# np.save("X.npy", X)
# np.save("Y.npy", Y)
train_data, val_data = train_test_split([i for i in range(len(X))], test_size=0.2, random_state=2022)
np.save("train_x.npy", X[train_data])
np.save("train_y.npy", Y[train_data])
np.save("val_x.npy", X[val_data])
np.save("val_y.npy", Y[val_data])
\ No newline at end of file
# Detection-int8
- `python preprocess.py`
- `python process.py`
- `python cnn.py`
- `python main.py`