5 changes: 5 additions & 0 deletions .gitignore
@@ -0,0 +1,5 @@
# file: ~/.gitignore_global
.DS_Store
.idea
__pycache__
model_processing/__pycache__
4 changes: 3 additions & 1 deletion README.md
@@ -1 +1,3 @@
# ml-diplom
So, it turned out (good that we caught it early) that one image can have several answers,
so let's build a set of labels for each training image,
and then keep only the labels whose label map is not all zeros.
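
In code terms, each training image ends up with a multi-hot vector, one slot per attribute class; a rough sketch (the class names below are placeholders, not part of this repo):

classes = ['globules', 'milia_like_cyst', 'negative_network', 'pigment_network', 'streaks']
present = {'globules', 'streaks'}                    # masks that are not all zeros
label = [1 if c in present else 0 for c in classes]  # -> [1, 0, 0, 0, 1]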
Empty file removed heh2
Empty file.
151 changes: 151 additions & 0 deletions image_loader.py
@@ -0,0 +1,151 @@
from sklearn.utils import shuffle
import torch.utils.data.dataset
import os
from torchvision import transforms
from PIL import Image
import torch
from multiprocessing.pool import ThreadPool
from properties import classes, field_id, field_train, image_size, num_workers

name_delimiter = '_.'
prefix = 'ISIC_'
attribute = '_attribute_'

composite = transforms.Compose([
    transforms.Resize((image_size, image_size)),
    transforms.ToTensor(),
])


def images_filter(dir_path: str, suffix: str):
    arrays = []
    for _, _, f in os.walk(dir_path):
        for file in f:
            if not file.endswith(suffix):
                continue
            img_id = file.split(suffix)[0].split(prefix)[1]  # avoid shadowing the builtin id
            arrays.append((img_id, os.path.join(dir_path, file)))
    return arrays


def map_inputs_labels(data_inputs_path: str, data_labels_path: str):
    """
    Combine input images and label images into one dict per image id.
    :return: list of dict<attribute -> path to label image>, plus 'id' and 'train' (path to original image)
    """
    inputs = images_filter(data_inputs_path, '.jpg')
    labels = images_filter(data_labels_path, '.png')
    # (id, attribute_name, path_to_file)
    labels_generator = map(lambda x: (x[0].split(attribute)[0], x[0].split(attribute)[1], x[1]), labels)

    labels_dict = dict()
    for ids, attr, path in labels_generator:
        if ids in labels_dict:
            labels_dict[ids][attr] = path
        else:
            labels_dict[ids] = {attr: path}

    result = []
    for ids, path in inputs:
        labels_dict[ids]['id'] = ids
        labels_dict[ids]['train'] = path
        result.append(labels_dict[ids])
    return result
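

# Example of one returned entry (the id, attribute name and paths are hypothetical):
#   {'id': '0000000',
#    'train': 'data/inputs/ISIC_0000000.jpg',
#    'globules': 'data/labels/ISIC_0000000_attribute_globules.png',
#    ...}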


def split_set(values: list, test_size: int, validate_size: int):
    """
    Shuffle the list of images, then cut off two subsets.
    :param values: result of map_inputs_labels
    :param test_size: size of the first returned subset (used for training)
    :param validate_size: size of the second returned subset
    :return: train set, validate set
    """
    shuffled = shuffle(values)
    return shuffled[0:test_size], shuffled[test_size:test_size + validate_size]
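

# Usage sketch (sizes are illustrative):
#   train_set, validate_set = split_set(map_inputs_labels(inputs_dir, labels_dir), 2000, 500)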


def map_cell(cell: dict):
    # Open every image in the cell and resize it into a tensor, in place.
    for i in classes:
        cell[i] = composite(Image.open(cell[i]))
    cell[field_train] = composite(Image.open(cell[field_train]))
    return cell


def prepare_data(*args):
    # map_cell mutates each dict in place; pool.map runs the conversions in
    # parallel (pool.apply would block on every call and run them serially).
    pool = ThreadPool(num_workers)
    for array in args:
        pool.map(map_cell, array)
    pool.close()
    pool.join()


def save_data(path: str, data: dict):
    ids = data[field_id]
    os.makedirs(os.path.join(path, ids), exist_ok=True)
    for i in classes:
        torch.save(data[i], os.path.join(path, ids, i))
    torch.save(data[field_train], os.path.join(path, ids, field_train))


# either train_path or test_path
def load_tensors(path: str):
    result = []
    for tensors in os.listdir(path):
        tensor_dir = os.path.join(path, tensors)
        if not os.path.isdir(tensor_dir):
            continue
        res = dict()
        res[field_id] = tensors
        for fl in os.listdir(tensor_dir):
            tensor_file = os.path.join(tensor_dir, fl)
            if not os.path.isfile(tensor_file):
                continue
            res[fl] = torch.load(tensor_file)
        result.append(res)
    return result


def remove_empty_mask(cell: dict):
    # Keep only the labels whose mask is non-empty: drop a class
    # when its label map contains no positive pixels.
    for c in classes:
        if cell[c].sum() == 0:
            cell.pop(c)
    return cell


def apply_resize(cell: dict):
    scale = transforms.Compose([
        transforms.ToPILImage(),
        transforms.Resize((image_size, image_size)),
        transforms.ToTensor()
    ])
    for c in classes:
        cell[c] = scale(cell[c])
    cell[field_train] = scale(cell[field_train])
    return cell


class CustomDataset(torch.utils.data.dataset.Dataset):
    """
    data -- list of dicts as returned by load_tensors
    """

    def __init__(self, data: list):
        self.data = data
        self.inputs = [i[field_train] for i in data]
        self.labels = [self.__item_to_tensor(i) for i in data]

    def __len__(self):
        return len(self.inputs)

    def __getitem__(self, item):
        return self.inputs[item], self.labels[item]

    @staticmethod
    def __item_to_tensor(d: dict) -> torch.Tensor:
        # multi-hot vector: 1 for every class whose mask survived remove_empty_mask
        t = torch.tensor([0 for _ in classes])
        for idx, cl in enumerate(classes):
            if cl in d:
                t[idx] = 1
        return t
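
A minimal end-to-end sketch of the data-preparation flow above; the directory paths and split sizes are illustrative, not part of this PR:

from image_loader import (CustomDataset, load_tensors, map_inputs_labels,
                          prepare_data, remove_empty_mask, save_data, split_set)

values = map_inputs_labels('data/inputs', 'data/labels')   # hypothetical paths
train_set, validate_set = split_set(values, 2000, 500)     # illustrative sizes
prepare_data(train_set, validate_set)                      # open and resize everything to tensors
for cell in train_set:
    save_data('tensors/train', cell)
for cell in validate_set:
    save_data('tensors/validate', cell)

# later: reload, drop empty masks, and wrap in a Dataset
train_dataset = CustomDataset([remove_empty_mask(c) for c in load_tensors('tensors/train')])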
44 changes: 44 additions & 0 deletions model_processing/train_model.py
@@ -0,0 +1,44 @@
from torchvision import models
import torch.utils.data.dataset
import torch
import torch.nn as nn
from torch.optim.optimizer import Optimizer
from model_processing.validate_model import ValidateModel


class TrainModel:
    def __init__(self, validator: ValidateModel, device='cpu'):
        self.device = device
        self.validator = validator

    def train_model(self, m: models.AlexNet, train_loader: torch.utils.data.DataLoader,
                    validate_loader: torch.utils.data.DataLoader,
                    criterion: nn.Module,
                    optimizer: Optimizer, epochs: int) -> models.AlexNet:

        self.validator.flush()

        for epoch in range(epochs):
            print("Epoch: {}/{}".format(epoch + 1, epochs))
            m = self._train_model_single_epoch(m, train_loader, criterion, optimizer)
            acc = self.validator.validate_model_single_epoch(m, validate_loader)
            print("acc: {}".format(acc))
            print("-" * 20)

        # load_state_dict mutates the module in place and does not return the model
        m.load_state_dict(self.validator.best_model_weights)
        print("best acc: {}".format(self.validator.best_accuracy))
        return m

    def _train_model_single_epoch(self, m: models.AlexNet, data_loader: torch.utils.data.DataLoader,
                                  criterion: nn.Module, optimizer: Optimizer) -> models.AlexNet:
        m.train()
        for inputs, labels in data_loader:
            inputs = inputs.to(self.device)
            labels = labels.float().to(self.device)  # multi-hot labels as floats for the binary loss
            optimizer.zero_grad()

            model_result = m(inputs)
            loss = criterion(model_result, labels)
            loss.backward()
            optimizer.step()
        return m
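
The validator in the next file thresholds each output at 0.5 (whichever of 0 or 1 it is closer to), and the labels are multi-hot float vectors, so a sigmoid head with a binary loss such as nn.BCELoss is a natural fit. A sketch of adapting AlexNet this way; the 5-class head is an assumption, not part of this PR:

import torch.nn as nn
from torchvision import models

model = models.alexnet(pretrained=True)
model.classifier[6] = nn.Sequential(  # replace the 1000-way ImageNet head
    nn.Linear(4096, 5),               # assumption: 5 attribute classes
    nn.Sigmoid(),                     # per-class outputs in [0, 1]
)
criterion = nn.BCELoss()              # element-wise binary loss over multi-hot labels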
49 changes: 49 additions & 0 deletions model_processing/validate_model.py
@@ -0,0 +1,49 @@
from torchvision import models
import torch.utils.data.dataset
import torch
import copy
from sklearn.metrics import f1_score


class ValidateModel:
    def __init__(self, labels_number: int, device='cpu'):
        self.device = device
        self.labels_number = labels_number

        self.iterate_accuracy = []
        self.best_accuracy = 0.0
        self.best_model_weights = None

    def validate_model_single_epoch(self, m: models.AlexNet, data_loader: torch.utils.data.DataLoader) -> float:

        with torch.no_grad():
            trust_answer = [[] for _ in range(self.labels_number)]
            model_answer = [[] for _ in range(self.labels_number)]
            m.eval()
            for inputs, labels in data_loader:
                inputs = inputs.to(self.device)
                labels = labels.to(self.device)

                model_result = m(inputs)
                # one output vector per image in the batch
                for i, t in enumerate(model_result):
                    # per class: predict 1 if the output is closer to 1 than to 0
                    for j, v in enumerate(t):
                        dst_0 = abs(v.item())
                        dst_1 = abs(1 - v.item())
                        trust = labels[i][j].item()
                        ma = 1 if dst_1 < dst_0 else 0
                        trust_answer[j].append(trust)
                        model_answer[j].append(ma)

        # macro-averaged F1 over the classes (stored here as "accuracy")
        res = sum(f1_score(t, p) for t, p in zip(trust_answer, model_answer)) / self.labels_number
        if res > self.best_accuracy:
            self.best_accuracy = res
            self.best_model_weights = copy.deepcopy(m.state_dict())
        self.iterate_accuracy.append(res)
        return res

    def flush(self):
        self.iterate_accuracy = []
        self.best_accuracy = 0.0
        self.best_model_weights = None
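
A minimal wiring sketch for the two classes above; model and criterion are the ones from the earlier sketch, and the datasets, batch size, learning rate, and epoch count are illustrative:

import torch
from torch.utils.data import DataLoader
from model_processing.train_model import TrainModel
from model_processing.validate_model import ValidateModel

train_loader = DataLoader(train_dataset, batch_size=32, shuffle=True)  # CustomDataset instances
validate_loader = DataLoader(validate_dataset, batch_size=32)

device = 'cuda' if torch.cuda.is_available() else 'cpu'
model = model.to(device)
optimizer = torch.optim.Adam(model.parameters(), lr=1e-4)

trainer = TrainModel(ValidateModel(labels_number=5, device=device), device=device)
best_model = trainer.train_model(model, train_loader, validate_loader, criterion, optimizer, epochs=10)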