From 56489af7e7e22f86430f3762139eecc2bde798a5 Mon Sep 17 00:00:00 2001 From: Rohit R Date: Wed, 4 May 2022 19:43:31 +0530 Subject: [PATCH 01/28] added docs --- src/subpixel/data.py | 20 ++++++++++++++++++++ src/subpixel/model.py | 5 +++++ src/subpixel/utils.py | 28 ++++++++++++++++------------ 3 files changed, 41 insertions(+), 12 deletions(-) diff --git a/src/subpixel/data.py b/src/subpixel/data.py index 4994799..43245da 100644 --- a/src/subpixel/data.py +++ b/src/subpixel/data.py @@ -18,6 +18,9 @@ class ImageDataset(Dataset): + ''' + Class that takes in the path of the dataset and converts it into a torch.utils.data.Dataset object. + ''' def __init__(self, path, mode, device, transforms=None, train=True): super().__init__() @@ -84,6 +87,16 @@ def __len__(self): def get_dataset(path, mode, device, transforms=None): + ''' + Function that takes in the path and generates a trainset and valset (if present). + + path: str + mode: str + device: str + transforms: albumentations.transforms + + Returns trainset and valset + ''' trainset = ImageDataset( f"{path}train\\", mode, device, transforms=transforms, train=True @@ -95,5 +108,12 @@ def get_dataset(path, mode, device, transforms=None): return trainset def get_dataloader(datset, b_size, shuffle): + ''' + Converts the dataset to a DataLoader. + + dataset: torch.utils.data.Dataset + + Returns torch.utils.data.DataLoader + ''' return DataLoader(datset, b_size, shuffle) diff --git a/src/subpixel/model.py b/src/subpixel/model.py index 40288eb..6602b1b 100644 --- a/src/subpixel/model.py +++ b/src/subpixel/model.py @@ -78,6 +78,11 @@ def fit(self,trainset : Union[str,nn.Module], loss_fun : nn.Module,optimizer : s return self.history def find_size(self): + + '''Finds the size occupied by the trainable model parameters in CUDA memory. + + Returns the total number of trainable parameters and the size occupied. + ''' p_total = sum(p.numel() for p in self.parameters() if p.requires_grad) bits = 32. diff --git a/src/subpixel/utils.py b/src/subpixel/utils.py index 881462c..2fa237e 100644 --- a/src/subpixel/utils.py +++ b/src/subpixel/utils.py @@ -10,26 +10,27 @@ import numpy as np import random -# from subpixel.model import Model +# def show_batch(data): +# pass -def show_batch(data): - pass +# def EncodingToClass(lst, classes): -def EncodingToClass(lst, classes): +# lst = list(lst.detach().squeeze(0).numpy()) +# return classes[lst.index(max(lst))] - lst = list(lst.detach().squeeze(0).numpy()) - return classes[lst.index(max(lst))] - -def get_boxxes(t): - # '{x, y, h, w, [classes]}' -> [x, y, h, w, classes] - bbox = list(json.loads(t).values()) - return bbox[:-1] + bbox[-1] +# def get_boxxes(t): +# # '{x, y, h, w, [classes]}' -> [x, y, h, w, classes] +# bbox = list(json.loads(t).values()) +# return bbox[:-1] + bbox[-1] def seed_everything(seed=42): + ''' + Seeds EVERYTHING. + ''' random.seed(seed) os.environ["PYTHONHASHSEED"] = str(seed) @@ -41,6 +42,9 @@ def seed_everything(seed=42): def init_model(m): + ''' + Initialises model parameters with xavier normalisation method. 
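+
+    Typically applied to every sub-module via nn.Module.apply; an illustrative
+    sketch (model here is a hypothetical nn.Module instance, not part of this library):
+
+        model.apply(init_model)   # re-initialises supported layers in place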
+ ''' seed_everything() @@ -111,7 +115,7 @@ def findLR( model : nn.Module, dataset : nn.Module, loss_fn : nn.Module ,optimiz -def find_batch_size(model : nn.Module, dataset : nn.Module) -> None: +def find_batch_size(model : nn.Module, dataset : nn.Module): ''' Finds the batch size to be set for ideal GPU usage (95% default) From 2df2079abce45fc4d63630dcaada989772ca2b37 Mon Sep 17 00:00:00 2001 From: Rohit R Date: Wed, 4 May 2022 19:44:11 +0530 Subject: [PATCH 02/28] minor bug fix --- src/subpixel/utils.py | 68 +++++++++++++++++++++---------------------- 1 file changed, 33 insertions(+), 35 deletions(-) diff --git a/src/subpixel/utils.py b/src/subpixel/utils.py index 2fa237e..b082e8f 100644 --- a/src/subpixel/utils.py +++ b/src/subpixel/utils.py @@ -21,16 +21,16 @@ # return classes[lst.index(max(lst))] -# def get_boxxes(t): -# # '{x, y, h, w, [classes]}' -> [x, y, h, w, classes] -# bbox = list(json.loads(t).values()) -# return bbox[:-1] + bbox[-1] +def get_boxxes(t): + # '{x, y, h, w, [classes]}' -> [x, y, h, w, classes] + bbox = list(json.loads(t).values()) + return bbox[:-1] + bbox[-1] def seed_everything(seed=42): - ''' + """ Seeds EVERYTHING. - ''' + """ random.seed(seed) os.environ["PYTHONHASHSEED"] = str(seed) @@ -42,9 +42,9 @@ def seed_everything(seed=42): def init_model(m): - ''' + """ Initialises model parameters with xavier normalisation method. - ''' + """ seed_everything() @@ -58,9 +58,16 @@ def init_model(m): nn.init.xavier_normal_(m.weight.data) - -def findLR( model : nn.Module, dataset : nn.Module, loss_fn : nn.Module ,optimizer : str , start_lr : float=1e-7, end_lr : float=1e-1, steps : float=100): - ''' +def findLR( + model: nn.Module, + dataset: nn.Module, + loss_fn: nn.Module, + optimizer: str, + start_lr: float = 1e-7, + end_lr: float = 1e-1, + steps: float = 100, +): + """ Finds the ideal initial LR for optimal training. model : nn.Module , the model for which ideal LR needs to be found. @@ -75,20 +82,18 @@ def findLR( model : nn.Module, dataset : nn.Module, loss_fn : nn.Module ,optimiz end_lr : upper bound of the learning rate to be checked. steps : number of learning rates between start_lr and end_lr to be checked. - ''' + """ seed_everything() lr = [] loss = [] - optimizer = get_optimizer(model,lr=start_lr) + optimizer = get_optimizer(model, lr=start_lr) dx = (end_lr - start_lr) / steps - x = find_batch_size(model, dataset) + x = find_batch_size(model, dataset) if len(dataset) // steps < x: x = len(dataset) // steps - - scheduler = torch.optim.lr_scheduler.LambdaLR( - optimizer, lambda epoch: epoch + dx - ) + + scheduler = torch.optim.lr_scheduler.LambdaLR(optimizer, lambda epoch: epoch + dx) Dataloader = iter(DataLoader(dataset, x, True)) model.train() @@ -114,16 +119,15 @@ def findLR( model : nn.Module, dataset : nn.Module, loss_fn : nn.Module ,optimiz return lr[numpy.argmin(diff(loss) / dx)], loss, lr - -def find_batch_size(model : nn.Module, dataset : nn.Module): - ''' +def find_batch_size(model: nn.Module, dataset: nn.Module): + """ Finds the batch size to be set for ideal GPU usage (95% default) model : nn.Module , model being trained. dataset : nn.Module , dataset to be loaded. 
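
    A rough usage sketch (assumes a CUDA device, since the estimate is based on
    torch.cuda memory queries; the variable names are illustrative):

        b_size = find_batch_size(model, trainset)
        train_dl = get_dataloader(trainset, b_size, shuffle=True)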
- ''' + """ p, total_bits = model.find_size() f_before = torch.cuda.memory_reserved(0) - torch.cuda.memory_allocated(0) @@ -144,21 +148,15 @@ def find_batch_size(model : nn.Module, dataset : nn.Module): return b_size -def get_optimizer(model : nn.Module, optim : str = "adam", lr : float = 1e-3, weight_decay : float = 1e-5): - ''' +def get_optimizer( + model: nn.Module, optim: str = "adam", lr: float = 1e-3, weight_decay: float = 1e-5 +): + """ returns torch.optim optimizer instance given optim string - ''' + """ if optim == "adam": - return torch.optim.Adam( - model.parameters(), - lr= lr, - weight_decay= weight_decay - ) + return torch.optim.Adam(model.parameters(), lr=lr, weight_decay=weight_decay) elif optim == "sgd": - return torch.optim.SGD( - model.parameters(), - lr, - weight_decay= weight_decay - ) + return torch.optim.SGD(model.parameters(), lr, weight_decay=weight_decay) else: raise NotImplementedError("Optimizer not implemented yet!!") From 2e19f4278dee7db81900c66ec84c595acdee774b Mon Sep 17 00:00:00 2001 From: audi1712 Date: Sat, 7 May 2022 16:12:47 +0530 Subject: [PATCH 03/28] class Model now accepts custom models without arch.json.....(untested) --- src/subpixel/model.py | 10 ++++++++-- 1 file changed, 8 insertions(+), 2 deletions(-) diff --git a/src/subpixel/model.py b/src/subpixel/model.py index 6602b1b..7286291 100644 --- a/src/subpixel/model.py +++ b/src/subpixel/model.py @@ -13,8 +13,12 @@ class Model(nn.Module): ''' Converts model architecture from JSON to a trainable model and has a fit function that can train the model on the given dataset when called. ''' - def __init__(self,path = 'arch.json') -> None: + def __init__(self,model : nn.Module = None,path :str = 'arch.json') -> None: super(Model,self).__init__() + if isinstance(model,nn.Module): + self.pre_defined_model = True + self.model = model + return JSON_file = open(path,"r") arch = json.load(JSON_file) @@ -35,6 +39,8 @@ def __init__(self,path = 'arch.json') -> None: def forward(self,*X): + if self.pre_defined_model: + return self.model(*X) outputs = [] @@ -73,7 +79,7 @@ def fit(self,trainset : Union[str,nn.Module], loss_fun : nn.Module,optimizer : s valset (optional): nn.Module | None , default None, provides validation set. Note:- if trainset is str automatically valset is taken from directory structure. ''' - self.trainer = Trainer(self, trainset= trainset, epochs= 10, learning_rate= lr) + self.trainer = Trainer(self, trainset= trainset, epochs= 10, learning_rate= lr, loss_fn= loss_fun, optimizer= optimizer, mode= mode, valset= valset) self.history = self.trainer.fit() return self.history From 75e4871db9062736d2b82b4748222db9f91546bb Mon Sep 17 00:00:00 2001 From: Rohit R Date: Mon, 9 May 2022 22:30:19 +0530 Subject: [PATCH 04/28] fixed acc --- src/subpixel/train.py | 43 +++++++++++++++++-------------------------- 1 file changed, 17 insertions(+), 26 deletions(-) diff --git a/src/subpixel/train.py b/src/subpixel/train.py index a2b400c..af59775 100644 --- a/src/subpixel/train.py +++ b/src/subpixel/train.py @@ -14,29 +14,21 @@ device = "cuda" if torch.cuda.is_available() else "cpu" -def accuracy(out: torch.Tensor, labels: torch.Tensor): # NEEDS TO BE CHANGED - ''' +def accuracy(out: torch.Tensor, labels: torch.Tensor): + """ Finds the accuracy of the model by comparing the output of the model to the labels. 
out: tensor labels: tensor - ''' - - c = 0 - - preds = torch.round(out) - preds = preds.detach().cpu().numpy().tolist() - labels = labels.cpu().numpy().tolist() - - for label, pred in zip(labels, preds): - if pred == label: - c += 1 - - return c / len(out) + """ + try: + return (out == labels).sum().item() / out.size(0) * out.size(1) * out.size(2) + except: + return (out == labels).sum().item() / out.size(0) * out.size(1) class Trainer: - ''' + """ class that has all the funcions and variables to train a model on your custom dataset. model: nn.Module @@ -52,7 +44,8 @@ class that has all the funcions and variables to train a model on your custom da model_save_path: str shuffle: bool device: str ["cpu", "cuda"] - ''' + """ + def __init__( self, model, @@ -86,9 +79,7 @@ def __init__( trainset, self.mode, device, transforms ) except: - self.trainset = get_dataset( - trainset, self.mode, device, transforms - ) + self.trainset = get_dataset(trainset, self.mode, device, transforms) elif isinstance(trainset, Dataset) or isinstance(trainset, ImageDataset): self.trainset = trainset @@ -114,12 +105,12 @@ def __init__( self.val_dl = get_dataloader(self.valset, self.b_size, self.shuffle) def fit(self): - ''' + """ Function that has the training loop implemented. It inherits all the necessary components from the Trainer class. Returns the loss values and acc values if applicable. - ''' + """ flag = self.mode == "classification" or self.mode == "detection" scaler = torch.cuda.amp.GradScaler() @@ -217,11 +208,11 @@ def fit(self): return losses def test_sample(self, image, label=None): - ''' + """ Used to test the model on one image. Returns the prediction. - ''' + """ pred = self.model(image) @@ -232,11 +223,11 @@ def test_sample(self, image, label=None): return pred def evaluate(self, test_path): - ''' + """ Used to evaluate the model on the test dataset. Returns the losses. 
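
        A rough end-to-end sketch (paths and variable names are illustrative only):

            trainer = Trainer(model, "data\\", mode="classification", device="cuda")
            losses, acc = trainer.fit()
            test_loss = trainer.evaluate("data\\test")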
- ''' + """ test_dl = get_dataloader( ImageDataset(test_path, self.mode, device), self.b_size, False From e677c889116116910bc42e25bf8bae5862ffd413 Mon Sep 17 00:00:00 2001 From: Rohit R Date: Mon, 9 May 2022 22:31:46 +0530 Subject: [PATCH 05/28] removed x100 in train for acc --- src/subpixel/train.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/src/subpixel/train.py b/src/subpixel/train.py index af59775..4d1f300 100644 --- a/src/subpixel/train.py +++ b/src/subpixel/train.py @@ -177,7 +177,7 @@ def fit(self): ) print( - f"{epoch+1}/{self.epochs} -- Train Loss: {losses['train'][-1]} -- Train acc: {acc['train'][-1] *100}% -- Val Loss: {losses['val'][-1]} -- Val acc: {acc['val'][-1]*100}%" + f"{epoch+1}/{self.epochs} -- Train Loss: {losses['train'][-1]} -- Train acc: {acc['train'][-1]}% -- Val Loss: {losses['val'][-1]} -- Val acc: {acc['val'][-1]}%" ) else: print( @@ -192,7 +192,7 @@ def fit(self): ) print( - f"{epoch+1}/{self.epochs} -- Train Loss: {losses['train'][-1]} -- Train acc: {acc['train'][-1] * 100}%" + f"{epoch+1}/{self.epochs} -- Train Loss: {losses['train'][-1]} -- Train acc: {acc['train'][-1]}%" ) else: print( From 97b65c48d183e367c19ba627ce99e785078dda65 Mon Sep 17 00:00:00 2001 From: Rohit R Date: Mon, 9 May 2022 23:01:43 +0530 Subject: [PATCH 06/28] test changes --- src/subpixel/test.py | 31 +++++++++++++++++++------------ 1 file changed, 19 insertions(+), 12 deletions(-) diff --git a/src/subpixel/test.py b/src/subpixel/test.py index 010d796..7883746 100644 --- a/src/subpixel/test.py +++ b/src/subpixel/test.py @@ -5,41 +5,48 @@ from torch.utils import data import torchvision from model import Model -from PIL import Image +from data import ImageDataset -dataset = torchvision.datasets.FashionMNIST("./", download=True) +# dataset = torchvision.datasets.FashionMNIST("./", download=True) class Datas(torch.utils.data.Dataset): - def __init__(self, dataset) -> None: + def __init__(self, dataset): super().__init__() + self.dataset = dataset def __getitem__(self, index): return ( torch.tensor(np.array(self.dataset[index][0])).unsqueeze(0).float().cuda(), - torch.tensor([1 if i == self.dataset[index][1] else 0 for i in range(10)]).float().cuda() + torch.tensor([1 if i == self.dataset[index][1] else 0 for i in range(10)]) + .float() + .cuda(), ) def __len__(self): - # return 1000 + return len(self.dataset) class Test: - def __init__(self, model, dataset, loss_fun) -> None: + def __init__(self, model, dataset_path, loss_fun, mode, device, transforms=None): + self.model = model - # x = int(0.1*len(dataset)) if int(len(dataset))<100 else 100 - self.dataset= dataset + self.mode = mode + self.dataset = ImageDataset( + dataset_path, mode=mode, device=device, transforms=transforms + ) self.loss_fun = loss_fun def test(self): - print("Testing!") - self.model.fit(self.dataset, self.loss_fun, optimizer= "adam") + + self.model.fit(self.dataset, self.loss_fun, mode=self.mode, optimizer="adam") + -datase = Datas(dataset) +datase = Datas(dataset_path) model = Model().cuda() -tes = Test(model, datase, loss_fun= nn.MSELoss()) +tes = Test(model, datase, loss_fun=nn.MSELoss()) tes.test() From a2723b068bdc7247515536dd20e1bdd9632da323 Mon Sep 17 00:00:00 2001 From: Rohit R Date: Sun, 15 May 2022 23:39:52 +0530 Subject: [PATCH 07/28] new files --- src/subpixel/ml/experimental/gradcam.py | 0 src/subpixel/ml/tabular.py | 4 ++++ 2 files changed, 4 insertions(+) create mode 100644 src/subpixel/ml/experimental/gradcam.py create mode 100644 src/subpixel/ml/tabular.py diff --git 
a/src/subpixel/ml/experimental/gradcam.py b/src/subpixel/ml/experimental/gradcam.py new file mode 100644 index 0000000..e69de29 diff --git a/src/subpixel/ml/tabular.py b/src/subpixel/ml/tabular.py new file mode 100644 index 0000000..7daa5d4 --- /dev/null +++ b/src/subpixel/ml/tabular.py @@ -0,0 +1,4 @@ +import sklearn +import numpy as np +import pandas as pd + From 57a11ac2684b481dd5130cd8e1c0c6f8fe8bb89c Mon Sep 17 00:00:00 2001 From: audi1712 Date: Wed, 8 Jun 2022 22:43:11 +0530 Subject: [PATCH 08/28] updated folder structure --- src/subpixel/{ => vision}/data.py | 238 +++++++------- src/subpixel/{ => vision}/train.py | 486 ++++++++++++++--------------- 2 files changed, 362 insertions(+), 362 deletions(-) rename src/subpixel/{ => vision}/data.py (96%) rename src/subpixel/{ => vision}/train.py (96%) diff --git a/src/subpixel/data.py b/src/subpixel/vision/data.py similarity index 96% rename from src/subpixel/data.py rename to src/subpixel/vision/data.py index 43245da..9f3c05d 100644 --- a/src/subpixel/data.py +++ b/src/subpixel/vision/data.py @@ -1,119 +1,119 @@ -# file that scans for data from ordered folders and generates DataLoader class. -# ----------------------------------------------------------------------------------------------- -# Classification - data/train/images, data/train/train_data.csv, data/val/images and data/val/val_data.csv. -# Segmentation - data/train/images, data/train/masks, data/train/train_data.csv, data/val/images, data/val/masks and data/val/val_data.csv. -# Object Detection - data/train/images, data/train/train_data.csv, data/val/images and data/val/val_data.csv. -# bboxes - [x, y, h, w, classes] - -from torch.utils.data import Dataset -import torch -import pandas as pd -import numpy as np -from PIL import Image -from utils import * -import warnings - -warnings.filterwarnings("ignore") -torch.cuda.empty_cache() - - -class ImageDataset(Dataset): - ''' - Class that takes in the path of the dataset and converts it into a torch.utils.data.Dataset object. 
- ''' - def __init__(self, path, mode, device, transforms=None, train=True): - super().__init__() - - self.transforms, self.mode, self.device = transforms, mode, device - self.path = path - - self.df = pd.read_csv(f"{self.path}\\data.csv")[:1] - - if mode == "classification": - self.classes = self.df["class"].unique() - self.df[self.classes] = pd.get_dummies(self.df["class"]) - del self.df["class"] - - if mode == "detection": - for i in range(len(self.df)): - self.df["labels"].iloc[i] = get_boxxes(self.df["labels"].iloc[i]) - - def __getitem__(self, idx): - - img_path = f"{self.path}\\images\\" + self.df["img_path"].iloc[idx] - img = np.array(Image.open(img_path).convert("RGB")) - - if self.mode == "classification": - - label = torch.tensor(np.array(self.df[self.classes].iloc[idx])) - - if self.transforms: - - transformed = self.transforms(image=img) - img = transformed["image"] - - elif self.mode == "detection": - - label = np.array(self.df["labels"].iloc[idx]) - - if self.transforms: - - transformed = self.transforms(image=img, bboxes=label) - img = transformed["image"] - label = transformed["bboxes"] - - label = torch.tensor(label) - - elif self.mode == "segmentation": - - img_path = f"{self.path}\\masks\\" + self.df["mask_path"].iloc[idx] - label = np.array(Image.open(img_path).convert("RGB")) - - if self.transforms: - - transformed = self.transforms(image=img, mask=label) - img = transformed["image"] - label = transformed["mask"] - - label = torch.tensor(label).permute(2, 0, 1) - - return ( - torch.tensor(img).permute(2, 0, 1).float().to(self.device), - label.float().to(self.device), - ) - - def __len__(self): - return len(self.df) - - -def get_dataset(path, mode, device, transforms=None): - ''' - Function that takes in the path and generates a trainset and valset (if present). - - path: str - mode: str - device: str - transforms: albumentations.transforms - - Returns trainset and valset - ''' - - trainset = ImageDataset( - f"{path}train\\", mode, device, transforms=transforms, train=True - ) - try: - valset = ImageDataset(f"{path}\\val\\", mode, device, train=False) - return trainset, valset - except FileNotFoundError: - return trainset - -def get_dataloader(datset, b_size, shuffle): - ''' - Converts the dataset to a DataLoader. - - dataset: torch.utils.data.Dataset - - Returns torch.utils.data.DataLoader - ''' - - return DataLoader(datset, b_size, shuffle) +# file that scans for data from ordered folders and generates DataLoader class. +# ----------------------------------------------------------------------------------------------- +# Classification - data/train/images, data/train/train_data.csv, data/val/images and data/val/val_data.csv. +# Segmentation - data/train/images, data/train/masks, data/train/train_data.csv, data/val/images, data/val/masks and data/val/val_data.csv. +# Object Detection - data/train/images, data/train/train_data.csv, data/val/images and data/val/val_data.csv. +# bboxes - [x, y, h, w, classes] + +from torch.utils.data import Dataset +import torch +import pandas as pd +import numpy as np +from PIL import Image +from utils import * +import warnings + +warnings.filterwarnings("ignore") +torch.cuda.empty_cache() + + +class ImageDataset(Dataset): + ''' + Class that takes in the path of the dataset and converts it into a torch.utils.data.Dataset object. 
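+
+    A rough construction sketch (the path and mode are illustrative only):
+
+        trainset = ImageDataset("data\\train", mode="classification",
+                                device="cuda", transforms=None, train=True)
+        img, label = trainset[0]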
+ ''' + def __init__(self, path, mode, device, transforms=None, train=True): + super().__init__() + + self.transforms, self.mode, self.device = transforms, mode, device + self.path = path + + self.df = pd.read_csv(f"{self.path}\\data.csv")[:1] + + if mode == "classification": + self.classes = self.df["class"].unique() + self.df[self.classes] = pd.get_dummies(self.df["class"]) + del self.df["class"] + + if mode == "detection": + for i in range(len(self.df)): + self.df["labels"].iloc[i] = get_boxxes(self.df["labels"].iloc[i]) + + def __getitem__(self, idx): + + img_path = f"{self.path}\\images\\" + self.df["img_path"].iloc[idx] + img = np.array(Image.open(img_path).convert("RGB")) + + if self.mode == "classification": + + label = torch.tensor(np.array(self.df[self.classes].iloc[idx])) + + if self.transforms: + + transformed = self.transforms(image=img) + img = transformed["image"] + + elif self.mode == "detection": + + label = np.array(self.df["labels"].iloc[idx]) + + if self.transforms: + + transformed = self.transforms(image=img, bboxes=label) + img = transformed["image"] + label = transformed["bboxes"] + + label = torch.tensor(label) + + elif self.mode == "segmentation": + + img_path = f"{self.path}\\masks\\" + self.df["mask_path"].iloc[idx] + label = np.array(Image.open(img_path).convert("RGB")) + + if self.transforms: + + transformed = self.transforms(image=img, mask=label) + img = transformed["image"] + label = transformed["mask"] + + label = torch.tensor(label).permute(2, 0, 1) + + return ( + torch.tensor(img).permute(2, 0, 1).float().to(self.device), + label.float().to(self.device), + ) + + def __len__(self): + return len(self.df) + + +def get_dataset(path, mode, device, transforms=None): + ''' + Function that takes in the path and generates a trainset and valset (if present). + + path: str + mode: str + device: str + transforms: albumentations.transforms + + Returns trainset and valset + ''' + + trainset = ImageDataset( + f"{path}train\\", mode, device, transforms=transforms, train=True + ) + try: + valset = ImageDataset(f"{path}\\val\\", mode, device, train=False) + return trainset, valset + except FileNotFoundError: + return trainset + +def get_dataloader(datset, b_size, shuffle): + ''' + Converts the dataset to a DataLoader. + + dataset: torch.utils.data.Dataset + + Returns torch.utils.data.DataLoader + ''' + + return DataLoader(datset, b_size, shuffle) diff --git a/src/subpixel/train.py b/src/subpixel/vision/train.py similarity index 96% rename from src/subpixel/train.py rename to src/subpixel/vision/train.py index 4d1f300..b862bb7 100644 --- a/src/subpixel/train.py +++ b/src/subpixel/vision/train.py @@ -1,243 +1,243 @@ -from torch.utils.data import Dataset -import torch -from tqdm import tqdm -import warnings -from data import ImageDataset, get_dataloader, get_dataset -import numpy as np -import torch.nn as nn -from utils import findLR, find_batch_size, get_optimizer - - -warnings.filterwarnings("ignore") -torch.cuda.empty_cache() - -device = "cuda" if torch.cuda.is_available() else "cpu" - - -def accuracy(out: torch.Tensor, labels: torch.Tensor): - """ - Finds the accuracy of the model by comparing the output of the model to the labels. - - out: tensor - labels: tensor - """ - try: - return (out == labels).sum().item() / out.size(0) * out.size(1) * out.size(2) - except: - return (out == labels).sum().item() / out.size(0) * out.size(1) - - -class Trainer: - """ - class that has all the funcions and variables to train a model on your custom dataset. 
- - model: nn.Module - trainset: str or (Dataset, ImageDataset) - transforms: - optimizer: str - valset: (Dataset, ImageDataset) - epochs: int - mode: str ["classification", "detection", "segmentation"] - loss_fn: nn.Module - learning_rate: float - weight_decay: float - model_save_path: str - shuffle: bool - device: str ["cpu", "cuda"] - """ - - def __init__( - self, - model, - trainset, - transforms=None, - optimizer="adam", - valset=None, - epochs=10, - mode="classification", - loss_fn=nn.MSELoss(), - learning_rate=None, - weight_decay=1e-5, - model_save_path="./", - shuffle=True, - device="cpu", - ): - self.model = model.cuda() if device == "cuda" else model - self.valset = valset - self.epochs = epochs - self.mode = mode - self.loss_fn = loss_fn - self.weight_decay = weight_decay - self.model_save_path = model_save_path - self.learning_rate = learning_rate - self.shuffle = shuffle - self.device = device - - if isinstance(trainset, str): - try: - self.trainset, self.valset = get_dataset( - trainset, self.mode, device, transforms - ) - except: - self.trainset = get_dataset(trainset, self.mode, device, transforms) - - elif isinstance(trainset, Dataset) or isinstance(trainset, ImageDataset): - self.trainset = trainset - self.valset = valset - - self.b_size = find_batch_size(model, self.trainset) - - if learning_rate == None: - self.learning_rate = findLR( - self.model, self.trainset, self.loss_fn, optimizer - )[0] - - self.optimizer = get_optimizer( - self.model, - optim=optimizer, - lr=self.learning_rate, - weight_decay=self.weight_decay, - ) - - self.train_dl = get_dataloader(self.trainset, self.b_size, self.shuffle) - - if self.valset != None: - self.val_dl = get_dataloader(self.valset, self.b_size, self.shuffle) - - def fit(self): - """ - Function that has the training loop implemented. - It inherits all the necessary components from the Trainer class. - - Returns the loss values and acc values if applicable. 
- """ - - flag = self.mode == "classification" or self.mode == "detection" - scaler = torch.cuda.amp.GradScaler() - losses = {"train": [], "val": []} - acc = {"train": [], "val": []} - - for epoch in range(self.epochs): - - epoch_loss = {"train": [], "val": []} - epoch_acc = {"train": [], "val": []} - - self.model.train() - for img, label in tqdm(self.train_dl): - - with torch.cuda.amp.autocast(): - - pred = self.model(img) - loss = self.loss_fn(pred, label) - - epoch_loss["train"].append(loss) - - if self.mode == "classification": - a = accuracy(pred, label) - epoch_acc["train"].append(a) - - elif self.mode == "detection": - a = accuracy(pred[1:5], label[1:5]) - epoch_acc["train"].append(a) - - scaler.scale(loss).backward() - scaler.step(self.optimizer) - scaler.update() - self.optimizer.zero_grad() - - losses["train"].append(sum(epoch_loss["train"]) / len(epoch_loss["train"])) - - if self.valset != None: - - self.model.eval() - for img, label in tqdm(self.val_dl): - - with torch.cuda.amp.autocast(): - - pred = self.model(img) - loss = self.loss_fn(pred, label) - - epoch_loss["val"].append(loss) - - if self.mode == "classification": - a = accuracy(pred, label) - epoch_acc["val"].append(a) - - elif self.mode == "detection": - a = accuracy(pred[1:5], label[1:5]) - epoch_acc["val"].append(a) - - losses["val"].append(sum(epoch_loss["val"]) / len(epoch_loss["val"])) - - if flag: - - acc["val"].append(sum(epoch_acc["val"]) / len(epoch_acc["val"])) - acc["train"].append( - sum(epoch_acc["train"]) / len(epoch_acc["train"]) - ) - - print( - f"{epoch+1}/{self.epochs} -- Train Loss: {losses['train'][-1]} -- Train acc: {acc['train'][-1]}% -- Val Loss: {losses['val'][-1]} -- Val acc: {acc['val'][-1]}%" - ) - else: - print( - f"{epoch+1}/{self.epochs} -- Train Loss: {losses['train'][-1]} -- Val Loss: {losses['val'][-1]}" - ) - - else: - - if flag: - acc["train"].append( - sum(epoch_acc["train"]) / len(epoch_acc["train"]) - ) - - print( - f"{epoch+1}/{self.epochs} -- Train Loss: {losses['train'][-1]} -- Train acc: {acc['train'][-1]}%" - ) - else: - print( - f"{epoch+1}/{self.epochs} -- Train Loss: {losses['train'][-1]}" - ) - - torch.save(self.model, f"{self.model_save_path}\\model") - - if flag: - return losses, acc - - else: - return losses - - def test_sample(self, image, label=None): - """ - Used to test the model on one image. - - Returns the prediction. - """ - - pred = self.model(image) - - if label != None: - loss = self.loss_fn(label, pred).detach() - return pred, loss - - return pred - - def evaluate(self, test_path): - """ - Used to evaluate the model on the test dataset. - - Returns the losses. - """ - - test_dl = get_dataloader( - ImageDataset(test_path, self.mode, device), self.b_size, False - ) - losses = [] - - for img, label in test_dl: - pred = self.model(img) - loss = self.loss_fn(label, pred).detach() - losses.append(loss) - - return sum(losses) / len(losses) - +from torch.utils.data import Dataset +import torch +from tqdm import tqdm +import warnings +from data import ImageDataset, get_dataloader, get_dataset +import numpy as np +import torch.nn as nn +from utils import findLR, find_batch_size, get_optimizer + + +warnings.filterwarnings("ignore") +torch.cuda.empty_cache() + +device = "cuda" if torch.cuda.is_available() else "cpu" + + +def accuracy(out: torch.Tensor, labels: torch.Tensor): + """ + Finds the accuracy of the model by comparing the output of the model to the labels. 
+ + out: tensor + labels: tensor + """ + try: + return (out == labels).sum().item() / out.size(0) * out.size(1) * out.size(2) + except: + return (out == labels).sum().item() / out.size(0) * out.size(1) + + +class Trainer: + """ + class that has all the funcions and variables to train a model on your custom dataset. + + model: nn.Module + trainset: str or (Dataset, ImageDataset) + transforms: + optimizer: str + valset: (Dataset, ImageDataset) + epochs: int + mode: str ["classification", "detection", "segmentation"] + loss_fn: nn.Module + learning_rate: float + weight_decay: float + model_save_path: str + shuffle: bool + device: str ["cpu", "cuda"] + """ + + def __init__( + self, + model, + trainset, + transforms=None, + optimizer="adam", + valset=None, + epochs=10, + mode="classification", + loss_fn=nn.MSELoss(), + learning_rate=None, + weight_decay=1e-5, + model_save_path="./", + shuffle=True, + device="cpu", + ): + self.model = model.cuda() if device == "cuda" else model + self.valset = valset + self.epochs = epochs + self.mode = mode + self.loss_fn = loss_fn + self.weight_decay = weight_decay + self.model_save_path = model_save_path + self.learning_rate = learning_rate + self.shuffle = shuffle + self.device = device + + if isinstance(trainset, str): + try: + self.trainset, self.valset = get_dataset( + trainset, self.mode, device, transforms + ) + except: + self.trainset = get_dataset(trainset, self.mode, device, transforms) + + elif isinstance(trainset, Dataset) or isinstance(trainset, ImageDataset): + self.trainset = trainset + self.valset = valset + + self.b_size = find_batch_size(model, self.trainset) + + if learning_rate == None: + self.learning_rate = findLR( + self.model, self.trainset, self.loss_fn, optimizer + )[0] + + self.optimizer = get_optimizer( + self.model, + optim=optimizer, + lr=self.learning_rate, + weight_decay=self.weight_decay, + ) + + self.train_dl = get_dataloader(self.trainset, self.b_size, self.shuffle) + + if self.valset != None: + self.val_dl = get_dataloader(self.valset, self.b_size, self.shuffle) + + def fit(self): + """ + Function that has the training loop implemented. + It inherits all the necessary components from the Trainer class. + + Returns the loss values and acc values if applicable. 
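+
+        Illustrative call (trainer is a hypothetical instance of this class):
+
+            losses, acc = trainer.fit()   # classification / detection modes
+            losses = trainer.fit()        # other modes (e.g. segmentation)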
+ """ + + flag = self.mode == "classification" or self.mode == "detection" + scaler = torch.cuda.amp.GradScaler() + losses = {"train": [], "val": []} + acc = {"train": [], "val": []} + + for epoch in range(self.epochs): + + epoch_loss = {"train": [], "val": []} + epoch_acc = {"train": [], "val": []} + + self.model.train() + for img, label in tqdm(self.train_dl): + + with torch.cuda.amp.autocast(): + + pred = self.model(img) + loss = self.loss_fn(pred, label) + + epoch_loss["train"].append(loss) + + if self.mode == "classification": + a = accuracy(pred, label) + epoch_acc["train"].append(a) + + elif self.mode == "detection": + a = accuracy(pred[1:5], label[1:5]) + epoch_acc["train"].append(a) + + scaler.scale(loss).backward() + scaler.step(self.optimizer) + scaler.update() + self.optimizer.zero_grad() + + losses["train"].append(sum(epoch_loss["train"]) / len(epoch_loss["train"])) + + if self.valset != None: + + self.model.eval() + for img, label in tqdm(self.val_dl): + + with torch.cuda.amp.autocast(): + + pred = self.model(img) + loss = self.loss_fn(pred, label) + + epoch_loss["val"].append(loss) + + if self.mode == "classification": + a = accuracy(pred, label) + epoch_acc["val"].append(a) + + elif self.mode == "detection": + a = accuracy(pred[1:5], label[1:5]) + epoch_acc["val"].append(a) + + losses["val"].append(sum(epoch_loss["val"]) / len(epoch_loss["val"])) + + if flag: + + acc["val"].append(sum(epoch_acc["val"]) / len(epoch_acc["val"])) + acc["train"].append( + sum(epoch_acc["train"]) / len(epoch_acc["train"]) + ) + + print( + f"{epoch+1}/{self.epochs} -- Train Loss: {losses['train'][-1]} -- Train acc: {acc['train'][-1]}% -- Val Loss: {losses['val'][-1]} -- Val acc: {acc['val'][-1]}%" + ) + else: + print( + f"{epoch+1}/{self.epochs} -- Train Loss: {losses['train'][-1]} -- Val Loss: {losses['val'][-1]}" + ) + + else: + + if flag: + acc["train"].append( + sum(epoch_acc["train"]) / len(epoch_acc["train"]) + ) + + print( + f"{epoch+1}/{self.epochs} -- Train Loss: {losses['train'][-1]} -- Train acc: {acc['train'][-1]}%" + ) + else: + print( + f"{epoch+1}/{self.epochs} -- Train Loss: {losses['train'][-1]}" + ) + + torch.save(self.model, f"{self.model_save_path}\\model") + + if flag: + return losses, acc + + else: + return losses + + def test_sample(self, image, label=None): + """ + Used to test the model on one image. + + Returns the prediction. + """ + + pred = self.model(image) + + if label != None: + loss = self.loss_fn(label, pred).detach() + return pred, loss + + return pred + + def evaluate(self, test_path): + """ + Used to evaluate the model on the test dataset. + + Returns the losses. 
+ """ + + test_dl = get_dataloader( + ImageDataset(test_path, self.mode, device), self.b_size, False + ) + losses = [] + + for img, label in test_dl: + pred = self.model(img) + loss = self.loss_fn(label, pred).detach() + losses.append(loss) + + return sum(losses) / len(losses) + From 928b4e1797bca4610b58a625b003f06c80d96bc6 Mon Sep 17 00:00:00 2001 From: Rohit R Date: Wed, 8 Jun 2022 22:49:34 +0530 Subject: [PATCH 09/28] folder --- src/subpixel/{ml => }/experimental/gradcam.py | 0 src/subpixel/utils.py | 10 ---------- 2 files changed, 10 deletions(-) rename src/subpixel/{ml => }/experimental/gradcam.py (100%) diff --git a/src/subpixel/ml/experimental/gradcam.py b/src/subpixel/experimental/gradcam.py similarity index 100% rename from src/subpixel/ml/experimental/gradcam.py rename to src/subpixel/experimental/gradcam.py diff --git a/src/subpixel/utils.py b/src/subpixel/utils.py index b082e8f..0ade175 100644 --- a/src/subpixel/utils.py +++ b/src/subpixel/utils.py @@ -11,16 +11,6 @@ import random -# def show_batch(data): -# pass - - -# def EncodingToClass(lst, classes): - -# lst = list(lst.detach().squeeze(0).numpy()) -# return classes[lst.index(max(lst))] - - def get_boxxes(t): # '{x, y, h, w, [classes]}' -> [x, y, h, w, classes] bbox = list(json.loads(t).values()) From 3b17f9137442122ceacaeb86daa250bbc4a45047 Mon Sep 17 00:00:00 2001 From: audi1712 Date: Wed, 8 Jun 2022 22:53:03 +0530 Subject: [PATCH 10/28] import trainer fixed --- src/subpixel/model.py | 4 ++-- src/subpixel/vision/train.py | 2 +- 2 files changed, 3 insertions(+), 3 deletions(-) diff --git a/src/subpixel/model.py b/src/subpixel/model.py index 7286291..58d4565 100644 --- a/src/subpixel/model.py +++ b/src/subpixel/model.py @@ -4,7 +4,7 @@ import torch.nn as nn import json from torchinfo import summary -from train import Trainer +from vision.train import visionTrainer from utils import findLR, find_batch_size import numpy as np @@ -79,7 +79,7 @@ def fit(self,trainset : Union[str,nn.Module], loss_fun : nn.Module,optimizer : s valset (optional): nn.Module | None , default None, provides validation set. Note:- if trainset is str automatically valset is taken from directory structure. ''' - self.trainer = Trainer(self, trainset= trainset, epochs= 10, learning_rate= lr, loss_fn= loss_fun, optimizer= optimizer, mode= mode, valset= valset) + self.trainer = visionTrainer(self, trainset= trainset, epochs= 10, learning_rate= lr, loss_fn= loss_fun, optimizer= optimizer, mode= mode, valset= valset) self.history = self.trainer.fit() return self.history diff --git a/src/subpixel/vision/train.py b/src/subpixel/vision/train.py index b862bb7..06c2b40 100644 --- a/src/subpixel/vision/train.py +++ b/src/subpixel/vision/train.py @@ -27,7 +27,7 @@ def accuracy(out: torch.Tensor, labels: torch.Tensor): return (out == labels).sum().item() / out.size(0) * out.size(1) -class Trainer: +class visionTrainer: """ class that has all the funcions and variables to train a model on your custom dataset. 
From e9249fe0af4e530b80946dd2f2358ebfc825c11b Mon Sep 17 00:00:00 2001 From: audi1712 Date: Wed, 8 Jun 2022 22:54:17 +0530 Subject: [PATCH 11/28] test errors --- src/subpixel/test.py | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) diff --git a/src/subpixel/test.py b/src/subpixel/test.py index 7883746..ac992a2 100644 --- a/src/subpixel/test.py +++ b/src/subpixel/test.py @@ -5,7 +5,7 @@ from torch.utils import data import torchvision from model import Model -from data import ImageDataset +from vision.data import ImageDataset # dataset = torchvision.datasets.FashionMNIST("./", download=True) @@ -45,8 +45,8 @@ def test(self): -datase = Datas(dataset_path) -model = Model().cuda() -tes = Test(model, datase, loss_fun=nn.MSELoss()) -tes.test() +#datase = Datas(dataset_path) +#model = Model().cuda() +#tes = Test(model, datase, loss_fun=nn.MSELoss()) +#tes.test() From c930cb59b1970f919ee4803fe945865c653b3018 Mon Sep 17 00:00:00 2001 From: Rohit R Date: Mon, 13 Jun 2022 13:38:14 +0530 Subject: [PATCH 12/28] chuma --- src/subpixel/ml/tabular.py | 1 + 1 file changed, 1 insertion(+) diff --git a/src/subpixel/ml/tabular.py b/src/subpixel/ml/tabular.py index 7daa5d4..b36a289 100644 --- a/src/subpixel/ml/tabular.py +++ b/src/subpixel/ml/tabular.py @@ -2,3 +2,4 @@ import numpy as np import pandas as pd +# changes From 39da4c71afee71b71f716b74f57d5e3d620d84df Mon Sep 17 00:00:00 2001 From: Rohit R Date: Thu, 23 Jun 2022 18:37:16 +0530 Subject: [PATCH 13/28] finally ml --- src/subpixel/ml/tabular.py | 5 -- src/subpixel/ml/utils.py | 180 +++++++++++++++++++++++++++++++++++++ 2 files changed, 180 insertions(+), 5 deletions(-) delete mode 100644 src/subpixel/ml/tabular.py create mode 100644 src/subpixel/ml/utils.py diff --git a/src/subpixel/ml/tabular.py b/src/subpixel/ml/tabular.py deleted file mode 100644 index b36a289..0000000 --- a/src/subpixel/ml/tabular.py +++ /dev/null @@ -1,5 +0,0 @@ -import sklearn -import numpy as np -import pandas as pd - -# changes diff --git a/src/subpixel/ml/utils.py b/src/subpixel/ml/utils.py new file mode 100644 index 0000000..a2222f3 --- /dev/null +++ b/src/subpixel/ml/utils.py @@ -0,0 +1,180 @@ +from statistics import median +import sklearn +import matplotlib.pyplot as plt +import seaborn as sns +import itertools +from sklearn.ensemble import ExtraTreesClassifier +import pandas as pd + + +def accuracy(y_true, y_pred): + return sklearn.metrics.accuracy_score(y_true, y_pred) + + +def correlation_matrix(df, cols=False): + + if cols: + df = df[cols] + + return df.corr() + + +def find_outliers(df, cols=False, remove=False): + + if cols: + df = df[cols] + numeric_cols = df._get_numeric_data().columns.tolist() + else: + numeric_cols = df._get_numeric_data().columns.tolist() + + outliers = {} + + for col in numeric_cols: + + outlier_list = [] + + q1 = df[col].quantile(0.25) + q3 = df[col].quantile(0.75) + iqr = q3 - q1 + + low_bound = q1 - (iqr * 1.5) + high_bound = q3 + (iqr * 1.5) + + for i, val in enumerate(df[col]): + if val < low_bound or val > high_bound: + outlier_list.append(i) + + if remove: + df.drop(df.index[i], inplace=True) + + outliers[col] = outlier_list + + return outliers + + +def boxplot(df, cols=False): + + if cols: + df = df[cols] + numeric_cols = df._get_numeric_data().columns.tolist() + else: + numeric_cols = df._get_numeric_data().columns.tolist() + + i = 1 + plt.figure(figsize=(15, 25)) + for col in numeric_cols: + plt.subplot(6, 3, i) + sns.boxplot(y=df[col], color="green") + i += 1 + + plt.show() + + +def get_combinations(list_of_values): + 
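+    # e.g. get_combinations(["a", "b", "c"]) -> [("a", "b"), ("a", "c"), ("b", "c")]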
return list(itertools.combinations(list_of_values, 2)) + + +# NEEDS TO BE CHANGED TO DISPLAY IN SAME PAGE +def feature_correlation(df, cols=False, kind="reg"): + + if cols: + lst = get_combinations(cols) + + else: + lst = get_combinations(df.columns) + + for i, j in lst: + + sns.jointplot(x=i, y=j, data=df, kind=kind, truncate=False, color="m", height=7) + + plt.show() + + +def fill_nan_with_mean(df): + for col in df.columns: + df[col] = df[col].fillna(df[col].mean()) + return df + + +def delete_row_with_nan(df): + df.dropna(inplace=True) + return df + + +def pie_chart(df, col): + + df[col].value_counts().plot(kind="pie", autopct="%1.1f%%") + plt.show() + + +def count_plot(df, col): + + df[col].value_counts().plot(kind="bar") + plt.show() + + +def feature_importance(x, y, show_plot=False): + model = ExtraTreesClassifier() + model.fit(x, y) + + feat_importances = pd.Series(model.feature_importances_, index=x.columns) + + if show_plot: + feat_importances.nlargest(12).plot(kind="barh") + plt.show() + + return feat_importances + + +def histogram(df, cols, bins=10): + + n = len(cols) + + plt.figure(figsize=(10, 10)) + + for i, col in enumerate(cols): + plt.subplot(n, 1, i + 1) + sns.histplot( + df[col], + bins=bins, + color="Red", + kde_kws={"color": "y", "lw": 3, "label": "KDE"}, + ) + + plt.show() + + +def get_median(df, col): + return df[col].median() + + +def get_mean(df, col): + return df[col].mean() + + +def check_for_outliers(df, cols=False, threshold=10): + + cols = df.columns if cols is False else cols + + cols_with_outliers = [] + + for col in cols: + mean = get_mean(df, col) + median = get_median(df, col) + + if abs(mean - median) > (threshold / 100) * max(mean, median): + cols_with_outliers.append(col) + + return cols_with_outliers + + +def get_correlation_with_target(df, target, cols=False): + + if cols: + df = df[cols] + + return df.corrwith(target).sort_values(ascending=False) + + +def get_kurtosis(df, col): + return df[col].kurtosis() From 610e38396859dca5ae74e030b5069f9e843ec139 Mon Sep 17 00:00:00 2001 From: Rohit R Date: Thu, 23 Jun 2022 18:38:09 +0530 Subject: [PATCH 14/28] . 
--- src/subpixel/ml/utils.py | 1 - 1 file changed, 1 deletion(-) diff --git a/src/subpixel/ml/utils.py b/src/subpixel/ml/utils.py index a2222f3..040c9ff 100644 --- a/src/subpixel/ml/utils.py +++ b/src/subpixel/ml/utils.py @@ -1,4 +1,3 @@ -from statistics import median import sklearn import matplotlib.pyplot as plt import seaborn as sns From 1b53cd2c4397d17b6ccb9396f31d2385d1788e32 Mon Sep 17 00:00:00 2001 From: Rohit R Date: Thu, 23 Jun 2022 19:56:41 +0530 Subject: [PATCH 15/28] added training --- src/subpixel/ml/train.py | 74 ++++++++++++++++++++++++++++++++++++++++ src/subpixel/ml/utils.py | 32 +++++++++++++++++ 2 files changed, 106 insertions(+) create mode 100644 src/subpixel/ml/train.py diff --git a/src/subpixel/ml/train.py b/src/subpixel/ml/train.py new file mode 100644 index 0000000..a28606f --- /dev/null +++ b/src/subpixel/ml/train.py @@ -0,0 +1,74 @@ +import sklearn +from sklearn.utils import resample +from sklearn.neural_network import MLPClassifier +from sklearn.neighbors import KNeighborsClassifier +from sklearn.svm import SVC +from sklearn.gaussian_process import GaussianProcessClassifier +from sklearn.gaussian_process.kernels import RBF +from sklearn.tree import DecisionTreeClassifier +from sklearn.ensemble import RandomForestClassifier, AdaBoostClassifier +from sklearn.naive_bayes import GaussianNB +from sklearn.discriminant_analysis import QuadraticDiscriminantAnalysis + + +class Regression: + def __init__(self, df, target_col, type=None): + + self.df = df + self.target_col = target_col + self.X = self.df[self.df.columns.difference([self.target_col])] + self.y = self.df[self.target_col] + self.type = type + + self.model_dict = { + "Nearest Neighbors": KNeighborsClassifier(3), + "Linear SVM": SVC(kernel="linear", C=0.025), + "RBF SVM": SVC(gamma=2, C=1), + "Gaussian Process": GaussianProcessClassifier(1.0 * RBF(1.0)), + "Decision Tree": DecisionTreeClassifier(max_depth=5), + "Random Forest": RandomForestClassifier( + max_depth=5, n_estimators=10, max_features=1 + ), + "Neural Net": MLPClassifier(alpha=1), + "AdaBoost": AdaBoostClassifier(), + "Naive Bayes": GaussianNB(), + "QDA": QuadraticDiscriminantAnalysis(), + } + + if self.type == "linear": + self.model = sklearn.linear_model.LinearRegression() + else: + self.model = self.model_dict[self.find_classfier()[0]] + + self.model.fit(self.X, self.y) + + def find_classfier(self): + + _THRESHOLD = 500 + + if len(self.df) > _THRESHOLD: + self._X = resample(self.X, replace=False, n_samples=_THRESHOLD) + self._y = resample(self.y, replace=False, n_samples=_THRESHOLD) + + else: + self._X = self.X + self._y = self.y + + model_scores = {} + + for name, model in zip(self.model_dict.keys(), self.model_dict.values()): + model.fit(self._X, self._y) + model_scores[name] = model.score(self._X, self._y) + + model_scores = dict(sorted(model_scores.items(), key=lambda item: item[1])) + + return list(model_scores.keys())[0], model_scores + + def predict(self, df): + return self.model.predict(df) + + def score(self): + return self.model.score(self.X, self.y) + + def score_with_test(self, X_test, y_test): + return self.model.score(X_test, y_test) diff --git a/src/subpixel/ml/utils.py b/src/subpixel/ml/utils.py index 040c9ff..07c90a9 100644 --- a/src/subpixel/ml/utils.py +++ b/src/subpixel/ml/utils.py @@ -177,3 +177,35 @@ def get_correlation_with_target(df, target, cols=False): def get_kurtosis(df, col): return df[col].kurtosis() + + +def get_skewness(df, col): + return df[col].skew() + + +def get_variance(df, col): + return df[col].var() + + +def 
get_count_of_unique_values(df, col): + return df[col].nunique() + + +def get_statistics(df, cols=False): + + if not cols: + cols = df.columns + + stats = {} + + for col in cols: + stats[col] = {} + stats[col]["unique_count"] = get_count_of_unique_values(df, col) + stats[col]["mean"] = get_mean(df, col) + stats[col]["median"] = get_median(df, col) + stats[col]["variance"] = get_variance(df, col) + stats[col]["skewness"] = get_skewness(df, col) + stats[col]["kurtosis"] = get_kurtosis(df, col) + + return stats + From 4dd99692da7a48a679a4a8d3d44f857ed4b8f6b8 Mon Sep 17 00:00:00 2001 From: Rohit R Date: Thu, 23 Jun 2022 20:22:41 +0530 Subject: [PATCH 16/28] docs --- src/subpixel/ml/train.py | 34 ++++++++ src/subpixel/ml/utils.py | 165 ++++++++++++++++++++++++++++++++++++++- 2 files changed, 196 insertions(+), 3 deletions(-) diff --git a/src/subpixel/ml/train.py b/src/subpixel/ml/train.py index a28606f..bc07287 100644 --- a/src/subpixel/ml/train.py +++ b/src/subpixel/ml/train.py @@ -12,7 +12,17 @@ class Regression: + """ + Class that contains all the variables and funtions to train a model on the given data. + """ + def __init__(self, df, target_col, type=None): + """ + Init funtion of the Regression class. + :param df: Dataframe + :param target_col: Target column + :param type: Type of the model to train. + """ self.df = df self.target_col = target_col @@ -43,6 +53,11 @@ def __init__(self, df, target_col, type=None): self.model.fit(self.X, self.y) def find_classfier(self): + """ + Finds the best classifier for the given data. + + :return: Name of the best classifier and the model. + """ _THRESHOLD = 500 @@ -65,10 +80,29 @@ def find_classfier(self): return list(model_scores.keys())[0], model_scores def predict(self, df): + """ + Gets the predictions for the given data. + :param df: Dataframe + + :return: Predictions + """ + return self.model.predict(df) def score(self): + """ + Gets the score of the model on train data. + + :return: Score + """ + return self.model.score(self.X, self.y) def score_with_test(self, X_test, y_test): + """ + Gets the score of the model on test data. + + :param X_test: Score on Test data + """ + return self.model.score(X_test, y_test) diff --git a/src/subpixel/ml/utils.py b/src/subpixel/ml/utils.py index 07c90a9..bdcd7d6 100644 --- a/src/subpixel/ml/utils.py +++ b/src/subpixel/ml/utils.py @@ -7,10 +7,24 @@ def accuracy(y_true, y_pred): + """ + Function that finds the accuracy of a model based on the true and predicted values. + :param y_true: True values + :param y_pred: Predicted values + + :return: Accuracy of the model + """ + return sklearn.metrics.accuracy_score(y_true, y_pred) def correlation_matrix(df, cols=False): + """ + Gets the correlation matrix of the dataframe. + :param df: Dataframe + + :return: Correlation matrix + """ if cols: df = df[cols] @@ -19,10 +33,17 @@ def correlation_matrix(df, cols=False): def find_outliers(df, cols=False, remove=False): + """ + Finds outliers in each column of the dataframe. + :param df: Dataframe + :param cols: Columns to check for outliers + :param remove: If True, removes outliers from the dataframe + + :return: list of outliers and dataframe without outliers if remove is True. 
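+
+    Illustrative calls (the column names are hypothetical):
+
+        outliers = find_outliers(df, cols=["age", "income"])
+        outliers, df = find_outliers(df, cols=["age"], remove=True)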
+ """ if cols: - df = df[cols] - numeric_cols = df._get_numeric_data().columns.tolist() + numeric_cols = df[cols]._get_numeric_data().columns.tolist() else: numeric_cols = df._get_numeric_data().columns.tolist() @@ -48,10 +69,19 @@ def find_outliers(df, cols=False, remove=False): outliers[col] = outlier_list - return outliers + if remove: + return outliers, df + else: + return outliers def boxplot(df, cols=False): + """ + Shows a boxplot of the dataframe. + :param df: Dataframe + + :return: None + """ if cols: df = df[cols] @@ -70,11 +100,25 @@ def boxplot(df, cols=False): def get_combinations(list_of_values): + """ + Gets the combinations of the list of values. + :param list_of_values: List of values + + :return: List of combinations + """ return list(itertools.combinations(list_of_values, 2)) # NEEDS TO BE CHANGED TO DISPLAY IN SAME PAGE def feature_correlation(df, cols=False, kind="reg"): + """ + Gets the correlation matrix of the dataframe. + :param df: Dataframe + :param cols: Columns to check for outliers + :param kind: Type of plot to show + + :return: None + """ if cols: lst = get_combinations(cols) @@ -90,29 +134,66 @@ def feature_correlation(df, cols=False, kind="reg"): def fill_nan_with_mean(df): + """ + Fills the NaN values with the mean of the column. + :param df: Dataframe + + :return: Dataframe with NaN values filled with mean. + """ + for col in df.columns: df[col] = df[col].fillna(df[col].mean()) return df def delete_row_with_nan(df): + """ + Delete rows with NaN values. + :param df: Dataframe + + :return: Dataframe without rows with NaN values. + """ + df.dropna(inplace=True) return df def pie_chart(df, col): + """ + Pie chart of the dataframe. + :param df: Dataframe + :param col: Column to show in the pie chart + + :return: None + """ df[col].value_counts().plot(kind="pie", autopct="%1.1f%%") plt.show() def count_plot(df, col): + """ + Count chart of the dataframe. + :param df: Dataframe + :param col: Column to show in the count chart + + :return: None + """ df[col].value_counts().plot(kind="bar") plt.show() def feature_importance(x, y, show_plot=False): + """ + Gets the important features for the given target column. + :param x: Dataframe + :param y: Target column + :param show_plot: If True, shows the plot of the feature importance. + + :return: List of important features + """ + model = ExtraTreesClassifier() model.fit(x, y) @@ -126,6 +207,14 @@ def feature_importance(x, y, show_plot=False): def histogram(df, cols, bins=10): + """ + Shows a histogram of the dataframe. + :param df: Dataframe + :param cols: Columns to show in the histogram + :param bins: Number of bins in the histogram + + :return: None + """ n = len(cols) @@ -144,14 +233,38 @@ def histogram(df, cols, bins=10): def get_median(df, col): + """ + Gets the median of the column. + :param df: Dataframe + :param col: Column to get the median of + + :return: Median of the column + """ + return df[col].median() def get_mean(df, col): + """ + Gets the mean of the column. + :param df: Dataframe + :param col: Column to get the mean of + + :return: Mean of the column + """ + return df[col].mean() def check_for_outliers(df, cols=False, threshold=10): + """ + Finds columns that might have outliers in the dataframe. 
+ :param df: Dataframe + :param cols: Columns to check for outliers + :param threshold: Threshold for deviation of mean from median + + :return: List of columns with outliers + """ cols = df.columns if cols is False else cols @@ -168,6 +281,13 @@ def check_for_outliers(df, cols=False, threshold=10): def get_correlation_with_target(df, target, cols=False): + """ + Gets the correlation between the target column and the other columns. + :param df: Dataframe + :param target: Target column + + :return: List of correlations + """ if cols: df = df[cols] @@ -176,22 +296,61 @@ def get_correlation_with_target(df, target, cols=False): def get_kurtosis(df, col): + """ + Gets the kurtosis of the column. + :param df: Dataframe + :param col: Column to get the kurtosis of + + :return: Kurtosis of the column + """ + return df[col].kurtosis() def get_skewness(df, col): + """ + Gets the skewness of the column. + :param df: Dataframe + :param col: Column to get the skewness of + + :return: skewness of the column + """ + return df[col].skew() def get_variance(df, col): + """ + Gets the variance of the column. + :param df: Dataframe + :param col: Column to get the variance of + + :return: variance of the column + """ + return df[col].var() def get_count_of_unique_values(df, col): + """ + Gets the count of unique values in the column. + :param df: Dataframe + :param col: Column to get the count of unique values of + + :return: count of unique values in the column + """ + return df[col].nunique() def get_statistics(df, cols=False): + """ + Gets the statistics of the dataframe. + :param df: Dataframe + :param cols: Columns to get the statistics of + + :return: Dictionary with the statistics + """ if not cols: cols = df.columns From 4bad47a3b9d086a358c47d0fcffbcd07c3144320 Mon Sep 17 00:00:00 2001 From: Rohit R Date: Mon, 27 Jun 2022 22:09:19 +0530 Subject: [PATCH 17/28] . 
--- src/subpixel/experimental/cpcv2.py | 76 ++++++++++++++++++++++++++++++ src/subpixel/ml/display.py | 0 2 files changed, 76 insertions(+) create mode 100644 src/subpixel/experimental/cpcv2.py create mode 100644 src/subpixel/ml/display.py diff --git a/src/subpixel/experimental/cpcv2.py b/src/subpixel/experimental/cpcv2.py new file mode 100644 index 0000000..50a4e0c --- /dev/null +++ b/src/subpixel/experimental/cpcv2.py @@ -0,0 +1,76 @@ +import torch +import numpy as np +import torch.nn as nn +from torch.autograd import Variable +import os +import time +import random +import torchvision.transforms as ttf +import cv2 +import matplotlib.pylab as plt + + +# WRONG +def get_overlapping_grids(img, kernal_size, stride): + + h, w, _ = img.shape + csteps = int((h / stride) - 1) + rsteps = int((w / stride) - 1) + + + crops = [] + img_full = np.random.randn(csteps * kernal_size, rsteps * kernal_size, 3) + + for i in range(csteps): + for j in range(rsteps): + crop = np.array( + img[ + stride * i : stride * i + kernal_size, + stride * j : stride * j + kernal_size, + :, + ] + ) + + img_full[ + kernal_size * i : kernal_size * i + kernal_size, + kernal_size * j : kernal_size * j + kernal_size, + :, + ] = crop + + crops.append(crop) + + return crops, img_full + + +def read_image(filename, resize=False): + + image = cv2.imread(filename) + + try: + image = cv2.cvtColor(image, cv2.COLOR_BGR2RGB) + except: + image = cv2.cvtColor(image, cv2.COLOR_BGR2GRAY) + + if resize: + image = cv2.resize(image, resize) + + return image + + +def display_images(images, nrows=3, ncols=3, cmap=None, title=None): + + fig, ax = plt.subplots(nrows=nrows, ncols=ncols, figsize=(10, 10)) + if title: + fig.suptitle(title, fontsize=20) + for i in range(ncols): + for j in range(nrows): + ax[i][j].imshow(images[i], cmap=cmap) + ax[i][j].axis("off") + plt.show() + + +img = read_image("D:\\Desktop\\test.jpeg") +grids, full = get_overlapping_grids(img, kernal_size=100, stride=100) +display_images(grids) +plt.imshow(full) +plt.show() \ No newline at end of file diff --git a/src/subpixel/ml/display.py b/src/subpixel/ml/display.py new file mode 100644 index 0000000..e69de29 From a26bf664e2cc4d104462dec5c4b3b8d54950bd95 Mon Sep 17 00:00:00 2001 From: Rohit R Date: Thu, 30 Jun 2022 11:41:52 +0530 Subject: [PATCH 18/28] model results --- src/subpixel/model_results.py | 42 +++++++++++++++++++++++++++++++++++ 1 file changed, 42 insertions(+) create mode 100644 src/subpixel/model_results.py diff --git a/src/subpixel/model_results.py b/src/subpixel/model_results.py new file mode 100644 index 0000000..f5c146c --- /dev/null +++ b/src/subpixel/model_results.py @@ -0,0 +1,42 @@ +import torch +import json +import matplotlib.pyplot as plt +import seaborn as sns + + +class ModelResults(): + def __init__(self, model, result_dict, output_path, testset= None): + + self.model = model + self.result_dict = result_dict + self.output_path = output_path + + if testset: + self.testset = testset + + self.epochs = result_dict['epochs'] + self.loss = result_dict['loss'] + self.acc = result_dict['acc'] + self.lr = result_dict['lr'] + + def save_results(self): + + plt.plot(self.epochs, self.loss) + plt.xlabel('Epochs') + plt.ylabel('Loss') + plt.savefig(self.output_path + 'loss_vs_epochs.png') + plt.close() + + plt.plot(self.epochs, self.lr) + plt.xlabel('Epochs') + plt.ylabel('Learning Rate') + plt.savefig(self.output_path + 'loss_vs_lr.png') + plt.close() + + if self.acc: + plt.plot(self.epochs, self.acc) + plt.xlabel('Epochs') + plt.ylabel('Accuracy') + 
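+            # output_path is assumed to end with a path separator, since the file name is appended directly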
plt.savefig(self.output_path + 'acc_vs_epochs.png') + plt.close() + From 69f211e3ae784bcf8e118b9ed029ebac91b4a94e Mon Sep 17 00:00:00 2001 From: Rohit R Date: Sun, 3 Jul 2022 14:03:38 +0530 Subject: [PATCH 19/28] new shit --- src/subpixel/experimental/cpcv2.py | 213 ++++++++++++++++++++++++----- src/subpixel/ml/model.py | 26 ++++ 2 files changed, 204 insertions(+), 35 deletions(-) create mode 100644 src/subpixel/ml/model.py diff --git a/src/subpixel/experimental/cpcv2.py b/src/subpixel/experimental/cpcv2.py index 50a4e0c..d9a4150 100644 --- a/src/subpixel/experimental/cpcv2.py +++ b/src/subpixel/experimental/cpcv2.py @@ -1,45 +1,50 @@ import torch import numpy as np import torch.nn as nn -from torch.autograd import Variable import os import time import random import torchvision.transforms as ttf +from torch.utils.data import Dataset, DataLoader, random_split import cv2 import matplotlib.pylab as plt +import warnings +warnings.filterwarnings("ignore") -# WRONG -def get_overlapping_grids(img, kernal_size, stride): - - h, w, _ = img.shape - csteps = int((h / stride) - 1) - rsteps = int((w / stride) - 1) +DEVICE = "cuda" if torch.cuda.is_available() else "cpu" - crops = [] - img_full = np.random.randn(csteps * kernal_size, rsteps * kernal_size, 3) +def get_grids(image, grid_size, overlap): - for i in range(csteps): - for j in range(rsteps): - crop = np.array( - img[ - stride * i : stride * i + kernal_size, - stride * j : stride * j + kernal_size, - :, - ] - ) - - img_full[ - kernal_size * i : kernal_size * i + kernal_size, - kernal_size * j : kernal_size * j + kernal_size, - :, - ] = crop + try: + h, w, _ = image.shape + except: + h, w = image.shape + + try: + h_grid, w_grid = grid_size + except: + h_grid, w_grid = grid_size, grid_size + + h_steps = (h - h_grid) // (h_grid * (1 - overlap)) + 1 + w_steps = w / w_grid + + grids = [] - crops.append(crop) + for i in range(int(h_steps)): + for j in range(int(w_steps)): - return crops, img_full + if j == 0: + w_start = 0 + else: + w_start = w_grid * (j - overlap) + w_start = int(np.round(w_start)) + + grid = image[i * h_grid : (i + 1) * h_grid, w_start : w_start + w_grid, :] + grids.append(grid) + + return grids def read_image(filename, resize=False): @@ -57,20 +62,158 @@ def read_image(filename, resize=False): return image -def display_images(images, nrows=3, ncols=3, cmap=None, title=None): - +def display_images(images, nrows=4, ncols=3, cmap=None, title=None): + fig, ax = plt.subplots(nrows=nrows, ncols=ncols, figsize=(10, 10)) if title: fig.suptitle(title, fontsize=20) + c = 0 for i in range(ncols): for j in range(nrows): - ax[i][j].imshow(images[i], cmap=cmap) - ax[i][j].axis("off") + ax[j][i].imshow(images[c], cmap=cmap) + ax[j][i].axis("off") + c += 1 plt.show() -img = read_image("D:\\Desktop\\test.jpeg") -grids, full = get_overlapping_grids(img, kernal_size=100, stride=100) -display_images(grids) -plt.imshow(full) -plt.show() \ No newline at end of file +class CPC_Dataset(Dataset): + def __init__(self, path, grid_size, overlap, transform=None): + + self.path = path + self.transform = transform + self.images = os.listdir(path) + self.images.sort() + self.grid_size, self.overlap = grid_size, overlap + + def __len__(self): + return len(self.images) + + def __getitem__(self, idx): + + img_path = os.path.join(self.path, self.images[idx]) + img = read_image(img_path) + grids = get_grids(img, self.grid_size, self.overlap) + + if self.transform: + for i, grid in enumerate(grids): + grids[i] = self.transform(grid) + + +class BasicBlock(nn.Module): + 
def __init__(self, in_channels, out_channels, stride=(2, 2)): + super(BasicBlock, self).__init__() + + self.stride = stride + + self.conv1 = nn.Conv2d(in_channels, out_channels, (3, 3), stride, (1, 1)) + self.bn = nn.BatchNorm2d(out_channels) + self.relu = nn.ReLU() + + self.conv2 = nn.Conv2d(out_channels, out_channels, (3, 3), (1, 1), (1, 1)) + + self.up = nn.Conv2d(in_channels, out_channels, (1, 1), (2, 2)) + + def forward(self, x): + + x_ = self.relu(self.bn(self.conv1(x))) + x_ = self.bn(self.conv2(x_)) + + if self.stride == (2, 2): + x = self.bn(self.up(x)) + + return x_ + x + + +class Resnet18(nn.Module): + def __init__(self): + super(Resnet18, self).__init__() + + self.conv1 = nn.Conv2d(3, 32, (7, 7), (2, 2), (3, 3), bias=False) + self.bn = nn.BatchNorm2d(32) + self.relu = nn.ReLU() + self.maxpool = nn.MaxPool2d(3, 2, 1, 1) + self.avgpool = nn.AdaptiveAvgPool2d(output_size=(1, 1)) + + self.layer1 = nn.ModuleList( + [BasicBlock(32, 32, stride=(1, 1)), BasicBlock(32, 32, stride=(1, 1))] + ) + self.layer2 = nn.ModuleList( + [BasicBlock(32, 64), BasicBlock(64, 64, stride=(1, 1))] + ) + self.layer3 = nn.ModuleList( + [BasicBlock(64, 128), BasicBlock(128, 128, stride=(1, 1))] + ) + self.layer4 = nn.ModuleList( + [BasicBlock(128, 256), BasicBlock(256, 256, stride=(1, 1))] + ) + + def forward(self, x): + + x = self.maxpool(self.relu(self.bn(self.conv1(x)))) + + for layer in self.layer1: + x = layer(x) + + for layer in self.layer2: + x = layer(x) + + for layer in self.layer3: + x = layer(x) + + for layer in self.layer4: + x = layer(x) + + x = self.avgpool(x) + return x + + +# class CPC_Model(nn.Module): +# def __init__(self): +# super().__init__() + +# self.model = Resnet18() + +# try: +# self.model = torch.load(r"../input/mri-scan/Encoder_2").to(DEVICE) +# except: +# pass + +# self.net = nn.Sequential( +# nn.Conv2d(256, 128, 1, 1), +# nn.Conv2d(128, 128, 1, 1), +# nn.Conv2d(128, 256, 1, 1), +# ) + +# def forward(self, crops): + +# embedding = self.model(crops[0].to(DEVICE)) +# for crop in crops[1:]: +# emb = self.model(crop.to(DEVICE)) +# embedding = torch.cat([embedding, emb], dim=0) + +# context = embedding.reshape((1, 256, 6, 6)) + +# if np.random.rand(1)[0] > 0.5: +# if np.random.rand(1)[0] > 0.5: +# top_half = context[:, :, :3, :] +# bottom_half = context[:, :, 3:, :] + +# return self.net(top_half) + +# else: +# bottom_half = context[:, :, 3:, :] +# top_half = context[:, :, :3, :] + +# return self.net(bottom_half) +# else: +# if np.random.rand(1)[0] > 0.5: +# right_half = context[:, :, :, 3:] +# left_half = context[:, :, :, :3] + +# return self.net(right_half) + +# else: +# left_half = context[:, :, :, :3] +# right_half = context[:, :, :, 3:] + +# return self.net(left_half) diff --git a/src/subpixel/ml/model.py b/src/subpixel/ml/model.py new file mode 100644 index 0000000..cc94d6b --- /dev/null +++ b/src/subpixel/ml/model.py @@ -0,0 +1,26 @@ +import torch +import torch.nn as nn +from tab_transformer_pytorch import TabTransformer + + +cont_mean_std = torch.randn(10, 2) + +## EXPERMIENTAL ## +# If normal ML models don't give good results and if the dataset is big enogh to use TabTransformer. +# Use exisiting train function or make a new train function to train TabTransformer on the custom dataset. +# Try to maybe find methods to find the best parameters for TabTransformer for the given task. 
+ + +model = TabTransformer( + categories=(10, 5, 6, 5, 8), # tuple containing the number of unique values within each category + num_continuous=10, # number of continuous values + dim=32, # dimension, paper set at 32 + dim_out=1, # binary prediction, but could be anything + depth=6, # depth, paper recommended 6 + heads=8, # heads, paper recommends 8 + attn_dropout=0.1, # post-attention dropout + ff_dropout=0.1, # feed forward dropout + mlp_hidden_mults=(4, 2), # relative multiples of each hidden dimension of the last mlp to logits + mlp_act=nn.ReLU(), # activation for final mlp, defaults to relu, but could be anything else (selu etc) + continuous_mean_std=cont_mean_std, # (optional) - normalize the continuous values before layer norm +) From cfdbfca09c381bfee1b88bd12720bfb381c15117 Mon Sep 17 00:00:00 2001 From: Rohit R Date: Sun, 3 Jul 2022 15:51:34 +0530 Subject: [PATCH 20/28] gradcam --- src/subpixel/experimental/gradcam.py | 18 ++++++++++++++++++ 1 file changed, 18 insertions(+) diff --git a/src/subpixel/experimental/gradcam.py b/src/subpixel/experimental/gradcam.py index e69de29..7cc011a 100644 --- a/src/subpixel/experimental/gradcam.py +++ b/src/subpixel/experimental/gradcam.py @@ -0,0 +1,18 @@ +from torchcam.methods import SmoothGradCAMpp +import cv2 +import torch +from torchvision.transforms.functional import normalize + + +def get_activationMap(model, image, device='cpu'): + + cam_extractor = SmoothGradCAMpp(model) + + if isinstance(image, str): + image = cv2.cvtColor(cv2.imread(image), cv2.COLOR_BGR2RGB) + image = torch.tensor(image).permute(2, 0, 1).float() + image = normalize(image / 255., [0.485, 0.456, 0.406], [0.229, 0.224, 0.225]) + + out = model(image.unsqueeze(0).to(device)) + + return cam_extractor(out.squeeze(0).argmax().item(), out) From 7bde72fcfd188bd83b565edbec50020eac5b6982 Mon Sep 17 00:00:00 2001 From: Rohit R Date: Sun, 3 Jul 2022 16:02:29 +0530 Subject: [PATCH 21/28] gradcam bbox --- src/subpixel/experimental/gradcam.py | 15 +++++++++++++++ 1 file changed, 15 insertions(+) diff --git a/src/subpixel/experimental/gradcam.py b/src/subpixel/experimental/gradcam.py index 7cc011a..69b7ef2 100644 --- a/src/subpixel/experimental/gradcam.py +++ b/src/subpixel/experimental/gradcam.py @@ -2,6 +2,7 @@ import cv2 import torch from torchvision.transforms.functional import normalize +import numpy as np def get_activationMap(model, image, device='cpu'): @@ -16,3 +17,17 @@ def get_activationMap(model, image, device='cpu'): out = model(image.unsqueeze(0).to(device)) return cam_extractor(out.squeeze(0).argmax().item(), out) + + +def get_bbox(activation_maps, image_shape= None, threshold=0.5): + + if image_shape: + activation_maps = cv2.resize(activation_maps, image_shape) + + activation_map = activation_maps[0] + activation_map = activation_map > threshold + contours, _ = cv2.findContours(activation_map.astype(np.uint8), cv2.RETR_TREE, cv2.CHAIN_APPROX_SIMPLE) + + bbox = cv2.boundingRect(contours[0]) + + return bbox \ No newline at end of file From 1c6a3898fd61e59e252c33891e7050cf6e047e8e Mon Sep 17 00:00:00 2001 From: Rohit R Date: Sun, 3 Jul 2022 16:13:56 +0530 Subject: [PATCH 22/28] . 
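This change only normalises the indentation of get_bbox (tabs to spaces); behaviour is unchanged. As a rough sketch of what the helper expects, a single-channel activation map in [0, 1] can be passed in a list (the subpixel import path is an assumption based on the src layout; maps coming from get_activationMap are torch tensors and may need a .numpy() conversion first):

import numpy as np
from subpixel.experimental.gradcam import get_bbox  # assumed import path

# Synthetic 7x7 activation map with a hot 3x3 region in the middle.
cam = np.zeros((7, 7), dtype=np.float32)
cam[2:5, 2:5] = 1.0

x, y, w, h = get_bbox([cam], threshold=0.5)
print(x, y, w, h)  # -> 2 2 3 3, a box around the thresholded hot region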
--- src/subpixel/experimental/gradcam.py | 20 ++++++++++---------- 1 file changed, 10 insertions(+), 10 deletions(-) diff --git a/src/subpixel/experimental/gradcam.py b/src/subpixel/experimental/gradcam.py index 69b7ef2..ca2613f 100644 --- a/src/subpixel/experimental/gradcam.py +++ b/src/subpixel/experimental/gradcam.py @@ -21,13 +21,13 @@ def get_activationMap(model, image, device='cpu'): def get_bbox(activation_maps, image_shape= None, threshold=0.5): - if image_shape: - activation_maps = cv2.resize(activation_maps, image_shape) - - activation_map = activation_maps[0] - activation_map = activation_map > threshold - contours, _ = cv2.findContours(activation_map.astype(np.uint8), cv2.RETR_TREE, cv2.CHAIN_APPROX_SIMPLE) - - bbox = cv2.boundingRect(contours[0]) - - return bbox \ No newline at end of file + if image_shape: + activation_maps = cv2.resize(activation_maps, image_shape) + + activation_map = activation_maps[0] + activation_map = activation_map > threshold + contours, _ = cv2.findContours(activation_map.astype(np.uint8), cv2.RETR_TREE, cv2.CHAIN_APPROX_SIMPLE) + + bbox = cv2.boundingRect(contours[0]) + + return bbox \ No newline at end of file From 946d6022f1b09ebebc135bbf2ce17ae37f4566a3 Mon Sep 17 00:00:00 2001 From: Rohit R Date: Mon, 4 Jul 2022 21:56:29 +0530 Subject: [PATCH 23/28] . --- src/subpixel/ml/EDA.py | 51 ++++++++++++++++++++++++++++++++++++++++ src/subpixel/ml/utils.py | 7 +++--- 2 files changed, 55 insertions(+), 3 deletions(-) create mode 100644 src/subpixel/ml/EDA.py diff --git a/src/subpixel/ml/EDA.py b/src/subpixel/ml/EDA.py new file mode 100644 index 0000000..e013277 --- /dev/null +++ b/src/subpixel/ml/EDA.py @@ -0,0 +1,51 @@ +from utils import * + + +class EDA: + def __init__(self, df, target_col=None): + self.df = df + self.target_col = target_col + + def show_corrMatrix(self): + return correlation_matrix(self.df) + + def get_importantFeatures(self): + + if self.target_col: + imp_features = feature_importance(self.df, self.target_col) + return imp_features + else: + raise Exception("Target column not specified.") + + def deal_withNaN(self, method="mean"): + if method == "mean": + return fill_nan_with_mean(self.df) + elif method == "delete": + return delete_row_with_nan(self.df) + else: + raise Exception("Method not supported.") + + def check_and_deal_wtihOutliers(self): + + cols_with_outliers = check_for_outliers(self.df) + if cols_with_outliers: + _, df = find_outliers(self.df, cols=cols_with_outliers, remove=True) + return df + else: + return None + + def data_stats(self): + return get_statistics(self.df) + + def show_chart(self, df, col, chart="pie"): + + if chart == "pie": + pie_chart(self.df, col) + elif chart == "count": + count_plot(self.df, col) + elif chart == "hist": + histogram(self.df, col) + elif chart == "box": + boxplot(self.df, col) + else: + raise Exception("Chart not supported.") diff --git a/src/subpixel/ml/utils.py b/src/subpixel/ml/utils.py index bdcd7d6..de3493f 100644 --- a/src/subpixel/ml/utils.py +++ b/src/subpixel/ml/utils.py @@ -29,7 +29,9 @@ def correlation_matrix(df, cols=False): if cols: df = df[cols] - return df.corr() + numeric_cols = df._get_numeric_data().columns.tolist() + + return df[numeric_cols].corr() def find_outliers(df, cols=False, remove=False): @@ -200,7 +202,7 @@ def feature_importance(x, y, show_plot=False): feat_importances = pd.Series(model.feature_importances_, index=x.columns) if show_plot: - feat_importances.nlargest(12).plot(kind="barh") + feat_importances.nlargest(len(x.columns) // 2).plot(kind="barh") 
plt.show() return feat_importances @@ -367,4 +369,3 @@ def get_statistics(df, cols=False): stats[col]["kurtosis"] = get_kurtosis(df, col) return stats - From 2c7a78ff259fe9d55f9df7bc3034e5f25db553e4 Mon Sep 17 00:00:00 2001 From: Rohit R Date: Fri, 8 Jul 2022 18:59:12 +0530 Subject: [PATCH 24/28] bug fixes --- src/subpixel/ml/utils.py | 30 ++++++++++++++---------------- 1 file changed, 14 insertions(+), 16 deletions(-) diff --git a/src/subpixel/ml/utils.py b/src/subpixel/ml/utils.py index de3493f..c1882ce 100644 --- a/src/subpixel/ml/utils.py +++ b/src/subpixel/ml/utils.py @@ -4,6 +4,7 @@ import itertools from sklearn.ensemble import ExtraTreesClassifier import pandas as pd +import json def accuracy(y_true, y_pred): @@ -34,7 +35,7 @@ def correlation_matrix(df, cols=False): return df[numeric_cols].corr() -def find_outliers(df, cols=False, remove=False): +def find_outliers(df, cols=False): """ Finds outliers in each column of the dataframe. :param df: Dataframe @@ -49,12 +50,10 @@ def find_outliers(df, cols=False, remove=False): else: numeric_cols = df._get_numeric_data().columns.tolist() - outliers = {} + outlier_idx = [] for col in numeric_cols: - outlier_list = [] - q1 = df[col].quantile(0.25) q3 = df[col].quantile(0.75) iqr = q3 - q1 @@ -64,17 +63,12 @@ def find_outliers(df, cols=False, remove=False): for i, val in enumerate(df[col]): if val < low_bound or val > high_bound: - outlier_list.append(i) - - if remove: - df.drop(df.index[i], inplace=True) + outlier_idx.append(i) - outliers[col] = outlier_list - - if remove: - return outliers, df - else: - return outliers + outlier_idx = list(set(outlier_idx)) + df = df.drop(index=outlier_idx) + + return outlier_idx, df def boxplot(df, cols=False): @@ -294,7 +288,7 @@ def get_correlation_with_target(df, target, cols=False): if cols: df = df[cols] - return df.corrwith(target).sort_values(ascending=False) + return df.corrwith(df[target]).sort_values(ascending=False)[1:] def get_kurtosis(df, col): @@ -345,7 +339,7 @@ def get_count_of_unique_values(df, col): return df[col].nunique() -def get_statistics(df, cols=False): +def get_statistics(df, cols=False, save= False): """ Gets the statistics of the dataframe. 
:param df: Dataframe @@ -368,4 +362,8 @@ def get_statistics(df, cols=False): stats[col]["skewness"] = get_skewness(df, col) stats[col]["kurtosis"] = get_kurtosis(df, col) + if save: + with open("stats.json", "w") as f: + json.dump(stats, f) + return stats From 1a877ab240ff94fab02310fa726720d91f7d29bf Mon Sep 17 00:00:00 2001 From: Rohit R Date: Fri, 8 Jul 2022 20:13:12 +0530 Subject: [PATCH 25/28] bug fixes --- src/subpixel/ml/utils.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/src/subpixel/ml/utils.py b/src/subpixel/ml/utils.py index c1882ce..4e5c641 100644 --- a/src/subpixel/ml/utils.py +++ b/src/subpixel/ml/utils.py @@ -138,7 +138,7 @@ def fill_nan_with_mean(df): """ for col in df.columns: - df[col] = df[col].fillna(df[col].mean()) + df[col] = df[col].fillna(get_mean(df, col)) return df @@ -366,4 +366,4 @@ def get_statistics(df, cols=False, save= False): with open("stats.json", "w") as f: json.dump(stats, f) - return stats + return stats \ No newline at end of file From 441b1b23e98be384fee80e61b2ef2d0456aa35ee Mon Sep 17 00:00:00 2001 From: Rohit R Date: Fri, 8 Jul 2022 22:25:07 +0530 Subject: [PATCH 26/28] to do --- src/subpixel/ml/utils.py | 9 ++++++--- 1 file changed, 6 insertions(+), 3 deletions(-) diff --git a/src/subpixel/ml/utils.py b/src/subpixel/ml/utils.py index 4e5c641..cb32912 100644 --- a/src/subpixel/ml/utils.py +++ b/src/subpixel/ml/utils.py @@ -7,6 +7,9 @@ import json +# Make function to find if a column has classification type values that are not numeric, if True get_dummies. If False, do nothing. + + def accuracy(y_true, y_pred): """ Function that finds the accuracy of a model based on the true and predicted values. @@ -67,7 +70,7 @@ def find_outliers(df, cols=False): outlier_idx = list(set(outlier_idx)) df = df.drop(index=outlier_idx) - + return outlier_idx, df @@ -339,7 +342,7 @@ def get_count_of_unique_values(df, col): return df[col].nunique() -def get_statistics(df, cols=False, save= False): +def get_statistics(df, cols=False, save=False): """ Gets the statistics of the dataframe. :param df: Dataframe @@ -366,4 +369,4 @@ def get_statistics(df, cols=False, save= False): with open("stats.json", "w") as f: json.dump(stats, f) - return stats \ No newline at end of file + return stats From 964e9f66733755ff0ae23cddff37e2062126dc80 Mon Sep 17 00:00:00 2001 From: Rohit R Date: Tue, 12 Jul 2022 16:39:54 +0530 Subject: [PATCH 27/28] tabtransformer --- src/subpixel/ml/model.py | 2 ++ 1 file changed, 2 insertions(+) diff --git a/src/subpixel/ml/model.py b/src/subpixel/ml/model.py index cc94d6b..9e9e7b3 100644 --- a/src/subpixel/ml/model.py +++ b/src/subpixel/ml/model.py @@ -2,6 +2,8 @@ import torch.nn as nn from tab_transformer_pytorch import TabTransformer +# https://github.com/lucidrains/tab-transformer-pytorch + cont_mean_std = torch.randn(10, 2) From 39213b654431a5edf3c192a0634e6f23cce70d50 Mon Sep 17 00:00:00 2001 From: Rohit R Date: Tue, 19 Jul 2022 09:33:53 +0530 Subject: [PATCH 28/28] testing pull requests in github. --- src/subpixel/vision/train.py | 1 + 1 file changed, 1 insertion(+) diff --git a/src/subpixel/vision/train.py b/src/subpixel/vision/train.py index 06c2b40..9d88ada 100644 --- a/src/subpixel/vision/train.py +++ b/src/subpixel/vision/train.py @@ -241,3 +241,4 @@ def evaluate(self, test_path): return sum(losses) / len(losses) +# test pull request. \ No newline at end of file
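After the ml/utils.py fixes in patches 23 through 26, the tabular EDA helpers can be exercised end to end. A minimal sketch on a toy frame (column names and values are made up, and the import assumes the src layout is importable as subpixel):

import pandas as pd
from subpixel.ml import utils  # assumed import path

df = pd.DataFrame({
    "age":   [22, 25, 31, 29, 120],        # 120 is an obvious outlier
    "score": [0.40, 0.50, 0.55, 0.60, 0.52],
    "label": [0, 0, 1, 1, 0],
})

print(utils.correlation_matrix(df))                    # numeric-only correlation matrix
print(utils.get_correlation_with_target(df, "label"))  # correlations against the target, self-correlation dropped
outlier_idx, clean = utils.find_outliers(df)           # IQR rule; returns dropped row indices and the cleaned frame
stats = utils.get_statistics(clean)                    # per-column summary dict (variance, skewness, kurtosis, ...)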