From 56489af7e7e22f86430f3762139eecc2bde798a5 Mon Sep 17 00:00:00 2001 From: Rohit R Date: Wed, 4 May 2022 19:43:31 +0530 Subject: [PATCH 01/28] added docs --- src/subpixel/data.py | 20 ++++++++++++++++++++ src/subpixel/model.py | 5 +++++ src/subpixel/utils.py | 28 ++++++++++++++++------------ 3 files changed, 41 insertions(+), 12 deletions(-) diff --git a/src/subpixel/data.py b/src/subpixel/data.py index 4994799..43245da 100644 --- a/src/subpixel/data.py +++ b/src/subpixel/data.py @@ -18,6 +18,9 @@ class ImageDataset(Dataset): + ''' + Class that takes in the path of the dataset and converts it into a torch.utils.data.Dataset object. + ''' def __init__(self, path, mode, device, transforms=None, train=True): super().__init__() @@ -84,6 +87,16 @@ def __len__(self): def get_dataset(path, mode, device, transforms=None): + ''' + Function that takes in the path and generates a trainset and valset (if present). + + path: str + mode: str + device: str + transforms: albumentations.transforms + + Returns trainset and valset + ''' trainset = ImageDataset( f"{path}train\\", mode, device, transforms=transforms, train=True @@ -95,5 +108,12 @@ def get_dataset(path, mode, device, transforms=None): return trainset def get_dataloader(datset, b_size, shuffle): + ''' + Converts the dataset to a DataLoader. + + dataset: torch.utils.data.Dataset + + Returns torch.utils.data.DataLoader + ''' return DataLoader(datset, b_size, shuffle) diff --git a/src/subpixel/model.py b/src/subpixel/model.py index 40288eb..6602b1b 100644 --- a/src/subpixel/model.py +++ b/src/subpixel/model.py @@ -78,6 +78,11 @@ def fit(self,trainset : Union[str,nn.Module], loss_fun : nn.Module,optimizer : s return self.history def find_size(self): + + '''Finds the size occupied by the trainable model parameters in CUDA memory. + + Returns the total number of trainable parameters and the size occupied. + ''' p_total = sum(p.numel() for p in self.parameters() if p.requires_grad) bits = 32. diff --git a/src/subpixel/utils.py b/src/subpixel/utils.py index 881462c..2fa237e 100644 --- a/src/subpixel/utils.py +++ b/src/subpixel/utils.py @@ -10,26 +10,27 @@ import numpy as np import random -# from subpixel.model import Model +# def show_batch(data): +# pass -def show_batch(data): - pass +# def EncodingToClass(lst, classes): -def EncodingToClass(lst, classes): +# lst = list(lst.detach().squeeze(0).numpy()) +# return classes[lst.index(max(lst))] - lst = list(lst.detach().squeeze(0).numpy()) - return classes[lst.index(max(lst))] - -def get_boxxes(t): - # '{x, y, h, w, [classes]}' -> [x, y, h, w, classes] - bbox = list(json.loads(t).values()) - return bbox[:-1] + bbox[-1] +# def get_boxxes(t): +# # '{x, y, h, w, [classes]}' -> [x, y, h, w, classes] +# bbox = list(json.loads(t).values()) +# return bbox[:-1] + bbox[-1] def seed_everything(seed=42): + ''' + Seeds EVERYTHING. + ''' random.seed(seed) os.environ["PYTHONHASHSEED"] = str(seed) @@ -41,6 +42,9 @@ def seed_everything(seed=42): def init_model(m): + ''' + Initialises model parameters with xavier normalisation method. 
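+
+    Typically applied to every sub-module via nn.Module.apply; an illustrative
+    sketch (model here is a hypothetical nn.Module instance, not part of this library):
+
+        model.apply(init_model)   # re-initialises supported layers in place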
+ ''' seed_everything() @@ -111,7 +115,7 @@ def findLR( model : nn.Module, dataset : nn.Module, loss_fn : nn.Module ,optimiz -def find_batch_size(model : nn.Module, dataset : nn.Module) -> None: +def find_batch_size(model : nn.Module, dataset : nn.Module): ''' Finds the batch size to be set for ideal GPU usage (95% default) From 2df2079abce45fc4d63630dcaada989772ca2b37 Mon Sep 17 00:00:00 2001 From: Rohit R Date: Wed, 4 May 2022 19:44:11 +0530 Subject: [PATCH 02/28] minor bug fix --- src/subpixel/utils.py | 68 +++++++++++++++++++++---------------------- 1 file changed, 33 insertions(+), 35 deletions(-) diff --git a/src/subpixel/utils.py b/src/subpixel/utils.py index 2fa237e..b082e8f 100644 --- a/src/subpixel/utils.py +++ b/src/subpixel/utils.py @@ -21,16 +21,16 @@ # return classes[lst.index(max(lst))] -# def get_boxxes(t): -# # '{x, y, h, w, [classes]}' -> [x, y, h, w, classes] -# bbox = list(json.loads(t).values()) -# return bbox[:-1] + bbox[-1] +def get_boxxes(t): + # '{x, y, h, w, [classes]}' -> [x, y, h, w, classes] + bbox = list(json.loads(t).values()) + return bbox[:-1] + bbox[-1] def seed_everything(seed=42): - ''' + """ Seeds EVERYTHING. - ''' + """ random.seed(seed) os.environ["PYTHONHASHSEED"] = str(seed) @@ -42,9 +42,9 @@ def seed_everything(seed=42): def init_model(m): - ''' + """ Initialises model parameters with xavier normalisation method. - ''' + """ seed_everything() @@ -58,9 +58,16 @@ def init_model(m): nn.init.xavier_normal_(m.weight.data) - -def findLR( model : nn.Module, dataset : nn.Module, loss_fn : nn.Module ,optimizer : str , start_lr : float=1e-7, end_lr : float=1e-1, steps : float=100): - ''' +def findLR( + model: nn.Module, + dataset: nn.Module, + loss_fn: nn.Module, + optimizer: str, + start_lr: float = 1e-7, + end_lr: float = 1e-1, + steps: float = 100, +): + """ Finds the ideal initial LR for optimal training. model : nn.Module , the model for which ideal LR needs to be found. @@ -75,20 +82,18 @@ def findLR( model : nn.Module, dataset : nn.Module, loss_fn : nn.Module ,optimiz end_lr : upper bound of the learning rate to be checked. steps : number of learning rates between start_lr and end_lr to be checked. - ''' + """ seed_everything() lr = [] loss = [] - optimizer = get_optimizer(model,lr=start_lr) + optimizer = get_optimizer(model, lr=start_lr) dx = (end_lr - start_lr) / steps - x = find_batch_size(model, dataset) + x = find_batch_size(model, dataset) if len(dataset) // steps < x: x = len(dataset) // steps - - scheduler = torch.optim.lr_scheduler.LambdaLR( - optimizer, lambda epoch: epoch + dx - ) + + scheduler = torch.optim.lr_scheduler.LambdaLR(optimizer, lambda epoch: epoch + dx) Dataloader = iter(DataLoader(dataset, x, True)) model.train() @@ -114,16 +119,15 @@ def findLR( model : nn.Module, dataset : nn.Module, loss_fn : nn.Module ,optimiz return lr[numpy.argmin(diff(loss) / dx)], loss, lr - -def find_batch_size(model : nn.Module, dataset : nn.Module): - ''' +def find_batch_size(model: nn.Module, dataset: nn.Module): + """ Finds the batch size to be set for ideal GPU usage (95% default) model : nn.Module , model being trained. dataset : nn.Module , dataset to be loaded. 
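
    A rough usage sketch (assumes a CUDA device, since the estimate is based on
    torch.cuda memory queries; the variable names are illustrative):

        b_size = find_batch_size(model, trainset)
        train_dl = get_dataloader(trainset, b_size, shuffle=True)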
- ''' + """ p, total_bits = model.find_size() f_before = torch.cuda.memory_reserved(0) - torch.cuda.memory_allocated(0) @@ -144,21 +148,15 @@ def find_batch_size(model : nn.Module, dataset : nn.Module): return b_size -def get_optimizer(model : nn.Module, optim : str = "adam", lr : float = 1e-3, weight_decay : float = 1e-5): - ''' +def get_optimizer( + model: nn.Module, optim: str = "adam", lr: float = 1e-3, weight_decay: float = 1e-5 +): + """ returns torch.optim optimizer instance given optim string - ''' + """ if optim == "adam": - return torch.optim.Adam( - model.parameters(), - lr= lr, - weight_decay= weight_decay - ) + return torch.optim.Adam(model.parameters(), lr=lr, weight_decay=weight_decay) elif optim == "sgd": - return torch.optim.SGD( - model.parameters(), - lr, - weight_decay= weight_decay - ) + return torch.optim.SGD(model.parameters(), lr, weight_decay=weight_decay) else: raise NotImplementedError("Optimizer not implemented yet!!") From 2e19f4278dee7db81900c66ec84c595acdee774b Mon Sep 17 00:00:00 2001 From: audi1712 Date: Sat, 7 May 2022 16:12:47 +0530 Subject: [PATCH 03/28] class Model now accepts custom models without arch.json.....(untested) --- src/subpixel/model.py | 10 ++++++++-- 1 file changed, 8 insertions(+), 2 deletions(-) diff --git a/src/subpixel/model.py b/src/subpixel/model.py index 6602b1b..7286291 100644 --- a/src/subpixel/model.py +++ b/src/subpixel/model.py @@ -13,8 +13,12 @@ class Model(nn.Module): ''' Converts model architecture from JSON to a trainable model and has a fit function that can train the model on the given dataset when called. ''' - def __init__(self,path = 'arch.json') -> None: + def __init__(self,model : nn.Module = None,path :str = 'arch.json') -> None: super(Model,self).__init__() + if isinstance(model,nn.Module): + self.pre_defined_model = True + self.model = model + return JSON_file = open(path,"r") arch = json.load(JSON_file) @@ -35,6 +39,8 @@ def __init__(self,path = 'arch.json') -> None: def forward(self,*X): + if self.pre_defined_model: + return self.model(*X) outputs = [] @@ -73,7 +79,7 @@ def fit(self,trainset : Union[str,nn.Module], loss_fun : nn.Module,optimizer : s valset (optional): nn.Module | None , default None, provides validation set. Note:- if trainset is str automatically valset is taken from directory structure. ''' - self.trainer = Trainer(self, trainset= trainset, epochs= 10, learning_rate= lr) + self.trainer = Trainer(self, trainset= trainset, epochs= 10, learning_rate= lr, loss_fn= loss_fun, optimizer= optimizer, mode= mode, valset= valset) self.history = self.trainer.fit() return self.history From 75e4871db9062736d2b82b4748222db9f91546bb Mon Sep 17 00:00:00 2001 From: Rohit R Date: Mon, 9 May 2022 22:30:19 +0530 Subject: [PATCH 04/28] fixed acc --- src/subpixel/train.py | 43 +++++++++++++++++-------------------------- 1 file changed, 17 insertions(+), 26 deletions(-) diff --git a/src/subpixel/train.py b/src/subpixel/train.py index a2b400c..af59775 100644 --- a/src/subpixel/train.py +++ b/src/subpixel/train.py @@ -14,29 +14,21 @@ device = "cuda" if torch.cuda.is_available() else "cpu" -def accuracy(out: torch.Tensor, labels: torch.Tensor): # NEEDS TO BE CHANGED - ''' +def accuracy(out: torch.Tensor, labels: torch.Tensor): + """ Finds the accuracy of the model by comparing the output of the model to the labels. 
out: tensor labels: tensor - ''' - - c = 0 - - preds = torch.round(out) - preds = preds.detach().cpu().numpy().tolist() - labels = labels.cpu().numpy().tolist() - - for label, pred in zip(labels, preds): - if pred == label: - c += 1 - - return c / len(out) + """ + try: + return (out == labels).sum().item() / out.size(0) * out.size(1) * out.size(2) + except: + return (out == labels).sum().item() / out.size(0) * out.size(1) class Trainer: - ''' + """ class that has all the funcions and variables to train a model on your custom dataset. model: nn.Module @@ -52,7 +44,8 @@ class that has all the funcions and variables to train a model on your custom da model_save_path: str shuffle: bool device: str ["cpu", "cuda"] - ''' + """ + def __init__( self, model, @@ -86,9 +79,7 @@ def __init__( trainset, self.mode, device, transforms ) except: - self.trainset = get_dataset( - trainset, self.mode, device, transforms - ) + self.trainset = get_dataset(trainset, self.mode, device, transforms) elif isinstance(trainset, Dataset) or isinstance(trainset, ImageDataset): self.trainset = trainset @@ -114,12 +105,12 @@ def __init__( self.val_dl = get_dataloader(self.valset, self.b_size, self.shuffle) def fit(self): - ''' + """ Function that has the training loop implemented. It inherits all the necessary components from the Trainer class. Returns the loss values and acc values if applicable. - ''' + """ flag = self.mode == "classification" or self.mode == "detection" scaler = torch.cuda.amp.GradScaler() @@ -217,11 +208,11 @@ def fit(self): return losses def test_sample(self, image, label=None): - ''' + """ Used to test the model on one image. Returns the prediction. - ''' + """ pred = self.model(image) @@ -232,11 +223,11 @@ def test_sample(self, image, label=None): return pred def evaluate(self, test_path): - ''' + """ Used to evaluate the model on the test dataset. Returns the losses. 
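
        A rough end-to-end sketch (paths and variable names are illustrative only):

            trainer = Trainer(model, "data\\", mode="classification", device="cuda")
            losses, acc = trainer.fit()
            test_loss = trainer.evaluate("data\\test")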
- ''' + """ test_dl = get_dataloader( ImageDataset(test_path, self.mode, device), self.b_size, False From e677c889116116910bc42e25bf8bae5862ffd413 Mon Sep 17 00:00:00 2001 From: Rohit R Date: Mon, 9 May 2022 22:31:46 +0530 Subject: [PATCH 05/28] removed x100 in train for acc --- src/subpixel/train.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/src/subpixel/train.py b/src/subpixel/train.py index af59775..4d1f300 100644 --- a/src/subpixel/train.py +++ b/src/subpixel/train.py @@ -177,7 +177,7 @@ def fit(self): ) print( - f"{epoch+1}/{self.epochs} -- Train Loss: {losses['train'][-1]} -- Train acc: {acc['train'][-1] *100}% -- Val Loss: {losses['val'][-1]} -- Val acc: {acc['val'][-1]*100}%" + f"{epoch+1}/{self.epochs} -- Train Loss: {losses['train'][-1]} -- Train acc: {acc['train'][-1]}% -- Val Loss: {losses['val'][-1]} -- Val acc: {acc['val'][-1]}%" ) else: print( @@ -192,7 +192,7 @@ def fit(self): ) print( - f"{epoch+1}/{self.epochs} -- Train Loss: {losses['train'][-1]} -- Train acc: {acc['train'][-1] * 100}%" + f"{epoch+1}/{self.epochs} -- Train Loss: {losses['train'][-1]} -- Train acc: {acc['train'][-1]}%" ) else: print( From 97b65c48d183e367c19ba627ce99e785078dda65 Mon Sep 17 00:00:00 2001 From: Rohit R Date: Mon, 9 May 2022 23:01:43 +0530 Subject: [PATCH 06/28] test changes --- src/subpixel/test.py | 31 +++++++++++++++++++------------ 1 file changed, 19 insertions(+), 12 deletions(-) diff --git a/src/subpixel/test.py b/src/subpixel/test.py index 010d796..7883746 100644 --- a/src/subpixel/test.py +++ b/src/subpixel/test.py @@ -5,41 +5,48 @@ from torch.utils import data import torchvision from model import Model -from PIL import Image +from data import ImageDataset -dataset = torchvision.datasets.FashionMNIST("./", download=True) +# dataset = torchvision.datasets.FashionMNIST("./", download=True) class Datas(torch.utils.data.Dataset): - def __init__(self, dataset) -> None: + def __init__(self, dataset): super().__init__() + self.dataset = dataset def __getitem__(self, index): return ( torch.tensor(np.array(self.dataset[index][0])).unsqueeze(0).float().cuda(), - torch.tensor([1 if i == self.dataset[index][1] else 0 for i in range(10)]).float().cuda() + torch.tensor([1 if i == self.dataset[index][1] else 0 for i in range(10)]) + .float() + .cuda(), ) def __len__(self): - # return 1000 + return len(self.dataset) class Test: - def __init__(self, model, dataset, loss_fun) -> None: + def __init__(self, model, dataset_path, loss_fun, mode, device, transforms=None): + self.model = model - # x = int(0.1*len(dataset)) if int(len(dataset))<100 else 100 - self.dataset= dataset + self.mode = mode + self.dataset = ImageDataset( + dataset_path, mode=mode, device=device, transforms=transforms + ) self.loss_fun = loss_fun def test(self): - print("Testing!") - self.model.fit(self.dataset, self.loss_fun, optimizer= "adam") + + self.model.fit(self.dataset, self.loss_fun, mode=self.mode, optimizer="adam") + -datase = Datas(dataset) +datase = Datas(dataset_path) model = Model().cuda() -tes = Test(model, datase, loss_fun= nn.MSELoss()) +tes = Test(model, datase, loss_fun=nn.MSELoss()) tes.test() From a2723b068bdc7247515536dd20e1bdd9632da323 Mon Sep 17 00:00:00 2001 From: Rohit R Date: Sun, 15 May 2022 23:39:52 +0530 Subject: [PATCH 07/28] new files --- src/subpixel/ml/experimental/gradcam.py | 0 src/subpixel/ml/tabular.py | 4 ++++ 2 files changed, 4 insertions(+) create mode 100644 src/subpixel/ml/experimental/gradcam.py create mode 100644 src/subpixel/ml/tabular.py diff --git 
a/src/subpixel/ml/experimental/gradcam.py b/src/subpixel/ml/experimental/gradcam.py new file mode 100644 index 0000000..e69de29 diff --git a/src/subpixel/ml/tabular.py b/src/subpixel/ml/tabular.py new file mode 100644 index 0000000..7daa5d4 --- /dev/null +++ b/src/subpixel/ml/tabular.py @@ -0,0 +1,4 @@ +import sklearn +import numpy as np +import pandas as pd + From 57a11ac2684b481dd5130cd8e1c0c6f8fe8bb89c Mon Sep 17 00:00:00 2001 From: audi1712 Date: Wed, 8 Jun 2022 22:43:11 +0530 Subject: [PATCH 08/28] updated folder structure --- src/subpixel/{ => vision}/data.py | 238 +++++++------- src/subpixel/{ => vision}/train.py | 486 ++++++++++++++--------------- 2 files changed, 362 insertions(+), 362 deletions(-) rename src/subpixel/{ => vision}/data.py (96%) rename src/subpixel/{ => vision}/train.py (96%) diff --git a/src/subpixel/data.py b/src/subpixel/vision/data.py similarity index 96% rename from src/subpixel/data.py rename to src/subpixel/vision/data.py index 43245da..9f3c05d 100644 --- a/src/subpixel/data.py +++ b/src/subpixel/vision/data.py @@ -1,119 +1,119 @@ -# file that scans for data from ordered folders and generates DataLoader class. -# ----------------------------------------------------------------------------------------------- -# Classification - data/train/images, data/train/train_data.csv, data/val/images and data/val/val_data.csv. -# Segmentation - data/train/images, data/train/masks, data/train/train_data.csv, data/val/images, data/val/masks and data/val/val_data.csv. -# Object Detection - data/train/images, data/train/train_data.csv, data/val/images and data/val/val_data.csv. -# bboxes - [x, y, h, w, classes] - -from torch.utils.data import Dataset -import torch -import pandas as pd -import numpy as np -from PIL import Image -from utils import * -import warnings - -warnings.filterwarnings("ignore") -torch.cuda.empty_cache() - - -class ImageDataset(Dataset): - ''' - Class that takes in the path of the dataset and converts it into a torch.utils.data.Dataset object. 
- ''' - def __init__(self, path, mode, device, transforms=None, train=True): - super().__init__() - - self.transforms, self.mode, self.device = transforms, mode, device - self.path = path - - self.df = pd.read_csv(f"{self.path}\\data.csv")[:1] - - if mode == "classification": - self.classes = self.df["class"].unique() - self.df[self.classes] = pd.get_dummies(self.df["class"]) - del self.df["class"] - - if mode == "detection": - for i in range(len(self.df)): - self.df["labels"].iloc[i] = get_boxxes(self.df["labels"].iloc[i]) - - def __getitem__(self, idx): - - img_path = f"{self.path}\\images\\" + self.df["img_path"].iloc[idx] - img = np.array(Image.open(img_path).convert("RGB")) - - if self.mode == "classification": - - label = torch.tensor(np.array(self.df[self.classes].iloc[idx])) - - if self.transforms: - - transformed = self.transforms(image=img) - img = transformed["image"] - - elif self.mode == "detection": - - label = np.array(self.df["labels"].iloc[idx]) - - if self.transforms: - - transformed = self.transforms(image=img, bboxes=label) - img = transformed["image"] - label = transformed["bboxes"] - - label = torch.tensor(label) - - elif self.mode == "segmentation": - - img_path = f"{self.path}\\masks\\" + self.df["mask_path"].iloc[idx] - label = np.array(Image.open(img_path).convert("RGB")) - - if self.transforms: - - transformed = self.transforms(image=img, mask=label) - img = transformed["image"] - label = transformed["mask"] - - label = torch.tensor(label).permute(2, 0, 1) - - return ( - torch.tensor(img).permute(2, 0, 1).float().to(self.device), - label.float().to(self.device), - ) - - def __len__(self): - return len(self.df) - - -def get_dataset(path, mode, device, transforms=None): - ''' - Function that takes in the path and generates a trainset and valset (if present). - - path: str - mode: str - device: str - transforms: albumentations.transforms - - Returns trainset and valset - ''' - - trainset = ImageDataset( - f"{path}train\\", mode, device, transforms=transforms, train=True - ) - try: - valset = ImageDataset(f"{path}\\val\\", mode, device, train=False) - return trainset, valset - except FileNotFoundError: - return trainset - -def get_dataloader(datset, b_size, shuffle): - ''' - Converts the dataset to a DataLoader. - - dataset: torch.utils.data.Dataset - - Returns torch.utils.data.DataLoader - ''' - - return DataLoader(datset, b_size, shuffle) +# file that scans for data from ordered folders and generates DataLoader class. +# ----------------------------------------------------------------------------------------------- +# Classification - data/train/images, data/train/train_data.csv, data/val/images and data/val/val_data.csv. +# Segmentation - data/train/images, data/train/masks, data/train/train_data.csv, data/val/images, data/val/masks and data/val/val_data.csv. +# Object Detection - data/train/images, data/train/train_data.csv, data/val/images and data/val/val_data.csv. +# bboxes - [x, y, h, w, classes] + +from torch.utils.data import Dataset +import torch +import pandas as pd +import numpy as np +from PIL import Image +from utils import * +import warnings + +warnings.filterwarnings("ignore") +torch.cuda.empty_cache() + + +class ImageDataset(Dataset): + ''' + Class that takes in the path of the dataset and converts it into a torch.utils.data.Dataset object. 
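+
+    A rough construction sketch (the path and mode are illustrative only):
+
+        trainset = ImageDataset("data\\train", mode="classification",
+                                device="cuda", transforms=None, train=True)
+        img, label = trainset[0]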
+ ''' + def __init__(self, path, mode, device, transforms=None, train=True): + super().__init__() + + self.transforms, self.mode, self.device = transforms, mode, device + self.path = path + + self.df = pd.read_csv(f"{self.path}\\data.csv")[:1] + + if mode == "classification": + self.classes = self.df["class"].unique() + self.df[self.classes] = pd.get_dummies(self.df["class"]) + del self.df["class"] + + if mode == "detection": + for i in range(len(self.df)): + self.df["labels"].iloc[i] = get_boxxes(self.df["labels"].iloc[i]) + + def __getitem__(self, idx): + + img_path = f"{self.path}\\images\\" + self.df["img_path"].iloc[idx] + img = np.array(Image.open(img_path).convert("RGB")) + + if self.mode == "classification": + + label = torch.tensor(np.array(self.df[self.classes].iloc[idx])) + + if self.transforms: + + transformed = self.transforms(image=img) + img = transformed["image"] + + elif self.mode == "detection": + + label = np.array(self.df["labels"].iloc[idx]) + + if self.transforms: + + transformed = self.transforms(image=img, bboxes=label) + img = transformed["image"] + label = transformed["bboxes"] + + label = torch.tensor(label) + + elif self.mode == "segmentation": + + img_path = f"{self.path}\\masks\\" + self.df["mask_path"].iloc[idx] + label = np.array(Image.open(img_path).convert("RGB")) + + if self.transforms: + + transformed = self.transforms(image=img, mask=label) + img = transformed["image"] + label = transformed["mask"] + + label = torch.tensor(label).permute(2, 0, 1) + + return ( + torch.tensor(img).permute(2, 0, 1).float().to(self.device), + label.float().to(self.device), + ) + + def __len__(self): + return len(self.df) + + +def get_dataset(path, mode, device, transforms=None): + ''' + Function that takes in the path and generates a trainset and valset (if present). + + path: str + mode: str + device: str + transforms: albumentations.transforms + + Returns trainset and valset + ''' + + trainset = ImageDataset( + f"{path}train\\", mode, device, transforms=transforms, train=True + ) + try: + valset = ImageDataset(f"{path}\\val\\", mode, device, train=False) + return trainset, valset + except FileNotFoundError: + return trainset + +def get_dataloader(datset, b_size, shuffle): + ''' + Converts the dataset to a DataLoader. + + dataset: torch.utils.data.Dataset + + Returns torch.utils.data.DataLoader + ''' + + return DataLoader(datset, b_size, shuffle) diff --git a/src/subpixel/train.py b/src/subpixel/vision/train.py similarity index 96% rename from src/subpixel/train.py rename to src/subpixel/vision/train.py index 4d1f300..b862bb7 100644 --- a/src/subpixel/train.py +++ b/src/subpixel/vision/train.py @@ -1,243 +1,243 @@ -from torch.utils.data import Dataset -import torch -from tqdm import tqdm -import warnings -from data import ImageDataset, get_dataloader, get_dataset -import numpy as np -import torch.nn as nn -from utils import findLR, find_batch_size, get_optimizer - - -warnings.filterwarnings("ignore") -torch.cuda.empty_cache() - -device = "cuda" if torch.cuda.is_available() else "cpu" - - -def accuracy(out: torch.Tensor, labels: torch.Tensor): - """ - Finds the accuracy of the model by comparing the output of the model to the labels. - - out: tensor - labels: tensor - """ - try: - return (out == labels).sum().item() / out.size(0) * out.size(1) * out.size(2) - except: - return (out == labels).sum().item() / out.size(0) * out.size(1) - - -class Trainer: - """ - class that has all the funcions and variables to train a model on your custom dataset. 
- - model: nn.Module - trainset: str or (Dataset, ImageDataset) - transforms: - optimizer: str - valset: (Dataset, ImageDataset) - epochs: int - mode: str ["classification", "detection", "segmentation"] - loss_fn: nn.Module - learning_rate: float - weight_decay: float - model_save_path: str - shuffle: bool - device: str ["cpu", "cuda"] - """ - - def __init__( - self, - model, - trainset, - transforms=None, - optimizer="adam", - valset=None, - epochs=10, - mode="classification", - loss_fn=nn.MSELoss(), - learning_rate=None, - weight_decay=1e-5, - model_save_path="./", - shuffle=True, - device="cpu", - ): - self.model = model.cuda() if device == "cuda" else model - self.valset = valset - self.epochs = epochs - self.mode = mode - self.loss_fn = loss_fn - self.weight_decay = weight_decay - self.model_save_path = model_save_path - self.learning_rate = learning_rate - self.shuffle = shuffle - self.device = device - - if isinstance(trainset, str): - try: - self.trainset, self.valset = get_dataset( - trainset, self.mode, device, transforms - ) - except: - self.trainset = get_dataset(trainset, self.mode, device, transforms) - - elif isinstance(trainset, Dataset) or isinstance(trainset, ImageDataset): - self.trainset = trainset - self.valset = valset - - self.b_size = find_batch_size(model, self.trainset) - - if learning_rate == None: - self.learning_rate = findLR( - self.model, self.trainset, self.loss_fn, optimizer - )[0] - - self.optimizer = get_optimizer( - self.model, - optim=optimizer, - lr=self.learning_rate, - weight_decay=self.weight_decay, - ) - - self.train_dl = get_dataloader(self.trainset, self.b_size, self.shuffle) - - if self.valset != None: - self.val_dl = get_dataloader(self.valset, self.b_size, self.shuffle) - - def fit(self): - """ - Function that has the training loop implemented. - It inherits all the necessary components from the Trainer class. - - Returns the loss values and acc values if applicable. 
- """ - - flag = self.mode == "classification" or self.mode == "detection" - scaler = torch.cuda.amp.GradScaler() - losses = {"train": [], "val": []} - acc = {"train": [], "val": []} - - for epoch in range(self.epochs): - - epoch_loss = {"train": [], "val": []} - epoch_acc = {"train": [], "val": []} - - self.model.train() - for img, label in tqdm(self.train_dl): - - with torch.cuda.amp.autocast(): - - pred = self.model(img) - loss = self.loss_fn(pred, label) - - epoch_loss["train"].append(loss) - - if self.mode == "classification": - a = accuracy(pred, label) - epoch_acc["train"].append(a) - - elif self.mode == "detection": - a = accuracy(pred[1:5], label[1:5]) - epoch_acc["train"].append(a) - - scaler.scale(loss).backward() - scaler.step(self.optimizer) - scaler.update() - self.optimizer.zero_grad() - - losses["train"].append(sum(epoch_loss["train"]) / len(epoch_loss["train"])) - - if self.valset != None: - - self.model.eval() - for img, label in tqdm(self.val_dl): - - with torch.cuda.amp.autocast(): - - pred = self.model(img) - loss = self.loss_fn(pred, label) - - epoch_loss["val"].append(loss) - - if self.mode == "classification": - a = accuracy(pred, label) - epoch_acc["val"].append(a) - - elif self.mode == "detection": - a = accuracy(pred[1:5], label[1:5]) - epoch_acc["val"].append(a) - - losses["val"].append(sum(epoch_loss["val"]) / len(epoch_loss["val"])) - - if flag: - - acc["val"].append(sum(epoch_acc["val"]) / len(epoch_acc["val"])) - acc["train"].append( - sum(epoch_acc["train"]) / len(epoch_acc["train"]) - ) - - print( - f"{epoch+1}/{self.epochs} -- Train Loss: {losses['train'][-1]} -- Train acc: {acc['train'][-1]}% -- Val Loss: {losses['val'][-1]} -- Val acc: {acc['val'][-1]}%" - ) - else: - print( - f"{epoch+1}/{self.epochs} -- Train Loss: {losses['train'][-1]} -- Val Loss: {losses['val'][-1]}" - ) - - else: - - if flag: - acc["train"].append( - sum(epoch_acc["train"]) / len(epoch_acc["train"]) - ) - - print( - f"{epoch+1}/{self.epochs} -- Train Loss: {losses['train'][-1]} -- Train acc: {acc['train'][-1]}%" - ) - else: - print( - f"{epoch+1}/{self.epochs} -- Train Loss: {losses['train'][-1]}" - ) - - torch.save(self.model, f"{self.model_save_path}\\model") - - if flag: - return losses, acc - - else: - return losses - - def test_sample(self, image, label=None): - """ - Used to test the model on one image. - - Returns the prediction. - """ - - pred = self.model(image) - - if label != None: - loss = self.loss_fn(label, pred).detach() - return pred, loss - - return pred - - def evaluate(self, test_path): - """ - Used to evaluate the model on the test dataset. - - Returns the losses. - """ - - test_dl = get_dataloader( - ImageDataset(test_path, self.mode, device), self.b_size, False - ) - losses = [] - - for img, label in test_dl: - pred = self.model(img) - loss = self.loss_fn(label, pred).detach() - losses.append(loss) - - return sum(losses) / len(losses) - +from torch.utils.data import Dataset +import torch +from tqdm import tqdm +import warnings +from data import ImageDataset, get_dataloader, get_dataset +import numpy as np +import torch.nn as nn +from utils import findLR, find_batch_size, get_optimizer + + +warnings.filterwarnings("ignore") +torch.cuda.empty_cache() + +device = "cuda" if torch.cuda.is_available() else "cpu" + + +def accuracy(out: torch.Tensor, labels: torch.Tensor): + """ + Finds the accuracy of the model by comparing the output of the model to the labels. 
+ + out: tensor + labels: tensor + """ + try: + return (out == labels).sum().item() / out.size(0) * out.size(1) * out.size(2) + except: + return (out == labels).sum().item() / out.size(0) * out.size(1) + + +class Trainer: + """ + class that has all the funcions and variables to train a model on your custom dataset. + + model: nn.Module + trainset: str or (Dataset, ImageDataset) + transforms: + optimizer: str + valset: (Dataset, ImageDataset) + epochs: int + mode: str ["classification", "detection", "segmentation"] + loss_fn: nn.Module + learning_rate: float + weight_decay: float + model_save_path: str + shuffle: bool + device: str ["cpu", "cuda"] + """ + + def __init__( + self, + model, + trainset, + transforms=None, + optimizer="adam", + valset=None, + epochs=10, + mode="classification", + loss_fn=nn.MSELoss(), + learning_rate=None, + weight_decay=1e-5, + model_save_path="./", + shuffle=True, + device="cpu", + ): + self.model = model.cuda() if device == "cuda" else model + self.valset = valset + self.epochs = epochs + self.mode = mode + self.loss_fn = loss_fn + self.weight_decay = weight_decay + self.model_save_path = model_save_path + self.learning_rate = learning_rate + self.shuffle = shuffle + self.device = device + + if isinstance(trainset, str): + try: + self.trainset, self.valset = get_dataset( + trainset, self.mode, device, transforms + ) + except: + self.trainset = get_dataset(trainset, self.mode, device, transforms) + + elif isinstance(trainset, Dataset) or isinstance(trainset, ImageDataset): + self.trainset = trainset + self.valset = valset + + self.b_size = find_batch_size(model, self.trainset) + + if learning_rate == None: + self.learning_rate = findLR( + self.model, self.trainset, self.loss_fn, optimizer + )[0] + + self.optimizer = get_optimizer( + self.model, + optim=optimizer, + lr=self.learning_rate, + weight_decay=self.weight_decay, + ) + + self.train_dl = get_dataloader(self.trainset, self.b_size, self.shuffle) + + if self.valset != None: + self.val_dl = get_dataloader(self.valset, self.b_size, self.shuffle) + + def fit(self): + """ + Function that has the training loop implemented. + It inherits all the necessary components from the Trainer class. + + Returns the loss values and acc values if applicable. 
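+
+        Illustrative call (trainer is a hypothetical instance of this class):
+
+            losses, acc = trainer.fit()   # classification / detection modes
+            losses = trainer.fit()        # other modes (e.g. segmentation)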
+ """ + + flag = self.mode == "classification" or self.mode == "detection" + scaler = torch.cuda.amp.GradScaler() + losses = {"train": [], "val": []} + acc = {"train": [], "val": []} + + for epoch in range(self.epochs): + + epoch_loss = {"train": [], "val": []} + epoch_acc = {"train": [], "val": []} + + self.model.train() + for img, label in tqdm(self.train_dl): + + with torch.cuda.amp.autocast(): + + pred = self.model(img) + loss = self.loss_fn(pred, label) + + epoch_loss["train"].append(loss) + + if self.mode == "classification": + a = accuracy(pred, label) + epoch_acc["train"].append(a) + + elif self.mode == "detection": + a = accuracy(pred[1:5], label[1:5]) + epoch_acc["train"].append(a) + + scaler.scale(loss).backward() + scaler.step(self.optimizer) + scaler.update() + self.optimizer.zero_grad() + + losses["train"].append(sum(epoch_loss["train"]) / len(epoch_loss["train"])) + + if self.valset != None: + + self.model.eval() + for img, label in tqdm(self.val_dl): + + with torch.cuda.amp.autocast(): + + pred = self.model(img) + loss = self.loss_fn(pred, label) + + epoch_loss["val"].append(loss) + + if self.mode == "classification": + a = accuracy(pred, label) + epoch_acc["val"].append(a) + + elif self.mode == "detection": + a = accuracy(pred[1:5], label[1:5]) + epoch_acc["val"].append(a) + + losses["val"].append(sum(epoch_loss["val"]) / len(epoch_loss["val"])) + + if flag: + + acc["val"].append(sum(epoch_acc["val"]) / len(epoch_acc["val"])) + acc["train"].append( + sum(epoch_acc["train"]) / len(epoch_acc["train"]) + ) + + print( + f"{epoch+1}/{self.epochs} -- Train Loss: {losses['train'][-1]} -- Train acc: {acc['train'][-1]}% -- Val Loss: {losses['val'][-1]} -- Val acc: {acc['val'][-1]}%" + ) + else: + print( + f"{epoch+1}/{self.epochs} -- Train Loss: {losses['train'][-1]} -- Val Loss: {losses['val'][-1]}" + ) + + else: + + if flag: + acc["train"].append( + sum(epoch_acc["train"]) / len(epoch_acc["train"]) + ) + + print( + f"{epoch+1}/{self.epochs} -- Train Loss: {losses['train'][-1]} -- Train acc: {acc['train'][-1]}%" + ) + else: + print( + f"{epoch+1}/{self.epochs} -- Train Loss: {losses['train'][-1]}" + ) + + torch.save(self.model, f"{self.model_save_path}\\model") + + if flag: + return losses, acc + + else: + return losses + + def test_sample(self, image, label=None): + """ + Used to test the model on one image. + + Returns the prediction. + """ + + pred = self.model(image) + + if label != None: + loss = self.loss_fn(label, pred).detach() + return pred, loss + + return pred + + def evaluate(self, test_path): + """ + Used to evaluate the model on the test dataset. + + Returns the losses. 
+ """ + + test_dl = get_dataloader( + ImageDataset(test_path, self.mode, device), self.b_size, False + ) + losses = [] + + for img, label in test_dl: + pred = self.model(img) + loss = self.loss_fn(label, pred).detach() + losses.append(loss) + + return sum(losses) / len(losses) + From 928b4e1797bca4610b58a625b003f06c80d96bc6 Mon Sep 17 00:00:00 2001 From: Rohit R Date: Wed, 8 Jun 2022 22:49:34 +0530 Subject: [PATCH 09/28] folder --- src/subpixel/{ml => }/experimental/gradcam.py | 0 src/subpixel/utils.py | 10 ---------- 2 files changed, 10 deletions(-) rename src/subpixel/{ml => }/experimental/gradcam.py (100%) diff --git a/src/subpixel/ml/experimental/gradcam.py b/src/subpixel/experimental/gradcam.py similarity index 100% rename from src/subpixel/ml/experimental/gradcam.py rename to src/subpixel/experimental/gradcam.py diff --git a/src/subpixel/utils.py b/src/subpixel/utils.py index b082e8f..0ade175 100644 --- a/src/subpixel/utils.py +++ b/src/subpixel/utils.py @@ -11,16 +11,6 @@ import random -# def show_batch(data): -# pass - - -# def EncodingToClass(lst, classes): - -# lst = list(lst.detach().squeeze(0).numpy()) -# return classes[lst.index(max(lst))] - - def get_boxxes(t): # '{x, y, h, w, [classes]}' -> [x, y, h, w, classes] bbox = list(json.loads(t).values()) From 3b17f9137442122ceacaeb86daa250bbc4a45047 Mon Sep 17 00:00:00 2001 From: audi1712 Date: Wed, 8 Jun 2022 22:53:03 +0530 Subject: [PATCH 10/28] import trainer fixed --- src/subpixel/model.py | 4 ++-- src/subpixel/vision/train.py | 2 +- 2 files changed, 3 insertions(+), 3 deletions(-) diff --git a/src/subpixel/model.py b/src/subpixel/model.py index 7286291..58d4565 100644 --- a/src/subpixel/model.py +++ b/src/subpixel/model.py @@ -4,7 +4,7 @@ import torch.nn as nn import json from torchinfo import summary -from train import Trainer +from vision.train import visionTrainer from utils import findLR, find_batch_size import numpy as np @@ -79,7 +79,7 @@ def fit(self,trainset : Union[str,nn.Module], loss_fun : nn.Module,optimizer : s valset (optional): nn.Module | None , default None, provides validation set. Note:- if trainset is str automatically valset is taken from directory structure. ''' - self.trainer = Trainer(self, trainset= trainset, epochs= 10, learning_rate= lr, loss_fn= loss_fun, optimizer= optimizer, mode= mode, valset= valset) + self.trainer = visionTrainer(self, trainset= trainset, epochs= 10, learning_rate= lr, loss_fn= loss_fun, optimizer= optimizer, mode= mode, valset= valset) self.history = self.trainer.fit() return self.history diff --git a/src/subpixel/vision/train.py b/src/subpixel/vision/train.py index b862bb7..06c2b40 100644 --- a/src/subpixel/vision/train.py +++ b/src/subpixel/vision/train.py @@ -27,7 +27,7 @@ def accuracy(out: torch.Tensor, labels: torch.Tensor): return (out == labels).sum().item() / out.size(0) * out.size(1) -class Trainer: +class visionTrainer: """ class that has all the funcions and variables to train a model on your custom dataset. 
From e9249fe0af4e530b80946dd2f2358ebfc825c11b Mon Sep 17 00:00:00 2001 From: audi1712 Date: Wed, 8 Jun 2022 22:54:17 +0530 Subject: [PATCH 11/28] test errors --- src/subpixel/test.py | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) diff --git a/src/subpixel/test.py b/src/subpixel/test.py index 7883746..ac992a2 100644 --- a/src/subpixel/test.py +++ b/src/subpixel/test.py @@ -5,7 +5,7 @@ from torch.utils import data import torchvision from model import Model -from data import ImageDataset +from vision.data import ImageDataset # dataset = torchvision.datasets.FashionMNIST("./", download=True) @@ -45,8 +45,8 @@ def test(self): -datase = Datas(dataset_path) -model = Model().cuda() -tes = Test(model, datase, loss_fun=nn.MSELoss()) -tes.test() +#datase = Datas(dataset_path) +#model = Model().cuda() +#tes = Test(model, datase, loss_fun=nn.MSELoss()) +#tes.test() From c930cb59b1970f919ee4803fe945865c653b3018 Mon Sep 17 00:00:00 2001 From: Rohit R Date: Mon, 13 Jun 2022 13:38:14 +0530 Subject: [PATCH 12/28] chuma --- src/subpixel/ml/tabular.py | 1 + 1 file changed, 1 insertion(+) diff --git a/src/subpixel/ml/tabular.py b/src/subpixel/ml/tabular.py index 7daa5d4..b36a289 100644 --- a/src/subpixel/ml/tabular.py +++ b/src/subpixel/ml/tabular.py @@ -2,3 +2,4 @@ import numpy as np import pandas as pd +# changes From 39da4c71afee71b71f716b74f57d5e3d620d84df Mon Sep 17 00:00:00 2001 From: Rohit R Date: Thu, 23 Jun 2022 18:37:16 +0530 Subject: [PATCH 13/28] finally ml --- src/subpixel/ml/tabular.py | 5 -- src/subpixel/ml/utils.py | 180 +++++++++++++++++++++++++++++++++++++ 2 files changed, 180 insertions(+), 5 deletions(-) delete mode 100644 src/subpixel/ml/tabular.py create mode 100644 src/subpixel/ml/utils.py diff --git a/src/subpixel/ml/tabular.py b/src/subpixel/ml/tabular.py deleted file mode 100644 index b36a289..0000000 --- a/src/subpixel/ml/tabular.py +++ /dev/null @@ -1,5 +0,0 @@ -import sklearn -import numpy as np -import pandas as pd - -# changes diff --git a/src/subpixel/ml/utils.py b/src/subpixel/ml/utils.py new file mode 100644 index 0000000..a2222f3 --- /dev/null +++ b/src/subpixel/ml/utils.py @@ -0,0 +1,180 @@ +from statistics import median +import sklearn +import matplotlib.pyplot as plt +import seaborn as sns +import itertools +from sklearn.ensemble import ExtraTreesClassifier +import pandas as pd + + +def accuracy(y_true, y_pred): + return sklearn.metrics.accuracy_score(y_true, y_pred) + + +def correlation_matrix(df, cols=False): + + if cols: + df = df[cols] + + return df.corr() + + +def find_outliers(df, cols=False, remove=False): + + if cols: + df = df[cols] + numeric_cols = df._get_numeric_data().columns.tolist() + else: + numeric_cols = df._get_numeric_data().columns.tolist() + + outliers = {} + + for col in numeric_cols: + + outlier_list = [] + + q1 = df[col].quantile(0.25) + q3 = df[col].quantile(0.75) + iqr = q3 - q1 + + low_bound = q1 - (iqr * 1.5) + high_bound = q3 + (iqr * 1.5) + + for i, val in enumerate(df[col]): + if val < low_bound or val > high_bound: + outlier_list.append(i) + + if remove: + df.drop(df.index[i], inplace=True) + + outliers[col] = outlier_list + + return outliers + + +def boxplot(df, cols=False): + + if cols: + df = df[cols] + numeric_cols = df._get_numeric_data().columns.tolist() + else: + numeric_cols = df._get_numeric_data().columns.tolist() + + i = 1 + plt.figure(figsize=(15, 25)) + for col in numeric_cols: + plt.subplot(6, 3, i) + sns.boxplot(y=df[col], color="green") + i += 1 + + plt.show() + + +def get_combinations(list_of_values): + 
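+    # e.g. get_combinations(["a", "b", "c"]) -> [("a", "b"), ("a", "c"), ("b", "c")]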
return list(itertools.combinations(list_of_values, 2)) + + +# NEEDS TO BE CHANGED TO DISPLAY IN SAME PAGE +def feature_correlation(df, cols=False, kind="reg"): + + if cols: + lst = get_combinations(cols) + + else: + lst = get_combinations(df.columns) + + for i, j in lst: + + sns.jointplot(x=i, y=j, data=df, kind=kind, truncate=False, color="m", height=7) + + plt.show() + + +def fill_nan_with_mean(df): + for col in df.columns: + df[col] = df[col].fillna(df[col].mean()) + return df + + +def delete_row_with_nan(df): + df.dropna(inplace=True) + return df + + +def pie_chart(df, col): + + df[col].value_counts().plot(kind="pie", autopct="%1.1f%%") + plt.show() + + +def count_plot(df, col): + + df[col].value_counts().plot(kind="bar") + plt.show() + + +def feature_importance(x, y, show_plot=False): + model = ExtraTreesClassifier() + model.fit(x, y) + + feat_importances = pd.Series(model.feature_importances_, index=x.columns) + + if show_plot: + feat_importances.nlargest(12).plot(kind="barh") + plt.show() + + return feat_importances + + +def histogram(df, cols, bins=10): + + n = len(cols) + + plt.figure(figsize=(10, 10)) + + for i, col in enumerate(cols): + plt.subplot(n, 1, i + 1) + sns.histplot( + df[col], + bins=bins, + color="Red", + kde_kws={"color": "y", "lw": 3, "label": "KDE"}, + ) + + plt.show() + + +def get_median(df, col): + return df[col].median() + + +def get_mean(df, col): + return df[col].mean() + + +def check_for_outliers(df, cols=False, threshold=10): + + cols = df.columns if cols is False else cols + + cols_with_outliers = [] + + for col in cols: + mean = get_mean(df, col) + median = get_median(df, col) + + if abs(mean - median) > (threshold / 100) * max(mean, median): + cols_with_outliers.append(col) + + return cols_with_outliers + + +def get_correlation_with_target(df, target, cols=False): + + if cols: + df = df[cols] + + return df.corrwith(target).sort_values(ascending=False) + + +def get_kurtosis(df, col): + return df[col].kurtosis() From 610e38396859dca5ae74e030b5069f9e843ec139 Mon Sep 17 00:00:00 2001 From: Rohit R Date: Thu, 23 Jun 2022 18:38:09 +0530 Subject: [PATCH 14/28] . 
--- src/subpixel/ml/utils.py | 1 - 1 file changed, 1 deletion(-) diff --git a/src/subpixel/ml/utils.py b/src/subpixel/ml/utils.py index a2222f3..040c9ff 100644 --- a/src/subpixel/ml/utils.py +++ b/src/subpixel/ml/utils.py @@ -1,4 +1,3 @@ -from statistics import median import sklearn import matplotlib.pyplot as plt import seaborn as sns From 1b53cd2c4397d17b6ccb9396f31d2385d1788e32 Mon Sep 17 00:00:00 2001 From: Rohit R Date: Thu, 23 Jun 2022 19:56:41 +0530 Subject: [PATCH 15/28] added training --- src/subpixel/ml/train.py | 74 ++++++++++++++++++++++++++++++++++++++++ src/subpixel/ml/utils.py | 32 +++++++++++++++++ 2 files changed, 106 insertions(+) create mode 100644 src/subpixel/ml/train.py diff --git a/src/subpixel/ml/train.py b/src/subpixel/ml/train.py new file mode 100644 index 0000000..a28606f --- /dev/null +++ b/src/subpixel/ml/train.py @@ -0,0 +1,74 @@ +import sklearn +from sklearn.utils import resample +from sklearn.neural_network import MLPClassifier +from sklearn.neighbors import KNeighborsClassifier +from sklearn.svm import SVC +from sklearn.gaussian_process import GaussianProcessClassifier +from sklearn.gaussian_process.kernels import RBF +from sklearn.tree import DecisionTreeClassifier +from sklearn.ensemble import RandomForestClassifier, AdaBoostClassifier +from sklearn.naive_bayes import GaussianNB +from sklearn.discriminant_analysis import QuadraticDiscriminantAnalysis + + +class Regression: + def __init__(self, df, target_col, type=None): + + self.df = df + self.target_col = target_col + self.X = self.df[self.df.columns.difference([self.target_col])] + self.y = self.df[self.target_col] + self.type = type + + self.model_dict = { + "Nearest Neighbors": KNeighborsClassifier(3), + "Linear SVM": SVC(kernel="linear", C=0.025), + "RBF SVM": SVC(gamma=2, C=1), + "Gaussian Process": GaussianProcessClassifier(1.0 * RBF(1.0)), + "Decision Tree": DecisionTreeClassifier(max_depth=5), + "Random Forest": RandomForestClassifier( + max_depth=5, n_estimators=10, max_features=1 + ), + "Neural Net": MLPClassifier(alpha=1), + "AdaBoost": AdaBoostClassifier(), + "Naive Bayes": GaussianNB(), + "QDA": QuadraticDiscriminantAnalysis(), + } + + if self.type == "linear": + self.model = sklearn.linear_model.LinearRegression() + else: + self.model = self.model_dict[self.find_classfier()[0]] + + self.model.fit(self.X, self.y) + + def find_classfier(self): + + _THRESHOLD = 500 + + if len(self.df) > _THRESHOLD: + self._X = resample(self.X, replace=False, n_samples=_THRESHOLD) + self._y = resample(self.y, replace=False, n_samples=_THRESHOLD) + + else: + self._X = self.X + self._y = self.y + + model_scores = {} + + for name, model in zip(self.model_dict.keys(), self.model_dict.values()): + model.fit(self._X, self._y) + model_scores[name] = model.score(self._X, self._y) + + model_scores = dict(sorted(model_scores.items(), key=lambda item: item[1])) + + return list(model_scores.keys())[0], model_scores + + def predict(self, df): + return self.model.predict(df) + + def score(self): + return self.model.score(self.X, self.y) + + def score_with_test(self, X_test, y_test): + return self.model.score(X_test, y_test) diff --git a/src/subpixel/ml/utils.py b/src/subpixel/ml/utils.py index 040c9ff..07c90a9 100644 --- a/src/subpixel/ml/utils.py +++ b/src/subpixel/ml/utils.py @@ -177,3 +177,35 @@ def get_correlation_with_target(df, target, cols=False): def get_kurtosis(df, col): return df[col].kurtosis() + + +def get_skewness(df, col): + return df[col].skew() + + +def get_variance(df, col): + return df[col].var() + + +def 
get_count_of_unique_values(df, col): + return df[col].nunique() + + +def get_statistics(df, cols=False): + + if not cols: + cols = df.columns + + stats = {} + + for col in cols: + stats[col] = {} + stats[col]["unique_count"] = get_count_of_unique_values(df, col) + stats[col]["mean"] = get_mean(df, col) + stats[col]["median"] = get_median(df, col) + stats[col]["variance"] = get_variance(df, col) + stats[col]["skewness"] = get_skewness(df, col) + stats[col]["kurtosis"] = get_kurtosis(df, col) + + return stats + From 4dd99692da7a48a679a4a8d3d44f857ed4b8f6b8 Mon Sep 17 00:00:00 2001 From: Rohit R Date: Thu, 23 Jun 2022 20:22:41 +0530 Subject: [PATCH 16/28] docs --- src/subpixel/ml/train.py | 34 ++++++++ src/subpixel/ml/utils.py | 165 ++++++++++++++++++++++++++++++++++++++- 2 files changed, 196 insertions(+), 3 deletions(-) diff --git a/src/subpixel/ml/train.py b/src/subpixel/ml/train.py index a28606f..bc07287 100644 --- a/src/subpixel/ml/train.py +++ b/src/subpixel/ml/train.py @@ -12,7 +12,17 @@ class Regression: + """ + Class that contains all the variables and funtions to train a model on the given data. + """ + def __init__(self, df, target_col, type=None): + """ + Init funtion of the Regression class. + :param df: Dataframe + :param target_col: Target column + :param type: Type of the model to train. + """ self.df = df self.target_col = target_col @@ -43,6 +53,11 @@ def __init__(self, df, target_col, type=None): self.model.fit(self.X, self.y) def find_classfier(self): + """ + Finds the best classifier for the given data. + + :return: Name of the best classifier and the model. + """ _THRESHOLD = 500 @@ -65,10 +80,29 @@ def find_classfier(self): return list(model_scores.keys())[0], model_scores def predict(self, df): + """ + Gets the predictions for the given data. + :param df: Dataframe + + :return: Predictions + """ + return self.model.predict(df) def score(self): + """ + Gets the score of the model on train data. + + :return: Score + """ + return self.model.score(self.X, self.y) def score_with_test(self, X_test, y_test): + """ + Gets the score of the model on test data. + + :param X_test: Score on Test data + """ + return self.model.score(X_test, y_test) diff --git a/src/subpixel/ml/utils.py b/src/subpixel/ml/utils.py index 07c90a9..bdcd7d6 100644 --- a/src/subpixel/ml/utils.py +++ b/src/subpixel/ml/utils.py @@ -7,10 +7,24 @@ def accuracy(y_true, y_pred): + """ + Function that finds the accuracy of a model based on the true and predicted values. + :param y_true: True values + :param y_pred: Predicted values + + :return: Accuracy of the model + """ + return sklearn.metrics.accuracy_score(y_true, y_pred) def correlation_matrix(df, cols=False): + """ + Gets the correlation matrix of the dataframe. + :param df: Dataframe + + :return: Correlation matrix + """ if cols: df = df[cols] @@ -19,10 +33,17 @@ def correlation_matrix(df, cols=False): def find_outliers(df, cols=False, remove=False): + """ + Finds outliers in each column of the dataframe. + :param df: Dataframe + :param cols: Columns to check for outliers + :param remove: If True, removes outliers from the dataframe + + :return: list of outliers and dataframe without outliers if remove is True. 
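+
+    Illustrative calls (the column names are hypothetical):
+
+        outliers = find_outliers(df, cols=["age", "income"])
+        outliers, df = find_outliers(df, cols=["age"], remove=True)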
+ """ if cols: - df = df[cols] - numeric_cols = df._get_numeric_data().columns.tolist() + numeric_cols = df[cols]._get_numeric_data().columns.tolist() else: numeric_cols = df._get_numeric_data().columns.tolist() @@ -48,10 +69,19 @@ def find_outliers(df, cols=False, remove=False): outliers[col] = outlier_list - return outliers + if remove: + return outliers, df + else: + return outliers def boxplot(df, cols=False): + """ + Shows a boxplot of the dataframe. + :param df: Dataframe + + :return: None + """ if cols: df = df[cols] @@ -70,11 +100,25 @@ def boxplot(df, cols=False): def get_combinations(list_of_values): + """ + Gets the combinations of the list of values. + :param list_of_values: List of values + + :return: List of combinations + """ return list(itertools.combinations(list_of_values, 2)) # NEEDS TO BE CHANGED TO DISPLAY IN SAME PAGE def feature_correlation(df, cols=False, kind="reg"): + """ + Gets the correlation matrix of the dataframe. + :param df: Dataframe + :param cols: Columns to check for outliers + :param kind: Type of plot to show + + :return: None + """ if cols: lst = get_combinations(cols) @@ -90,29 +134,66 @@ def feature_correlation(df, cols=False, kind="reg"): def fill_nan_with_mean(df): + """ + Fills the NaN values with the mean of the column. + :param df: Dataframe + + :return: Dataframe with NaN values filled with mean. + """ + for col in df.columns: df[col] = df[col].fillna(df[col].mean()) return df def delete_row_with_nan(df): + """ + Delete rows with NaN values. + :param df: Dataframe + + :return: Dataframe without rows with NaN values. + """ + df.dropna(inplace=True) return df def pie_chart(df, col): + """ + Pie chart of the dataframe. + :param df: Dataframe + :param col: Column to show in the pie chart + + :return: None + """ df[col].value_counts().plot(kind="pie", autopct="%1.1f%%") plt.show() def count_plot(df, col): + """ + Count chart of the dataframe. + :param df: Dataframe + :param col: Column to show in the count chart + + :return: None + """ df[col].value_counts().plot(kind="bar") plt.show() def feature_importance(x, y, show_plot=False): + """ + Gets the important features for the given target column. + :param x: Dataframe + :param y: Target column + :param show_plot: If True, shows the plot of the feature importance. + + :return: List of important features + """ + model = ExtraTreesClassifier() model.fit(x, y) @@ -126,6 +207,14 @@ def feature_importance(x, y, show_plot=False): def histogram(df, cols, bins=10): + """ + Shows a histogram of the dataframe. + :param df: Dataframe + :param cols: Columns to show in the histogram + :param bins: Number of bins in the histogram + + :return: None + """ n = len(cols) @@ -144,14 +233,38 @@ def histogram(df, cols, bins=10): def get_median(df, col): + """ + Gets the median of the column. + :param df: Dataframe + :param col: Column to get the median of + + :return: Median of the column + """ + return df[col].median() def get_mean(df, col): + """ + Gets the mean of the column. + :param df: Dataframe + :param col: Column to get the mean of + + :return: Mean of the column + """ + return df[col].mean() def check_for_outliers(df, cols=False, threshold=10): + """ + Finds columns that might have outliers in the dataframe. 
+ :param df: Dataframe + :param cols: Columns to check for outliers + :param threshold: Threshold for deviation of mean from median + + :return: List of columns with outliers + """ cols = df.columns if cols is False else cols @@ -168,6 +281,13 @@ def check_for_outliers(df, cols=False, threshold=10): def get_correlation_with_target(df, target, cols=False): + """ + Gets the correlation between the target column and the other columns. + :param df: Dataframe + :param target: Target column + + :return: List of correlations + """ if cols: df = df[cols] @@ -176,22 +296,61 @@ def get_correlation_with_target(df, target, cols=False): def get_kurtosis(df, col): + """ + Gets the kurtosis of the column. + :param df: Dataframe + :param col: Column to get the kurtosis of + + :return: Kurtosis of the column + """ + return df[col].kurtosis() def get_skewness(df, col): + """ + Gets the skewness of the column. + :param df: Dataframe + :param col: Column to get the skewness of + + :return: skewness of the column + """ + return df[col].skew() def get_variance(df, col): + """ + Gets the variance of the column. + :param df: Dataframe + :param col: Column to get the variance of + + :return: variance of the column + """ + return df[col].var() def get_count_of_unique_values(df, col): + """ + Gets the count of unique values in the column. + :param df: Dataframe + :param col: Column to get the count of unique values of + + :return: count of unique values in the column + """ + return df[col].nunique() def get_statistics(df, cols=False): + """ + Gets the statistics of the dataframe. + :param df: Dataframe + :param cols: Columns to get the statistics of + + :return: Dictionary with the statistics + """ if not cols: cols = df.columns From 4bad47a3b9d086a358c47d0fcffbcd07c3144320 Mon Sep 17 00:00:00 2001 From: Rohit R Date: Mon, 27 Jun 2022 22:09:19 +0530 Subject: [PATCH 17/28] . 
--- src/subpixel/experimental/cpcv2.py | 76 ++++++++++++++++++++++++++++++ src/subpixel/ml/display.py | 0 2 files changed, 76 insertions(+) create mode 100644 src/subpixel/experimental/cpcv2.py create mode 100644 src/subpixel/ml/display.py diff --git a/src/subpixel/experimental/cpcv2.py b/src/subpixel/experimental/cpcv2.py new file mode 100644 index 0000000..50a4e0c --- /dev/null +++ b/src/subpixel/experimental/cpcv2.py @@ -0,0 +1,76 @@ +import torch +import numpy as np +import torch.nn as nn +from torch.autograd import Variable +import os +import time +import random +import torchvision.transforms as ttf +import cv2 +import matplotlib.pylab as plt + + +# WRONG +def get_overlapping_grids(img, kernal_size, stride): + + h, w, _ = img.shape + csteps = int((h / stride) - 1) + rsteps = int((w / stride) - 1) + + + crops = [] + img_full = np.random.randn(csteps * kernal_size, rsteps * kernal_size, 3) + + for i in range(csteps): + for j in range(rsteps): + crop = np.array( + img[ + stride * i : stride * i + kernal_size, + stride * j : stride * j + kernal_size, + :, + ] + ) + + img_full[ + kernal_size * i : kernal_size * i + kernal_size, + kernal_size * j : kernal_size * j + kernal_size, + :, + ] = crop + + crops.append(crop) + + return crops, img_full + + +def read_image(filename, resize=False): + + image = cv2.imread(filename) + + try: + image = cv2.cvtColor(image, cv2.COLOR_BGR2RGB) + except: + image = cv2.cvtColor(image, cv2.COLOR_BGR2GRAY) + + if resize: + image = cv2.resize(image, resize) + + return image + + +def display_images(images, nrows=3, ncols=3, cmap=None, title=None): + + fig, ax = plt.subplots(nrows=nrows, ncols=ncols, figsize=(10, 10)) + if title: + fig.suptitle(title, fontsize=20) + for i in range(ncols): + for j in range(nrows): + ax[i][j].imshow(images[i], cmap=cmap) + ax[i][j].axis("off") + plt.show() + + +img = read_image("D:\\Desktop\\test.jpeg") +grids, full = get_overlapping_grids(img, kernal_size=100, stride=100) +display_images(grids) +plt.imshow(full) +plt.show() \ No newline at end of file diff --git a/src/subpixel/ml/display.py b/src/subpixel/ml/display.py new file mode 100644 index 0000000..e69de29 From a26bf664e2cc4d104462dec5c4b3b8d54950bd95 Mon Sep 17 00:00:00 2001 From: Rohit R Date: Thu, 30 Jun 2022 11:41:52 +0530 Subject: [PATCH 18/28] model results --- src/subpixel/model_results.py | 42 +++++++++++++++++++++++++++++++++++ 1 file changed, 42 insertions(+) create mode 100644 src/subpixel/model_results.py diff --git a/src/subpixel/model_results.py b/src/subpixel/model_results.py new file mode 100644 index 0000000..f5c146c --- /dev/null +++ b/src/subpixel/model_results.py @@ -0,0 +1,42 @@ +import torch +import json +import matplotlib.pyplot as plt +import seaborn as sns + + +class ModelResults(): + def __init__(self, model, result_dict, output_path, testset= None): + + self.model = model + self.result_dict = result_dict + self.output_path = output_path + + if testset: + self.testset = testset + + self.epochs = result_dict['epochs'] + self.loss = result_dict['loss'] + self.acc = result_dict['acc'] + self.lr = result_dict['lr'] + + def save_results(self): + + plt.plot(self.epochs, self.loss) + plt.xlabel('Epochs') + plt.ylabel('Loss') + plt.savefig(self.output_path + 'loss_vs_epochs.png') + plt.close() + + plt.plot(self.epochs, self.lr) + plt.xlabel('Epochs') + plt.ylabel('Learning Rate') + plt.savefig(self.output_path + 'loss_vs_lr.png') + plt.close() + + if self.acc: + plt.plot(self.epochs, self.acc) + plt.xlabel('Epochs') + plt.ylabel('Accuracy') + 
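+            # output_path is assumed to end with a path separator, since the file name is appended directly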
plt.savefig(self.output_path + 'acc_vs_epochs.png') + plt.close() + From 69f211e3ae784bcf8e118b9ed029ebac91b4a94e Mon Sep 17 00:00:00 2001 From: Rohit R Date: Sun, 3 Jul 2022 14:03:38 +0530 Subject: [PATCH 19/28] new shit --- src/subpixel/experimental/cpcv2.py | 213 ++++++++++++++++++++++++----- src/subpixel/ml/model.py | 26 ++++ 2 files changed, 204 insertions(+), 35 deletions(-) create mode 100644 src/subpixel/ml/model.py diff --git a/src/subpixel/experimental/cpcv2.py b/src/subpixel/experimental/cpcv2.py index 50a4e0c..d9a4150 100644 --- a/src/subpixel/experimental/cpcv2.py +++ b/src/subpixel/experimental/cpcv2.py @@ -1,45 +1,50 @@ import torch import numpy as np import torch.nn as nn -from torch.autograd import Variable import os import time import random import torchvision.transforms as ttf +from torch.utils.data import Dataset, DataLoader, random_split import cv2 import matplotlib.pylab as plt +import warnings +warnings.filterwarnings("ignore") -# WRONG -def get_overlapping_grids(img, kernal_size, stride): - - h, w, _ = img.shape - csteps = int((h / stride) - 1) - rsteps = int((w / stride) - 1) +DEVICE = "cuda" if torch.cuda.is_available() else "cpu" - crops = [] - img_full = np.random.randn(csteps * kernal_size, rsteps * kernal_size, 3) +def get_grids(image, grid_size, overlap): - for i in range(csteps): - for j in range(rsteps): - crop = np.array( - img[ - stride * i : stride * i + kernal_size, - stride * j : stride * j + kernal_size, - :, - ] - ) - - img_full[ - kernal_size * i : kernal_size * i + kernal_size, - kernal_size * j : kernal_size * j + kernal_size, - :, - ] = crop + try: + h, w, _ = image.shape + except: + h, w = image.shape + + try: + h_grid, w_grid = grid_size + except: + h_grid, w_grid = grid_size, grid_size + + h_steps = (h - h_grid) // (h_grid * (1 - overlap)) + 1 + w_steps = w / w_grid + + grids = [] - crops.append(crop) + for i in range(int(h_steps)): + for j in range(int(w_steps)): - return crops, img_full + if j == 0: + w_start = 0 + else: + w_start = w_grid * (j - overlap) + w_start = int(np.round(w_start)) + + grid = image[i * h_grid : (i + 1) * h_grid, w_start : w_start + w_grid, :] + grids.append(grid) + + return grids def read_image(filename, resize=False): @@ -57,20 +62,158 @@ def read_image(filename, resize=False): return image -def display_images(images, nrows=3, ncols=3, cmap=None, title=None): - +def display_images(images, nrows=4, ncols=3, cmap=None, title=None): + fig, ax = plt.subplots(nrows=nrows, ncols=ncols, figsize=(10, 10)) if title: fig.suptitle(title, fontsize=20) + c = 0 for i in range(ncols): for j in range(nrows): - ax[i][j].imshow(images[i], cmap=cmap) - ax[i][j].axis("off") + ax[j][i].imshow(images[c], cmap=cmap) + ax[j][i].axis("off") + c += 1 plt.show() -img = read_image("D:\\Desktop\\test.jpeg") -grids, full = get_overlapping_grids(img, kernal_size=100, stride=100) -display_images(grids) -plt.imshow(full) -plt.show() \ No newline at end of file +class CPC_Dataset(Dataset): + def __init__(self, path, grid_size, overlap, transform=None): + + self.path = path + self.transform = transform + self.images = os.listdir(path) + self.images.sort() + self.grid_size, self.overlap = grid_size, overlap + + def __len__(self): + return len(self.images) + + def __getitem__(self, idx): + + img_path = os.path.join(self.path, self.images[idx]) + img = read_image(img_path) + grids = get_grids(img, self.grid_size, self.overlap) + + if self.transform: + for i, grid in enumerate(grids): + grids[i] = self.transform(grid) + + +class BasicBlock(nn.Module): + 
def __init__(self, in_channels, out_channels, stride=(2, 2)): + super(BasicBlock, self).__init__() + + self.stride = stride + + self.conv1 = nn.Conv2d(in_channels, out_channels, (3, 3), stride, (1, 1)) + self.bn = nn.BatchNorm2d(out_channels) + self.relu = nn.ReLU() + + self.conv2 = nn.Conv2d(out_channels, out_channels, (3, 3), (1, 1), (1, 1)) + + self.up = nn.Conv2d(in_channels, out_channels, (1, 1), (2, 2)) + + def forward(self, x): + + x_ = self.relu(self.bn(self.conv1(x))) + x_ = self.bn(self.conv2(x_)) + + if self.stride == (2, 2): + x = self.bn(self.up(x)) + + return x_ + x + + +class Resnet18(nn.Module): + def __init__(self): + super(Resnet18, self).__init__() + + self.conv1 = nn.Conv2d(3, 32, (7, 7), (2, 2), (3, 3), bias=False) + self.bn = nn.BatchNorm2d(32) + self.relu = nn.ReLU() + self.maxpool = nn.MaxPool2d(3, 2, 1, 1) + self.avgpool = nn.AdaptiveAvgPool2d(output_size=(1, 1)) + + self.layer1 = nn.ModuleList( + [BasicBlock(32, 32, stride=(1, 1)), BasicBlock(32, 32, stride=(1, 1))] + ) + self.layer2 = nn.ModuleList( + [BasicBlock(32, 64), BasicBlock(64, 64, stride=(1, 1))] + ) + self.layer3 = nn.ModuleList( + [BasicBlock(64, 128), BasicBlock(128, 128, stride=(1, 1))] + ) + self.layer4 = nn.ModuleList( + [BasicBlock(128, 256), BasicBlock(256, 256, stride=(1, 1))] + ) + + def forward(self, x): + + x = self.maxpool(self.relu(self.bn(self.conv1(x)))) + + for layer in self.layer1: + x = layer(x) + + for layer in self.layer2: + x = layer(x) + + for layer in self.layer3: + x = layer(x) + + for layer in self.layer4: + x = layer(x) + + x = self.avgpool(x) + return x + + +# class CPC_Model(nn.Module): +# def __init__(self): +# super().__init__() + +# self.model = Resnet18() + +# try: +# self.model = torch.load(r"../input/mri-scan/Encoder_2").to(DEVICE) +# except: +# pass + +# self.net = nn.Sequential( +# nn.Conv2d(256, 128, 1, 1), +# nn.Conv2d(128, 128, 1, 1), +# nn.Conv2d(128, 256, 1, 1), +# ) + +# def forward(self, crops): + +# embedding = self.model(crops[0].to(DEVICE)) +# for crop in crops[1:]: +# emb = self.model(crop.to(DEVICE)) +# embedding = torch.cat([embedding, emb], dim=0) + +# context = embedding.reshape((1, 256, 6, 6)) + +# if np.random.rand(1)[0] > 0.5: +# if np.random.rand(1)[0] > 0.5: +# top_half = context[:, :, :3, :] +# bottom_half = context[:, :, 3:, :] + +# return self.net(top_half) + +# else: +# bottom_half = context[:, :, 3:, :] +# top_half = context[:, :, :3, :] + +# return self.net(bottom_half) +# else: +# if np.random.rand(1)[0] > 0.5: +# right_half = context[:, :, :, 3:] +# left_half = context[:, :, :, :3] + +# return self.net(right_half) + +# else: +# left_half = context[:, :, :, :3] +# right_half = context[:, :, :, 3:] + +# return self.net(left_half) diff --git a/src/subpixel/ml/model.py b/src/subpixel/ml/model.py new file mode 100644 index 0000000..cc94d6b --- /dev/null +++ b/src/subpixel/ml/model.py @@ -0,0 +1,26 @@ +import torch +import torch.nn as nn +from tab_transformer_pytorch import TabTransformer + + +cont_mean_std = torch.randn(10, 2) + +## EXPERMIENTAL ## +# If normal ML models don't give good results and if the dataset is big enogh to use TabTransformer. +# Use exisiting train function or make a new train function to train TabTransformer on the custom dataset. +# Try to maybe find methods to find the best parameters for TabTransformer for the given task. 
+ + +model = TabTransformer( + categories=(10, 5, 6, 5, 8), # tuple containing the number of unique values within each category + num_continuous=10, # number of continuous values + dim=32, # dimension, paper set at 32 + dim_out=1, # binary prediction, but could be anything + depth=6, # depth, paper recommended 6 + heads=8, # heads, paper recommends 8 + attn_dropout=0.1, # post-attention dropout + ff_dropout=0.1, # feed forward dropout + mlp_hidden_mults=(4, 2), # relative multiples of each hidden dimension of the last mlp to logits + mlp_act=nn.ReLU(), # activation for final mlp, defaults to relu, but could be anything else (selu etc) + continuous_mean_std=cont_mean_std, # (optional) - normalize the continuous values before layer norm +) From cfdbfca09c381bfee1b88bd12720bfb381c15117 Mon Sep 17 00:00:00 2001 From: Rohit R Date: Sun, 3 Jul 2022 15:51:34 +0530 Subject: [PATCH 20/28] gradcam --- src/subpixel/experimental/gradcam.py | 18 ++++++++++++++++++ 1 file changed, 18 insertions(+) diff --git a/src/subpixel/experimental/gradcam.py b/src/subpixel/experimental/gradcam.py index e69de29..7cc011a 100644 --- a/src/subpixel/experimental/gradcam.py +++ b/src/subpixel/experimental/gradcam.py @@ -0,0 +1,18 @@ +from torchcam.methods import SmoothGradCAMpp +import cv2 +import torch +from torchvision.transforms.functional import normalize + + +def get_activationMap(model, image, device='cpu'): + + cam_extractor = SmoothGradCAMpp(model) + + if isinstance(image, str): + image = cv2.cvtColor(cv2.imread(image), cv2.COLOR_BGR2RGB) + image = torch.tensor(image).permute(2, 0, 1).float() + image = normalize(image / 255., [0.485, 0.456, 0.406], [0.229, 0.224, 0.225]) + + out = model(image.unsqueeze(0).to(device)) + + return cam_extractor(out.squeeze(0).argmax().item(), out) From 7bde72fcfd188bd83b565edbec50020eac5b6982 Mon Sep 17 00:00:00 2001 From: Rohit R Date: Sun, 3 Jul 2022 16:02:29 +0530 Subject: [PATCH 21/28] gradcam bbox --- src/subpixel/experimental/gradcam.py | 15 +++++++++++++++ 1 file changed, 15 insertions(+) diff --git a/src/subpixel/experimental/gradcam.py b/src/subpixel/experimental/gradcam.py index 7cc011a..69b7ef2 100644 --- a/src/subpixel/experimental/gradcam.py +++ b/src/subpixel/experimental/gradcam.py @@ -2,6 +2,7 @@ import cv2 import torch from torchvision.transforms.functional import normalize +import numpy as np def get_activationMap(model, image, device='cpu'): @@ -16,3 +17,17 @@ def get_activationMap(model, image, device='cpu'): out = model(image.unsqueeze(0).to(device)) return cam_extractor(out.squeeze(0).argmax().item(), out) + + +def get_bbox(activation_maps, image_shape= None, threshold=0.5): + + if image_shape: + activation_maps = cv2.resize(activation_maps, image_shape) + + activation_map = activation_maps[0] + activation_map = activation_map > threshold + contours, _ = cv2.findContours(activation_map.astype(np.uint8), cv2.RETR_TREE, cv2.CHAIN_APPROX_SIMPLE) + + bbox = cv2.boundingRect(contours[0]) + + return bbox \ No newline at end of file From 1c6a3898fd61e59e252c33891e7050cf6e047e8e Mon Sep 17 00:00:00 2001 From: Rohit R Date: Sun, 3 Jul 2022 16:13:56 +0530 Subject: [PATCH 22/28] . 
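This change only normalises the indentation of get_bbox (tabs to spaces); behaviour is unchanged. As a rough sketch of what the helper expects, a single-channel activation map in [0, 1] can be passed in a list (the subpixel import path is an assumption based on the src layout; maps coming from get_activationMap are torch tensors and may need a .numpy() conversion first):

import numpy as np
from subpixel.experimental.gradcam import get_bbox  # assumed import path

# Synthetic 7x7 activation map with a hot 3x3 region in the middle.
cam = np.zeros((7, 7), dtype=np.float32)
cam[2:5, 2:5] = 1.0

x, y, w, h = get_bbox([cam], threshold=0.5)
print(x, y, w, h)  # -> 2 2 3 3, a box around the thresholded hot region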
--- src/subpixel/experimental/gradcam.py | 20 ++++++++++---------- 1 file changed, 10 insertions(+), 10 deletions(-) diff --git a/src/subpixel/experimental/gradcam.py b/src/subpixel/experimental/gradcam.py index 69b7ef2..ca2613f 100644 --- a/src/subpixel/experimental/gradcam.py +++ b/src/subpixel/experimental/gradcam.py @@ -21,13 +21,13 @@ def get_activationMap(model, image, device='cpu'): def get_bbox(activation_maps, image_shape= None, threshold=0.5): - if image_shape: - activation_maps = cv2.resize(activation_maps, image_shape) - - activation_map = activation_maps[0] - activation_map = activation_map > threshold - contours, _ = cv2.findContours(activation_map.astype(np.uint8), cv2.RETR_TREE, cv2.CHAIN_APPROX_SIMPLE) - - bbox = cv2.boundingRect(contours[0]) - - return bbox \ No newline at end of file + if image_shape: + activation_maps = cv2.resize(activation_maps, image_shape) + + activation_map = activation_maps[0] + activation_map = activation_map > threshold + contours, _ = cv2.findContours(activation_map.astype(np.uint8), cv2.RETR_TREE, cv2.CHAIN_APPROX_SIMPLE) + + bbox = cv2.boundingRect(contours[0]) + + return bbox \ No newline at end of file From 946d6022f1b09ebebc135bbf2ce17ae37f4566a3 Mon Sep 17 00:00:00 2001 From: Rohit R Date: Mon, 4 Jul 2022 21:56:29 +0530 Subject: [PATCH 23/28] . --- src/subpixel/ml/EDA.py | 51 ++++++++++++++++++++++++++++++++++++++++ src/subpixel/ml/utils.py | 7 +++--- 2 files changed, 55 insertions(+), 3 deletions(-) create mode 100644 src/subpixel/ml/EDA.py diff --git a/src/subpixel/ml/EDA.py b/src/subpixel/ml/EDA.py new file mode 100644 index 0000000..e013277 --- /dev/null +++ b/src/subpixel/ml/EDA.py @@ -0,0 +1,51 @@ +from utils import * + + +class EDA: + def __init__(self, df, target_col=None): + self.df = df + self.target_col = target_col + + def show_corrMatrix(self): + return correlation_matrix(self.df) + + def get_importantFeatures(self): + + if self.target_col: + imp_features = feature_importance(self.df, self.target_col) + return imp_features + else: + raise Exception("Target column not specified.") + + def deal_withNaN(self, method="mean"): + if method == "mean": + return fill_nan_with_mean(self.df) + elif method == "delete": + return delete_row_with_nan(self.df) + else: + raise Exception("Method not supported.") + + def check_and_deal_wtihOutliers(self): + + cols_with_outliers = check_for_outliers(self.df) + if cols_with_outliers: + _, df = find_outliers(self.df, cols=cols_with_outliers, remove=True) + return df + else: + return None + + def data_stats(self): + return get_statistics(self.df) + + def show_chart(self, df, col, chart="pie"): + + if chart == "pie": + pie_chart(self.df, col) + elif chart == "count": + count_plot(self.df, col) + elif chart == "hist": + histogram(self.df, col) + elif chart == "box": + boxplot(self.df, col) + else: + raise Exception("Chart not supported.") diff --git a/src/subpixel/ml/utils.py b/src/subpixel/ml/utils.py index bdcd7d6..de3493f 100644 --- a/src/subpixel/ml/utils.py +++ b/src/subpixel/ml/utils.py @@ -29,7 +29,9 @@ def correlation_matrix(df, cols=False): if cols: df = df[cols] - return df.corr() + numeric_cols = df._get_numeric_data().columns.tolist() + + return df[numeric_cols].corr() def find_outliers(df, cols=False, remove=False): @@ -200,7 +202,7 @@ def feature_importance(x, y, show_plot=False): feat_importances = pd.Series(model.feature_importances_, index=x.columns) if show_plot: - feat_importances.nlargest(12).plot(kind="barh") + feat_importances.nlargest(len(x.columns) // 2).plot(kind="barh") 
plt.show() return feat_importances @@ -367,4 +369,3 @@ def get_statistics(df, cols=False): stats[col]["kurtosis"] = get_kurtosis(df, col) return stats - From 2c7a78ff259fe9d55f9df7bc3034e5f25db553e4 Mon Sep 17 00:00:00 2001 From: Rohit R Date: Fri, 8 Jul 2022 18:59:12 +0530 Subject: [PATCH 24/28] bug fixes --- src/subpixel/ml/utils.py | 30 ++++++++++++++---------------- 1 file changed, 14 insertions(+), 16 deletions(-) diff --git a/src/subpixel/ml/utils.py b/src/subpixel/ml/utils.py index de3493f..c1882ce 100644 --- a/src/subpixel/ml/utils.py +++ b/src/subpixel/ml/utils.py @@ -4,6 +4,7 @@ import itertools from sklearn.ensemble import ExtraTreesClassifier import pandas as pd +import json def accuracy(y_true, y_pred): @@ -34,7 +35,7 @@ def correlation_matrix(df, cols=False): return df[numeric_cols].corr() -def find_outliers(df, cols=False, remove=False): +def find_outliers(df, cols=False): """ Finds outliers in each column of the dataframe. :param df: Dataframe @@ -49,12 +50,10 @@ def find_outliers(df, cols=False, remove=False): else: numeric_cols = df._get_numeric_data().columns.tolist() - outliers = {} + outlier_idx = [] for col in numeric_cols: - outlier_list = [] - q1 = df[col].quantile(0.25) q3 = df[col].quantile(0.75) iqr = q3 - q1 @@ -64,17 +63,12 @@ def find_outliers(df, cols=False, remove=False): for i, val in enumerate(df[col]): if val < low_bound or val > high_bound: - outlier_list.append(i) - - if remove: - df.drop(df.index[i], inplace=True) + outlier_idx.append(i) - outliers[col] = outlier_list - - if remove: - return outliers, df - else: - return outliers + outlier_idx = list(set(outlier_idx)) + df = df.drop(index=outlier_idx) + + return outlier_idx, df def boxplot(df, cols=False): @@ -294,7 +288,7 @@ def get_correlation_with_target(df, target, cols=False): if cols: df = df[cols] - return df.corrwith(target).sort_values(ascending=False) + return df.corrwith(df[target]).sort_values(ascending=False)[1:] def get_kurtosis(df, col): @@ -345,7 +339,7 @@ def get_count_of_unique_values(df, col): return df[col].nunique() -def get_statistics(df, cols=False): +def get_statistics(df, cols=False, save= False): """ Gets the statistics of the dataframe. 
:param df: Dataframe @@ -368,4 +362,8 @@ def get_statistics(df, cols=False): stats[col]["skewness"] = get_skewness(df, col) stats[col]["kurtosis"] = get_kurtosis(df, col) + if save: + with open("stats.json", "w") as f: + json.dump(stats, f) + return stats From 1a877ab240ff94fab02310fa726720d91f7d29bf Mon Sep 17 00:00:00 2001 From: Rohit R Date: Fri, 8 Jul 2022 20:13:12 +0530 Subject: [PATCH 25/28] bug fixes --- src/subpixel/ml/utils.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/src/subpixel/ml/utils.py b/src/subpixel/ml/utils.py index c1882ce..4e5c641 100644 --- a/src/subpixel/ml/utils.py +++ b/src/subpixel/ml/utils.py @@ -138,7 +138,7 @@ def fill_nan_with_mean(df): """ for col in df.columns: - df[col] = df[col].fillna(df[col].mean()) + df[col] = df[col].fillna(get_mean(df, col)) return df @@ -366,4 +366,4 @@ def get_statistics(df, cols=False, save= False): with open("stats.json", "w") as f: json.dump(stats, f) - return stats + return stats \ No newline at end of file From 441b1b23e98be384fee80e61b2ef2d0456aa35ee Mon Sep 17 00:00:00 2001 From: Rohit R Date: Fri, 8 Jul 2022 22:25:07 +0530 Subject: [PATCH 26/28] to do --- src/subpixel/ml/utils.py | 9 ++++++--- 1 file changed, 6 insertions(+), 3 deletions(-) diff --git a/src/subpixel/ml/utils.py b/src/subpixel/ml/utils.py index 4e5c641..cb32912 100644 --- a/src/subpixel/ml/utils.py +++ b/src/subpixel/ml/utils.py @@ -7,6 +7,9 @@ import json +# Make function to find if a column has classification type values that are not numeric, if True get_dummies. If False, do nothing. + + def accuracy(y_true, y_pred): """ Function that finds the accuracy of a model based on the true and predicted values. @@ -67,7 +70,7 @@ def find_outliers(df, cols=False): outlier_idx = list(set(outlier_idx)) df = df.drop(index=outlier_idx) - + return outlier_idx, df @@ -339,7 +342,7 @@ def get_count_of_unique_values(df, col): return df[col].nunique() -def get_statistics(df, cols=False, save= False): +def get_statistics(df, cols=False, save=False): """ Gets the statistics of the dataframe. :param df: Dataframe @@ -366,4 +369,4 @@ def get_statistics(df, cols=False, save= False): with open("stats.json", "w") as f: json.dump(stats, f) - return stats \ No newline at end of file + return stats From 964e9f66733755ff0ae23cddff37e2062126dc80 Mon Sep 17 00:00:00 2001 From: Rohit R Date: Tue, 12 Jul 2022 16:39:54 +0530 Subject: [PATCH 27/28] tabtransformer --- src/subpixel/ml/model.py | 2 ++ 1 file changed, 2 insertions(+) diff --git a/src/subpixel/ml/model.py b/src/subpixel/ml/model.py index cc94d6b..9e9e7b3 100644 --- a/src/subpixel/ml/model.py +++ b/src/subpixel/ml/model.py @@ -2,6 +2,8 @@ import torch.nn as nn from tab_transformer_pytorch import TabTransformer +# https://github.com/lucidrains/tab-transformer-pytorch + cont_mean_std = torch.randn(10, 2) From 39213b654431a5edf3c192a0634e6f23cce70d50 Mon Sep 17 00:00:00 2001 From: Rohit R Date: Tue, 19 Jul 2022 09:33:53 +0530 Subject: [PATCH 28/28] testing pull requests in github. --- src/subpixel/vision/train.py | 1 + 1 file changed, 1 insertion(+) diff --git a/src/subpixel/vision/train.py b/src/subpixel/vision/train.py index 06c2b40..9d88ada 100644 --- a/src/subpixel/vision/train.py +++ b/src/subpixel/vision/train.py @@ -241,3 +241,4 @@ def evaluate(self, test_path): return sum(losses) / len(losses) +# test pull request. \ No newline at end of file
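After the ml/utils.py fixes in patches 23 through 26, the tabular EDA helpers can be exercised end to end. A minimal sketch on a toy frame (column names and values are made up, and the import assumes the src layout is importable as subpixel):

import pandas as pd
from subpixel.ml import utils  # assumed import path

df = pd.DataFrame({
    "age":   [22, 25, 31, 29, 120],        # 120 is an obvious outlier
    "score": [0.40, 0.50, 0.55, 0.60, 0.52],
    "label": [0, 0, 1, 1, 0],
})

print(utils.correlation_matrix(df))                    # numeric-only correlation matrix
print(utils.get_correlation_with_target(df, "label"))  # correlations against the target, self-correlation dropped
outlier_idx, clean = utils.find_outliers(df)           # IQR rule; returns dropped row indices and the cleaned frame
stats = utils.get_statistics(clean)                    # per-column summary dict (variance, skewness, kurtosis, ...)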