diff --git a/best_qubit_model_weights.pt b/best_qubit_model_weights.pt new file mode 100644 index 0000000..c667275 Binary files /dev/null and b/best_qubit_model_weights.pt differ diff --git a/experiment_log.txt b/experiment_log.txt new file mode 100644 index 0000000..fff8206 --- /dev/null +++ b/experiment_log.txt @@ -0,0 +1,522 @@ +Model: BestQubitModel( + (fc1): Linear(in_features=32, out_features=128, bias=True) + (hidden_layers_list): ModuleList( + (0-2): 3 x Linear(in_features=128, out_features=128, bias=True) + ) + (fc_out): Linear(in_features=128, out_features=16, bias=True) + (dropout): Dropout(p=0.3, inplace=False) +) +Number of hidden layers: 4 +Optimizer: Adam ( +Parameter Group 0 + amsgrad: False + betas: (0.9, 0.999) + capturable: False + differentiable: False + eps: 1e-08 + foreach: None + fused: None + lr: 0.0001 + maximize: False + weight_decay: 0 +) +Number of epochs: 4709 +Patience: 1000 +Best training loss: 0.0870 +Best validation loss: 0.2626 + +================================================================================ + +Model: BestQubitModel( + (fc1): Linear(in_features=32, out_features=128, bias=True) + (hidden_layers_list): ModuleList( + (0-2): 3 x Linear(in_features=128, out_features=128, bias=True) + ) + (fc_out): Linear(in_features=128, out_features=16, bias=True) + (dropout): Dropout(p=0.3, inplace=False) +) +Number of hidden layers: 4 +Optimizer: Adam ( +Parameter Group 0 + amsgrad: False + betas: (0.9, 0.999) + capturable: False + differentiable: False + eps: 1e-08 + foreach: None + fused: None + lr: 0.0001 + maximize: False + weight_decay: 0 +) +Number of epochs: 3968 +Patience: 1000 +Best training loss: 0.1179 +Best validation loss: 0.2706 + +================================================================================ + +Model: BestQubitModel( + (fc1): Linear(in_features=32, out_features=128, bias=True) + (hidden_layers_list): ModuleList( + (0-2): 3 x Linear(in_features=128, out_features=128, bias=True) + ) + (fc_out): Linear(in_features=128, out_features=16, bias=True) + (dropout): Dropout(p=0.3, inplace=False) +) +Number of hidden layers: 4 +Optimizer: Adam ( +Parameter Group 0 + amsgrad: False + betas: (0.9, 0.999) + capturable: False + differentiable: False + eps: 1e-08 + foreach: None + fused: None + lr: 0.0001 + maximize: False + weight_decay: 0.0001 +) +Number of epochs: 4204 +Patience: 1000 +Best training loss: 0.0967 +Best validation loss: 0.2644 + +================================================================================ + +Model: BestQubitModel( + (fc1): Linear(in_features=32, out_features=128, bias=True) + (hidden_layers_list): ModuleList( + (0-2): 3 x Linear(in_features=128, out_features=128, bias=True) + ) + (fc_out): Linear(in_features=128, out_features=16, bias=True) + (dropout): Dropout(p=0.3, inplace=False) +) +Number of hidden layers: 4 +Optimizer: Adam ( +Parameter Group 0 + amsgrad: False + betas: (0.9, 0.999) + capturable: False + differentiable: False + eps: 1e-08 + foreach: None + fused: None + lr: 0.0001 + maximize: False + weight_decay: 0.0001 +) +Number of epochs: 3671 +Patience: 1000 +Best training loss: 0.1028 +Best validation loss: 0.2579 + +================================================================================ + +Model: BestQubitModel( + (fc1): Linear(in_features=32, out_features=128, bias=True) + (hidden_layers_list): ModuleList( + (0-2): 3 x Linear(in_features=128, out_features=128, bias=True) + ) + (fc_out): Linear(in_features=128, out_features=16, bias=True) + (dropout): Dropout(p=0.3, 
inplace=False) +) +Number of hidden layers: 4 +Optimizer: Adam ( +Parameter Group 0 + amsgrad: False + betas: (0.9, 0.999) + capturable: False + differentiable: False + eps: 1e-08 + foreach: None + fused: None + lr: 0.0001 + maximize: False + weight_decay: 0.0001 +) +Number of epochs: 4903 +Patience: 1000 +Best training loss: 0.1027 +Best validation loss: 0.2825 + +================================================================================ + +Model: BestQubitModel( + (fc1): Linear(in_features=32, out_features=128, bias=True) + (hidden_layers_list): ModuleList( + (0-2): 3 x Linear(in_features=128, out_features=128, bias=True) + ) + (fc_out): Linear(in_features=128, out_features=16, bias=True) + (dropout): Dropout(p=0.3, inplace=False) +) +Number of hidden layers: 4 +Optimizer: AdamW ( +Parameter Group 0 + amsgrad: False + betas: (0.9, 0.999) + capturable: False + differentiable: False + eps: 1e-08 + foreach: None + fused: None + lr: 0.0001 + maximize: False + weight_decay: 0.0001 +) +Number of epochs: 2022 +Patience: 1000 +Best training loss: 0.1695 +Best validation loss: 0.3072 + +================================================================================ + +Model: BestQubitModel( + (fc1): Linear(in_features=32, out_features=128, bias=True) + (hidden_layers_list): ModuleList( + (0-2): 3 x Linear(in_features=128, out_features=128, bias=True) + ) + (fc_out): Linear(in_features=128, out_features=16, bias=True) + (dropout): Dropout(p=0.3, inplace=False) +) +Number of hidden layers: 4 +Optimizer: AdamW ( +Parameter Group 0 + amsgrad: False + betas: (0.9, 0.999) + capturable: False + differentiable: False + eps: 1e-08 + foreach: None + fused: None + lr: 0.0001 + maximize: False + weight_decay: 0.0001 +) +Number of epochs: 3256 +Patience: 1000 +Best training loss: 0.1342 +Best validation loss: 0.2927 + +================================================================================ + +Model: BestQubitModel( + (fc1): Linear(in_features=32, out_features=128, bias=True) + (hidden_layers_list): ModuleList( + (0-2): 3 x Linear(in_features=128, out_features=128, bias=True) + ) + (fc_out): Linear(in_features=128, out_features=16, bias=True) + (dropout): Dropout(p=0.3, inplace=False) +) +Number of hidden layers: 4 +Optimizer: AdamW ( +Parameter Group 0 + amsgrad: False + betas: (0.9, 0.999) + capturable: False + differentiable: False + eps: 1e-08 + foreach: None + fused: None + lr: 0.0001 + maximize: False + weight_decay: 0.0001 +) +Number of epochs: 4499 +Patience: 1000 +Best training loss: 0.0847 +Best validation loss: 0.2784 + +================================================================================ + +Model: BestQubitModel( + (fc1): Linear(in_features=32, out_features=128, bias=True) + (hidden_layers_list): ModuleList( + (0-2): 3 x Linear(in_features=128, out_features=128, bias=True) + ) + (fc_out): Linear(in_features=128, out_features=16, bias=True) + (dropout): Dropout(p=0.3, inplace=False) +) +Number of hidden layers: 4 +Optimizer: AdamW ( +Parameter Group 0 + amsgrad: False + betas: (0.9, 0.999) + capturable: False + differentiable: False + eps: 1e-08 + foreach: None + fused: None + lr: 0.0001 + maximize: False + weight_decay: 0.0001 +) +Number of epochs: 4616 +Patience: 1000 +Best training loss: 0.0907 +Best validation loss: 0.2940 + +================================================================================ + +Model: BestQubitModel( + (fc1): Linear(in_features=32, out_features=128, bias=True) + (hidden_layers_list): ModuleList( + (0-2): 3 x Linear(in_features=128, 
out_features=128, bias=True) + ) + (fc_out): Linear(in_features=128, out_features=16, bias=True) + (dropout): Dropout(p=0.3, inplace=False) +) +Number of hidden layers: 4 +Optimizer: AdamW ( +Parameter Group 0 + amsgrad: False + betas: (0.9, 0.999) + capturable: False + differentiable: False + eps: 1e-08 + foreach: None + fused: None + lr: 0.0001 + maximize: False + weight_decay: 0.0001 +) +Number of epochs: 4802 +Patience: 1000 +Best training loss: 0.1008 +Best validation loss: 0.2530 + +================================================================================ + +Model: BestQubitModel( + (fc1): Linear(in_features=32, out_features=128, bias=True) + (hidden_layers_list): ModuleList( + (0-2): 3 x Linear(in_features=128, out_features=128, bias=True) + ) + (fc_out): Linear(in_features=128, out_features=16, bias=True) + (dropout): Dropout(p=0.3, inplace=False) +) +Number of hidden layers: 4 +Optimizer: AdamW ( +Parameter Group 0 + amsgrad: False + betas: (0.9, 0.999) + capturable: False + differentiable: False + eps: 1e-08 + foreach: None + fused: None + lr: 0.0001 + maximize: False + weight_decay: 0.0001 +) +Number of epochs: 6218 +Patience: 1000 +Best training loss: 0.1074 +Best validation loss: 0.2965 + +================================================================================ + +Model: BestQubitModel( + (fc1): Linear(in_features=32, out_features=128, bias=True) + (hidden_layers_list): ModuleList( + (0-2): 3 x Linear(in_features=128, out_features=128, bias=True) + ) + (fc_out): Linear(in_features=128, out_features=16, bias=True) + (dropout): Dropout(p=0.3, inplace=False) +) +Number of hidden layers: 4 +Optimizer: AdamW ( +Parameter Group 0 + amsgrad: False + betas: (0.9, 0.999) + capturable: False + differentiable: False + eps: 1e-08 + foreach: None + fused: None + lr: 0.0001 + maximize: False + weight_decay: 0.0001 +) +Number of epochs: 2157 +Patience: 1000 +Best training loss: 0.8267 +Best validation loss: 0.7643 + +================================================================================ + +Model: BestQubitModel( + (fc1): Linear(in_features=32, out_features=128, bias=True) + (hidden_layers_list): ModuleList( + (0-2): 3 x Linear(in_features=128, out_features=128, bias=True) + ) + (fc_out): Linear(in_features=128, out_features=16, bias=True) + (dropout): Dropout(p=0.3, inplace=False) +) +Number of hidden layers: 4 +Optimizer: AdamW ( +Parameter Group 0 + amsgrad: False + betas: (0.9, 0.999) + capturable: False + differentiable: False + eps: 1e-08 + foreach: None + fused: None + lr: 0.0001 + maximize: False + weight_decay: 0.0001 +) +Number of epochs: 2174 +Patience: 1000 +Best training loss: 0.8120 +Best validation loss: 0.7643 + +================================================================================ + +Model: BestQubitModel( + (fc1): Linear(in_features=32, out_features=128, bias=True) + (hidden_layers_list): ModuleList( + (0-2): 3 x Linear(in_features=128, out_features=128, bias=True) + ) + (fc_out): Linear(in_features=128, out_features=16, bias=True) + (dropout): Dropout(p=0.3, inplace=False) +) +Number of hidden layers: 4 +Optimizer: AdamW ( +Parameter Group 0 + amsgrad: False + betas: (0.9, 0.999) + capturable: False + differentiable: False + eps: 1e-08 + foreach: None + fused: None + lr: 0.0001 + maximize: False + weight_decay: 0.0001 +) +Number of epochs: 1321 +Patience: 1000 +Best training loss: 0.6876 +Best validation loss: 0.8739 + +================================================================================ + +Model: BestQubitModel( + (fc1): 
Linear(in_features=32, out_features=128, bias=True) + (hidden_layers_list): ModuleList( + (0-8): 9 x Linear(in_features=128, out_features=128, bias=True) + ) + (fc_out): Linear(in_features=128, out_features=16, bias=True) + (dropout): Dropout(p=0.5, inplace=False) +) +Number of hidden layers: 10 +Optimizer: AdamW ( +Parameter Group 0 + amsgrad: False + betas: (0.9, 0.999) + capturable: False + differentiable: False + eps: 1e-08 + foreach: None + fused: None + lr: 0.001 + maximize: False + weight_decay: 0.0001 +) +Number of epochs: 1327 +Patience: 1000 +Best training loss: 0.7197 +Best validation loss: 0.7976 + +================================================================================ + +Model: BestQubitModel( + (fc1): Linear(in_features=32, out_features=128, bias=True) + (hidden_layers_list): ModuleList( + (0-8): 9 x Linear(in_features=128, out_features=128, bias=True) + ) + (fc_out): Linear(in_features=128, out_features=16, bias=True) + (dropout): Dropout(p=0.5, inplace=False) +) +Number of hidden layers: 10 +Optimizer: AdamW ( +Parameter Group 0 + amsgrad: False + betas: (0.9, 0.999) + capturable: False + differentiable: False + eps: 1e-08 + foreach: None + fused: None + lr: 0.001 + maximize: False + weight_decay: 0.0001 +) +Number of epochs: 1521 +Patience: 1000 +Best training loss: 0.4948 +Best validation loss: 0.5179 + +================================================================================ + +Model: BestQubitModel( + (fc1): Linear(in_features=32, out_features=64, bias=True) + (hidden_layers_list): ModuleList( + (0): Linear(in_features=64, out_features=64, bias=True) + ) + (fc_out): Linear(in_features=64, out_features=16, bias=True) + (dropout): Dropout(p=0.5, inplace=False) +) +Number of hidden layers: 2 +Optimizer: Adam ( +Parameter Group 0 + amsgrad: False + betas: (0.9, 0.999) + capturable: False + differentiable: False + eps: 1e-08 + foreach: None + fused: None + lr: 0.0001 + maximize: False + weight_decay: 0.0001 +) +Number of epochs: 5323 +Patience: 1000 +Best training loss: 0.2356 +Best validation loss: 0.3459 + +================================================================================ + +Model: BestQubitModel( + (fc1): Linear(in_features=32, out_features=128, bias=True) + (hidden_layers_list): ModuleList( + (0-1): 2 x Linear(in_features=128, out_features=128, bias=True) + ) + (fc_out): Linear(in_features=128, out_features=16, bias=True) + (dropout): Dropout(p=0.5, inplace=False) +) +Number of hidden layers: 3 +Optimizer: Adam ( +Parameter Group 0 + amsgrad: False + betas: (0.9, 0.999) + capturable: False + differentiable: False + eps: 1e-08 + foreach: None + fused: None + lr: 0.0001 + maximize: False + weight_decay: 0.0001 +) +Number of epochs: 5157 +Patience: 1000 +Best training loss: 0.2217 +Best validation loss: 0.3809 + +================================================================================ + diff --git a/src/nn/best_qubit_model.py b/src/nn/best_qubit_model.py new file mode 100644 index 0000000..166ac98 --- /dev/null +++ b/src/nn/best_qubit_model.py @@ -0,0 +1,45 @@ +import torch +import torch.nn as nn +import torch.nn.functional as F + +class BestQubitModel(nn.Module): + def __init__(self, n_size=4, hidden_layers=3, hidden_size=128, dropout_rate=0.5): + super(BestQubitModel, self).__init__() + input_size = 2 * n_size * n_size # 32 features + output_size = n_size * n_size # 16 outputs + + self.n_size = n_size + self.hidden_layers = hidden_layers + self.dropout_rate = dropout_rate + + # Input layer + self.fc1 = nn.Linear(input_size, 
hidden_size)
+
+        # Hidden layers
+        self.hidden_layers_list = nn.ModuleList()
+        for _ in range(hidden_layers - 1):
+            self.hidden_layers_list.append(nn.Linear(hidden_size, hidden_size))
+
+        # Output layer
+        self.fc_out = nn.Linear(hidden_size, output_size)
+
+        self.dropout = nn.Dropout(p=dropout_rate)  # Dropout layer with adjustable dropout rate
+
+    def forward(self, x):
+        # Take only the first two channels: shape becomes [batch, 2, n_size, n_size]
+        x = x[:, :2, :, :]
+        # Flatten to [batch, 2*n_size*n_size]
+        x = x.view(x.size(0), -1)
+
+        x = F.relu(self.fc1(x))
+        x = self.dropout(x)  # Apply dropout
+
+        for layer in self.hidden_layers_list:
+            x = F.relu(layer(x))
+            x = self.dropout(x)  # Apply dropout
+
+        x = self.fc_out(x)
+
+        # Reshape output to match target shape: [batch, 1, n_size, n_size]
+        x = x.view(-1, 1, self.n_size, self.n_size)
+        return x
\ No newline at end of file
diff --git a/src/nn/brute_force_data.py b/src/nn/brute_force_data.py
index 940fbe6..5059664 100644
--- a/src/nn/brute_force_data.py
+++ b/src/nn/brute_force_data.py
@@ -191,7 +191,7 @@ def apply(gate_name: str, gate_data: tuple) -> None:
 def generate_dataset_ct(nr_samples: int, qubits: List[int], gates: List[int],
-                        topo_factory: Callable[[int], Topology] = None, labels_as_described:bool = False, preprocessing_type: PreprocessingType = PreprocessingType.ORIGINAL) -> Tuple[torch.Tensor, torch.Tensor]:
+                        topo_factory: Callable[[int], Topology] = None, labels_as_described:bool = True, preprocessing_type: PreprocessingType = PreprocessingType.ORIGINAL) -> Tuple[torch.Tensor, torch.Tensor]:
     """
     Generate a dataset defined by labels_as_described and preprocessing_type.
diff --git a/src/nn/nn_train_main.py b/src/nn/nn_train_main.py
new file mode 100644
index 0000000..18ba02b
--- /dev/null
+++ b/src/nn/nn_train_main.py
@@ -0,0 +1,204 @@
+import torch
+import torch.nn as nn
+import torch.optim as optim
+import torch.nn.functional as F
+from torch.utils.data import DataLoader, TensorDataset
+from src.nn.best_qubit_model import BestQubitModel
+
+def load_data(train_path, val_path):
+    """
+    Loads the training and validation data.
+
+    Args:
+        train_path (str): Path to the training data file.
+        val_path (str): Path to the validation data file.
+
+    Returns:
+        (Tensor, Tensor, Tensor, Tensor): X_train, y_train, X_val, y_val
+    """
+    train_data = torch.load(train_path)
+    val_data = torch.load(val_path)
+    X_train, y_train = train_data
+    X_val, y_val = val_data
+    return X_train, y_train, X_val, y_val
+
+def create_dataloaders(X_train, y_train, batch_size=32):
+    """
+    Creates the training DataLoader.
+
+    Args:
+        X_train (Tensor): Training inputs
+        y_train (Tensor): Training targets
+        batch_size (int): Batch size for DataLoader
+
+    Returns:
+        DataLoader: A DataLoader for training data
+    """
+    train_dataset = TensorDataset(X_train, y_train)
+    train_loader = DataLoader(train_dataset, batch_size=batch_size, shuffle=True)
+    return train_loader
+
+def save_loss_plot(train_losses, val_losses, filename="training_loss.png"):
+    """
+    Saves the training and validation loss plot as a PNG file.
+
+    Args:
+        train_losses (list of float): List containing the training loss value per epoch.
+        val_losses (list of float): List containing the validation loss value per epoch.
+        filename (str): Filename for the saved plot (default: training_loss.png).
+ """ + import matplotlib.pyplot as plt + plt.figure(figsize=(10, 6)) + plt.plot(train_losses, label="Training Loss") + plt.plot(val_losses, label="Validation Loss") + plt.xlabel("Epoch") + plt.ylabel("Loss") + plt.title("Training and Validation Loss Progression") + plt.legend() + plt.grid(True) + plt.savefig(filename) + plt.close() + +def log_experiment_details(filename, model, optimizer, best_train_loss, best_val_loss, n_epochs, patience): + """ + Logs the experiment details to a text file. + + Args: + filename (str): Path to the log file. + model (nn.Module): The model being trained. + optimizer (torch.optim.Optimizer): The optimizer used for training. + best_train_loss (float): The best training loss achieved. + best_val_loss (float): The best validation loss achieved. + n_epochs (int): Number of epochs the model was trained for. + patience (int): Patience for early stopping. + """ + with open(filename, 'a') as f: + f.write(f"Model: {model}\n") + f.write(f"Number of hidden layers: {model.hidden_layers}\n") + f.write(f"Optimizer: {optimizer}\n") + f.write(f"Number of epochs: {n_epochs}\n") + f.write(f"Patience: {patience}\n") + f.write(f"Best training loss: {best_train_loss:.4f}\n") + f.write(f"Best validation loss: {best_val_loss:.4f}\n") + f.write("\n" + "="*80 + "\n\n") + +def custom_loss(output, target): + mse_loss = nn.MSELoss()(output, target) + penalty = torch.sum(F.relu(-output)) # Penalize negative values + return mse_loss + penalty + + +def train_model(model, train_loader, criterion, optimizer, X_train, y_train, X_val, y_val, n_epochs=30000, verbose=True, patience=1000, log_file="experiment_log.txt"): + """ + Main training loop for the model. + + Args: + model (nn.Module): Neural network model. + train_loader (DataLoader): DataLoader for training data. + criterion (nn.Module): Loss function. + optimizer (torch.optim.Optimizer): Optimizer for training. + X_train (Tensor): Training inputs for occasional sample prediction. + y_train (Tensor): Training targets for occasional sample comparison. + X_val (Tensor): Validation inputs. + y_val (Tensor): Validation targets. + n_epochs (int): Number of epochs to train. + verbose (bool): If True, prints updates to terminal. + log_file (str): Path to the log file. + + Returns: + None. 
+ """ + train_losses = [] + val_losses = [] + best_train_loss = float('inf') + best_val_loss = float('inf') + epochs_no_improve = 0 # Counter for early stopping + + for epoch in range(n_epochs): + model.train() + total_loss = 0 + + for batch_X, batch_y in train_loader: + optimizer.zero_grad() + outputs = model(batch_X) + loss = criterion(outputs, batch_y) + loss.backward() + optimizer.step() + total_loss += loss.item() + + # Compute average training loss for this epoch + avg_loss = total_loss / len(train_loader) + train_losses.append(avg_loss) + + # Evaluate on validation set + model.eval() + with torch.no_grad(): + val_outputs = model(X_val) + val_loss = criterion(val_outputs, y_val).item() + val_losses.append(val_loss) + + if verbose and epoch % 10 == 0: + current_lr = optimizer.param_groups[0]['lr'] + print(f'Epoch {epoch}, Training Loss: {avg_loss:.4f}, ' + f'Validation Loss: {val_loss:.4f}, LR: {current_lr:.6f}') + with torch.no_grad(): + for i in range(2): # Print predictions for the first i examples + test_input = X_train[i:i+1] + pred = model(test_input) + print(f"Example {i+1} - Predicted values:") + print(pred[0, 0]) + print(f"Example {i+1} - Actual values:") + print(y_train[i, 0]) + + # Save only the best model so far and check early stopping + if val_loss < best_val_loss: + best_val_loss = val_loss + best_train_loss = avg_loss + torch.save(model.state_dict(), "best_qubit_model_weights.pt") + epochs_no_improve = 0 + + else: + epochs_no_improve += 1 + + if epochs_no_improve >= patience: + print(f"Early stopping triggered after {epoch} epochs.") + break + + save_loss_plot(train_losses, val_losses) + model.load_state_dict(torch.load("best_qubit_model_weights.pt")) + + # Log experiment details when a new best validation loss is achieved + log_experiment_details(log_file, model, optimizer, best_train_loss, best_val_loss, epoch, patience) + +def main(): + # File paths + train_path = 'train_data_True_from_project_description.pt' + val_path = 'val_data_True_from_project_description.pt' + + # Load data + X_train, y_train, X_val, y_val = load_data(train_path, val_path) + + # Create model, criterion, optimizer + model = BestQubitModel(n_size=4, hidden_layers=3, hidden_size=128, dropout_rate=0.5) + criterion = custom_loss # Use the custom loss function + optimizer = optim.Adam(model.parameters(), lr=0.0001, weight_decay=1e-4) + + # Create data loader + train_loader = create_dataloaders(X_train, y_train, batch_size=32) + + # Train model with validation + train_model( + model=model, + train_loader=train_loader, + criterion=criterion, + optimizer=optimizer, + X_train=X_train, + y_train=y_train, + X_val=X_val, + y_val=y_val, + n_epochs=20000, + verbose=True + ) + +if __name__ == "__main__": + main() \ No newline at end of file diff --git a/src/nn_eval_main.py b/src/nn_eval_main.py index 05ba411..27546c6 100644 --- a/src/nn_eval_main.py +++ b/src/nn_eval_main.py @@ -2,6 +2,7 @@ import warnings from typing import List +import torch import numpy as np import pandas as pd from pauliopt.circuits import Circuit @@ -12,6 +13,10 @@ from src.nn.brute_force_data import get_best_cnots from src.utils import random_hscx_circuit, tableau_from_circuit +from src.nn.best_qubit_model import BestQubitModel +from src.nn.preprocess_data import PREPROCESSING_SCRIPTS, PreprocessingType + + # Suppress all overflow warnings globally np.seterr(over='ignore') @@ -88,6 +93,61 @@ def pick_pivot_callback(G, remaining: "CliffordTableau", remaining_rows: List[in return {"n_rep": n_rep} | collect_circuit_data(circ_out) | 
{"method": "optimum"} +def nn_compilation(circuit: Circuit, topology: Topology, n_rep: int): + """ + Compilation using the trained neural network to infer the best pivot qubit. + """ + model = BestQubitModel() + model.load_state_dict(torch.load("best_qubit_model_weights.pt")) + model.eval() + + # Prepare the Clifford tableau from the circuit + clifford_tableau = CliffordTableau(circuit.n_qubits) + clifford_tableau = tableau_from_circuit(clifford_tableau, circuit) + + # Ensure matrices are numpy arrays with the expected shape (n_qubits x n_qubits) + n_qubits = circuit.n_qubits + + # Reshape x_mat and z_mat to (n, n) + x_mat = np.array(clifford_tableau.x_matrix).reshape(n_qubits, n_qubits) + z_mat = np.array(clifford_tableau.z_matrix).reshape(n_qubits, n_qubits) + + # Create an input tensor of shape [1, 3, n, n] + input_tensor = torch.zeros(1, 3, n_qubits, n_qubits, dtype=torch.float32) + input_tensor[0, 0] = torch.tensor(x_mat, dtype=torch.float32) + input_tensor[0, 1] = torch.tensor(z_mat, dtype=torch.float32) + # The third channel remains zero (or filled as needed) + + with torch.no_grad(): + output = model(input_tensor) + output = torch.round(output).int().numpy() + + # Ensure the output matrix has the expected shape (n_qubits x n_qubits) + output = output.reshape(n_qubits, n_qubits) + print(output) + + # Use a large integer value to represent infinity + int_inf = np.iinfo(np.int32).max + + # Collect row and column combinations based on the lowest values + combinations = [] + while not np.all(output == int_inf): + min_index = np.unravel_index(np.argmin(output, axis=None), output.shape) #note picks the first occurence in ties + print(min_index) + combinations.append(min_index) + output[min_index[0], :] = int_inf + output[:, min_index[1]] = int_inf + + combination_iterator = iter(combinations) + + def pick_pivot_callback(G, remaining: "CliffordTableau", remaining_rows: List[int], choice_fn=min): + row, col = next(combination_iterator) + return row, col + + + circ_out = synthesize_tableau_perm_row_col(clifford_tableau, topology, pick_pivot_callback=pick_pivot_callback) + return {"n_rep": n_rep} | collect_circuit_data(circ_out) | {"method": "nn"} + def main(n_qubits: int = 4, nr_gates: int = 1000): """ @@ -100,23 +160,47 @@ def main(n_qubits: int = 4, nr_gates: int = 1000): df = pd.DataFrame(columns=["n_rep", "num_qubits", "method", "h", "s", "cx", "depth"]) topo = Topology.complete(n_qubits) for i in range(20): - print(i) circuit = random_hscx_circuit(nr_qubits=n_qubits, nr_gates=nr_gates) + # Our compilation e.g. the baseline from the paper df_dictionary = pd.DataFrame([our_compilation(circuit.copy(), topo, i)]) df = pd.concat([df, df_dictionary], ignore_index=True) print("Min", df_dictionary["cx"]) + + # Optimal compilation df_dictionary = pd.DataFrame([optimal_compilation(circuit.copy(), topo, i)]) df = pd.concat([df, df_dictionary], ignore_index=True) print("OPTIMUM", df_dictionary["cx"]) + # Random compilation df_dictionary = pd.DataFrame([random_compilation(circuit.copy(), topo, i)]) df = pd.concat([df, df_dictionary], ignore_index=True) print("Random", df_dictionary["cx"]) + # Group's first ANN compilation + df_dictionary = pd.DataFrame([nn_compilation(circuit.copy(), topo, i)]) + df = pd.concat([df, df_dictionary], ignore_index=True) + print("NN", df_dictionary["cx"]) + + # Convert the cx column to a numerical type + df["cx"] = pd.to_numeric(df["cx"]) + df.to_csv("test_clifford_synthesis.csv", index=False) print(df.groupby("method").mean()) + # Is the difference just luck? 
+ from scipy.stats import ttest_ind + + nn_cx_values = df[df["method"] == "nn"]["cx"] + random_cx_values = df[df["method"] == "random"]["cx"] + t_stat, p_value = ttest_ind(nn_cx_values, random_cx_values) + + print(f"T-test results: t-statistic = {t_stat}, p-value = {p_value}") + if p_value < 0.05: + print("The difference in cx values between nn and random is statistically significant (p < 0.05).") + else: + print("The difference in cx values between nn and random is not statistically significant (p >= 0.05).") + if __name__ == "__main__": - main() + main() \ No newline at end of file diff --git a/test_clifford_synthesis.csv b/test_clifford_synthesis.csv new file mode 100644 index 0000000..2b7faa5 --- /dev/null +++ b/test_clifford_synthesis.csv @@ -0,0 +1,81 @@ +n_rep,num_qubits,method,h,s,cx,depth +0,4,normal_heuristic,5,13,3,13 +0,4,optimum,5,13,3,13 +0,4,random,7,12,6,12 +0,4,nn,7,14,9,18 +1,4,normal_heuristic,13,18,6,21 +1,4,optimum,11,18,5,16 +1,4,random,11,18,11,21 +1,4,nn,11,18,13,25 +2,4,normal_heuristic,11,13,7,19 +2,4,optimum,7,13,5,13 +2,4,random,5,14,9,20 +2,4,nn,14,18,9,19 +3,4,normal_heuristic,6,10,3,12 +3,4,optimum,6,10,3,11 +3,4,random,9,16,10,23 +3,4,nn,9,18,10,24 +4,4,normal_heuristic,12,15,7,19 +4,4,optimum,10,9,6,12 +4,4,random,8,7,12,17 +4,4,nn,12,15,13,25 +5,4,normal_heuristic,12,15,7,20 +5,4,optimum,8,11,5,16 +5,4,random,10,8,11,18 +5,4,nn,12,16,7,18 +6,4,normal_heuristic,10,11,6,15 +6,4,optimum,7,10,6,12 +6,4,random,11,12,11,19 +6,4,nn,14,20,7,24 +7,4,normal_heuristic,17,20,10,29 +7,4,optimum,11,19,6,19 +7,4,random,9,18,12,21 +7,4,nn,14,17,12,30 +8,4,normal_heuristic,12,20,9,26 +8,4,optimum,7,11,5,15 +8,4,random,10,17,8,20 +8,4,nn,7,13,11,18 +9,4,normal_heuristic,11,17,7,17 +9,4,optimum,11,17,6,16 +9,4,random,13,15,10,21 +9,4,nn,14,14,9,18 +10,4,normal_heuristic,9,12,6,20 +10,4,optimum,8,11,6,17 +10,4,random,13,17,10,25 +10,4,nn,15,17,10,24 +11,4,normal_heuristic,11,17,6,13 +11,4,optimum,13,14,6,20 +11,4,random,10,13,8,20 +11,4,nn,12,15,10,23 +12,4,normal_heuristic,10,11,6,15 +12,4,optimum,12,11,6,16 +12,4,random,11,16,11,19 +12,4,nn,12,11,10,17 +13,4,normal_heuristic,10,16,7,17 +13,4,optimum,6,12,7,16 +13,4,random,10,19,9,24 +13,4,nn,9,14,10,20 +14,4,normal_heuristic,11,19,10,20 +14,4,optimum,11,18,7,20 +14,4,random,12,16,8,24 +14,4,nn,13,21,10,25 +15,4,normal_heuristic,13,18,6,20 +15,4,optimum,13,18,5,18 +15,4,random,11,17,11,22 +15,4,nn,15,15,9,18 +16,4,normal_heuristic,9,14,5,16 +16,4,optimum,11,14,5,19 +16,4,random,14,14,12,21 +16,4,nn,11,12,11,19 +17,4,normal_heuristic,9,16,7,21 +17,4,optimum,9,17,6,17 +17,4,random,15,20,9,21 +17,4,nn,14,16,11,21 +18,4,normal_heuristic,13,17,6,23 +18,4,optimum,9,12,4,14 +18,4,random,9,12,4,14 +18,4,nn,15,16,8,19 +19,4,normal_heuristic,12,18,7,25 +19,4,optimum,10,11,6,13 +19,4,random,15,18,8,26 +19,4,nn,10,14,12,20 diff --git a/test_file.py b/test_file.py new file mode 100644 index 0000000..598a500 --- /dev/null +++ b/test_file.py @@ -0,0 +1,38 @@ +import torch + +# Load the data from the file +train_data = torch.load('train_data_True_from_project_description.pt') + +# Assuming train_data is a tuple of two tensors +X, Y = train_data + +# Print the shapes of the tensors to understand their structure +print("Shape of X:", X.shape) +print("Shape of Y:", Y.shape) + +# Print the first example of X and Y to see the actual values +print("First example of X:", X[0]) +print("First example of Y:", Y[0]) + +# Flatten the Y tensor to a 1D tensor +Y_flat = Y.flatten() + +# Count the frequency of each value in the Y tensor +value_counts = 
torch.bincount(Y_flat.long()) + +# Print the frequency of each value +for value, count in enumerate(value_counts): + print(f"Value {value}: {count} occurrences") + +# Count how many matrices have at least half of the values other than zero +half_non_zero_count = 0 +total_elements = Y.shape[2] * Y.shape[3] # Assuming Y has shape [batch_size, channels, height, width] +half_threshold = total_elements // 2 +total_matrices = Y.shape[0] # Total number of matrices + +for i in range(total_matrices): + non_zero_count = torch.count_nonzero(Y[i]) + if non_zero_count >= half_threshold: + half_non_zero_count += 1 + +print(f"Number of matrices with at least half of the values other than zero: {half_non_zero_count} out of {total_matrices}") \ No newline at end of file diff --git a/train_data_False_original.pt b/train_data_False_original.pt index 3dff485..cc61994 100644 Binary files a/train_data_False_original.pt and b/train_data_False_original.pt differ diff --git a/training_loss.png b/training_loss.png new file mode 100644 index 0000000..f21aa65 Binary files /dev/null and b/training_loss.png differ diff --git a/val_data_False_original.pt b/val_data_False_original.pt index c46bb5d..a67be22 100644 Binary files a/val_data_False_original.pt and b/val_data_False_original.pt differ