From b7b733dd29e28a26fb13dcbad0a7f08669fed5ca Mon Sep 17 00:00:00 2001
From: Brian
Date: Thu, 6 Nov 2025 15:32:30 +1000
Subject: [PATCH 01/18] Create README.md

---
 recognition/AttUNetOASIS/README.md | 1 +
 1 file changed, 1 insertion(+)
 create mode 100644 recognition/AttUNetOASIS/README.md

diff --git a/recognition/AttUNetOASIS/README.md b/recognition/AttUNetOASIS/README.md
new file mode 100644
index 000000000..999c56626
--- /dev/null
+++ b/recognition/AttUNetOASIS/README.md
@@ -0,0 +1 @@
+# Attention U-Net for OASIS Brain Tissue Segmentation
\ No newline at end of file

From 50adc0da9b5991c9d5166b2a610bc0066b325ac1 Mon Sep 17 00:00:00 2001
From: Brian
Date: Thu, 6 Nov 2025 15:34:21 +1000
Subject: [PATCH 02/18] Create unet.py and implement dataset class

Create OASISDataset class to load dataset from files
---
 recognition/AttUNetOASIS/unet.py | 73 ++++++++++++++++++++++++++++++++
 1 file changed, 73 insertions(+)
 create mode 100644 recognition/AttUNetOASIS/unet.py

diff --git a/recognition/AttUNetOASIS/unet.py b/recognition/AttUNetOASIS/unet.py
new file mode 100644
index 000000000..96786d37f
--- /dev/null
+++ b/recognition/AttUNetOASIS/unet.py
@@ -0,0 +1,73 @@
+import torch
+import torch.nn as nn
+import torch.nn.functional as F
+from torch.utils.data import Dataset, DataLoader
+from tqdm import tqdm
+from PIL import Image
+import numpy as np
+import os
+from pathlib import Path
+import time
+
+device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
+
+# Data paths
+DATA_ROOT = '/home/groups/comp3710/OASIS'
+TRAIN_IMG_DIR = f'{DATA_ROOT}/keras_png_slices_train'
+TRAIN_SEG_DIR = f'{DATA_ROOT}/keras_png_slices_seg_train'
+VAL_IMG_DIR = f'{DATA_ROOT}/keras_png_slices_validate'
+VAL_SEG_DIR = f'{DATA_ROOT}/keras_png_slices_seg_validate'
+TEST_IMG_DIR = f'{DATA_ROOT}/keras_png_slices_test'
+TEST_SEG_DIR = f'{DATA_ROOT}/keras_png_slices_seg_test'
+
+# Map raw PNG values to class indices. Values retrieved at runtime using PIL.
+# Expect 4 unique PNG values -> classes from OASIS dataset.
+LABEL_MAP = None
+
+class OASISDataset(Dataset):
+    """Custom dataset for OASIS brain MRI slices and segmentation masks"""
+
+    def __init__(self, img_dir, seg_dir, label_map, augment=False):
+        self.img_dir = img_dir
+        self.seg_dir = seg_dir
+        self.label_map = label_map
+        self.augment = augment
+
+        # Get all image filenames
+        self.img_files = sorted([f for f in os.listdir(img_dir) if f.endswith('.png')])
+
+    def __len__(self):
+        return len(self.img_files)
+
+    def __getitem__(self, idx):
+        # Load image
+        img_name = self.img_files[idx]
+        img_path = os.path.join(self.img_dir, img_name)
+        img = np.array(Image.open(img_path), dtype=np.float32)
+
+        # Load segmentation mask
+        seg_name = img_name.replace('case_', 'seg_')
+        seg_path = os.path.join(self.seg_dir, seg_name)
+        seg = np.array(Image.open(seg_path), dtype=np.uint8)
+
+        # Map labels: [0, 85, 170, 255] -> [0, 1, 2, 3]
+        seg_mapped = np.zeros_like(seg)
+        for old_val, new_val in self.label_map.items():
+            seg_mapped[seg == old_val] = new_val
+
+        # Normalize image to [0, 1]
+        img = img / 255.0
+
+        # Add channel dimension
+        img = np.expand_dims(img, axis=0)
+
+        # Convert to tensors
+        img = torch.from_numpy(img).float()
+        seg = torch.from_numpy(seg_mapped).long()
+
+        # Basic augmentation
+        if self.augment:
+            if torch.rand(1) > 0.5:
+                img = torch.flip(img, dims=[2])  # Horizontal flip
+                seg = torch.flip(seg, dims=[1])
+
+        return img, seg
\ No newline at end of file

From bd3dc5e16f10f2ccf4d00de7956719ffaf9c7d87 Mon Sep 17 00:00:00 2001
From: Brian
Date: Fri, 7 Nov 2025 08:44:58 +1000
Subject: [PATCH 03/18] Add ConvBlock and AttentionBlock classes

- Add ConvBlock class to perform double convolution at encoder and decoder stages.
- Add AttentionBlock class to filter intermediate-scale features before merging skip connections.
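
For illustration, a minimal sketch of how the gate is intended to be wired at one decoder stage; the shapes are example values only, and it assumes the two classes below are importable from unet.py:

    import torch
    from unet import AttentionBlock

    gate = AttentionBlock(F_g=64, F_l=64, F_int=32)
    dec = torch.randn(1, 64, 128, 128)   # upsampled decoder features (gating signal)
    enc = torch.randn(1, 64, 128, 128)   # encoder skip features to be filtered
    filtered = gate(g=dec, x=enc)        # same shape as enc
    merged = torch.cat((filtered, dec), dim=1)  # concatenated before the decoder ConvBlock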
---
 recognition/AttUNetOASIS/unet.py | 71 ++++++++++++++++++++++++++++++++-
 1 file changed, 70 insertions(+), 1 deletion(-)

diff --git a/recognition/AttUNetOASIS/unet.py b/recognition/AttUNetOASIS/unet.py
index 96786d37f..e5e46c0d1 100644
--- a/recognition/AttUNetOASIS/unet.py
+++ b/recognition/AttUNetOASIS/unet.py
@@ -27,6 +27,14 @@ class OASISDataset(Dataset):
     """Custom dataset for OASIS brain MRI slices and segmentation masks"""

     def __init__(self, img_dir, seg_dir, label_map, augment=False):
+        """
+        Dataset constructor
+
+        @param img_dir Directory to load input images from
+        @param seg_dir Directory to load segmentation masks from
+        @param label_map Dictionary that maps unique PNG values to class indices
+        @param augment Whether or not to augment training data (default: False)
+        """
         self.img_dir = img_dir
         self.seg_dir = seg_dir
         self.label_map = label_map
         self.augment = augment
@@ -70,4 +78,65 @@ def __getitem__(self, idx):
                 img = torch.flip(img, dims=[2])  # Horizontal flip
                 seg = torch.flip(seg, dims=[1])

-        return img, seg
\ No newline at end of file
+        return img, seg
+
+class ConvBlock(nn.Module):
+    """Double convolution block: Conv-BN-ReLU-Conv-BN-ReLU"""
+
+    def __init__(self, in_channels, out_channels):
+        super(ConvBlock, self).__init__()
+        self.conv = nn.Sequential(
+            # First convolution layer
+            nn.Conv2d(in_channels, out_channels, kernel_size=3, padding=1, bias=False),
+            nn.BatchNorm2d(out_channels),  # Normalise activations to stabilize training
+            nn.ReLU(inplace=True),  # Nonlinearity for feature learning
+
+            # Second convolution layer
+            nn.Conv2d(out_channels, out_channels, kernel_size=3, padding=1, bias=False),
+            nn.BatchNorm2d(out_channels),
+            nn.ReLU(inplace=True)
+        )
+
+    def forward(self, x):
+        return self.conv(x)  # Apply sequential block to input tensor
+
+class AttentionBlock(nn.Module):
+    """Attention gate for focusing on relevant spatial regions in U-Net skip connections"""
+
+    def __init__(self, F_g, F_l, F_int):
+        """
+        @param F_g Number of feature channels in the gating signal (from decoder)
+        @param F_l Number of feature channels in the skip connection (from encoder)
+        @param F_int Number of intermediate channels for computing attention
+        """
+        super(AttentionBlock, self).__init__()
+
+        # 1x1 convolution to project the gating signal (decoder features) to the intermediate space
+        self.W_g = nn.Sequential(
+            nn.Conv2d(F_g, F_int, kernel_size=1, stride=1, padding=0, bias=True),
+            nn.BatchNorm2d(F_int)
+        )
+
+        # 1x1 convolution to project the skip-connection (encoder) features to the same intermediate space
+        self.W_x = nn.Sequential(
+            nn.Conv2d(F_l, F_int, kernel_size=1, stride=1, padding=0, bias=True),
+            nn.BatchNorm2d(F_int)
+        )
+
+        # 1x1 convolution + sigmoid producing a single-channel spatial attention map
+        self.psi = nn.Sequential(
+            nn.Conv2d(F_int, 1, kernel_size=1, stride=1, padding=0, bias=True),
+            nn.BatchNorm2d(1),
+            nn.Sigmoid()
+        )
+
+        self.relu = nn.ReLU(inplace=True)  # Nonlinearity for combining features
+
+    def forward(self, g, x):
+        g1 = self.W_g(g)  # Transform decoder input (context)
+        x1 = self.W_x(x)  # Transform encoder input (skip feature)
+
+        # Add projected tensors and apply ReLU
+        psi = self.relu(g1 + x1)
+        psi = self.psi(psi)  # Attention coefficients in [0, 1], shape (batch, 1, H, W)
+
+        # Apply attention mask to suppress irrelevant regions
+        return x * psi  # Same shape as x; psi broadcasts over its channels

From b34d65251e6355fc0b81d1cd383a83430eaa447d Mon Sep 17 00:00:00 2001
From: Brian
Date: Fri, 7 Nov 2025 09:16:26 +1000
Subject: [PATCH 04/18] Add algorithm description and functional description to README

---
 recognition/AttUNetOASIS/README.md | 10 +++++++++-
 1 file changed, 9 insertions(+), 1 deletion(-)

diff --git a/recognition/AttUNetOASIS/README.md b/recognition/AttUNetOASIS/README.md
index 999c56626..dc3f59db2 100644
--- a/recognition/AttUNetOASIS/README.md
+++ b/recognition/AttUNetOASIS/README.md
@@ -1 +1,9 @@
-# Attention U-Net for OASIS Brain Tissue Segmentation
\ No newline at end of file
+# Attention U-Net for OASIS Brain Tissue Segmentation
+
+## Algorithm Description
+This implementation uses an Attention U-Net architecture to perform semantic segmentation of brain MRI scans from the OASIS dataset. It solves the problem of automatic tissue classification: that is, given a 2D image of an MRI slice, the model predicts a label for every pixel indicating whether it belongs to the background, cerebrospinal fluid (CSF), gray matter (GM), or white matter (WM). This helps avoid the need to rely on traditional methods such as pixel counting and voxel-based morphometry, which are time-consuming and subject to inter-rater variability.
+
+## How it works
+The Attention U-Net is an improved version of the traditional U-Net neural network. In line with its predecessor, Attention U-Net features an encoder-decoder architecture that uses convolution blocks to perform feature extraction, performs hierarchical downsampling and upsampling, and produces a pixelwise segmentation output, with the added benefit of attention mechanisms. In the context of the OASIS dataset, the encoder progressively downsamples the input MRI slices, extracting hierarchical features from low-level edges to high-level anatomical patterns. The decoder symmetrically upsamples these features back to the original resolution. But critically, before joining encoder features via the skip connections, attention gates are used to dynamically highlight regions with brain tissue while suppressing irrelevant background areas. This is particularly useful for brain segmentation where tissue boundaries can be subtle and structures vary in size.
+The network is trained using a combined Dice and Cross-Entropy loss function, which directly optimises the segmentation quality metric while maintaining stable gradients. It also employs data augmentation via horizontal flipping to improve generalization across the anatomical variability present across patients.
\ No newline at end of file

From 051245d11c6ad551cf8239d1526d4cfc4d6970ea Mon Sep 17 00:00:00 2001
From: Brian
Date: Fri, 7 Nov 2025 09:51:20 +1000
Subject: [PATCH 05/18] Update README with Attention U-Net architecture figure

Added a figure illustrating the Attention U-Net architecture and its source information.
---
 recognition/AttUNetOASIS/README.md | 5 ++++-
 1 file changed, 4 insertions(+), 1 deletion(-)

diff --git a/recognition/AttUNetOASIS/README.md b/recognition/AttUNetOASIS/README.md
index dc3f59db2..96f7015ac 100644
--- a/recognition/AttUNetOASIS/README.md
+++ b/recognition/AttUNetOASIS/README.md
@@ -6,4 +6,7 @@ This implementation uses an Attention U-Net architecture to perform semantic seg
 ## How it works
 The Attention U-Net is an improved version of the traditional U-Net neural network. In line with its predecessor, Attention U-Net features an encoder-decoder architecture that uses convolution blocks to perform feature extraction, performs hierarchical downsampling and upsampling, and produces a pixelwise segmentation output, with the added benefit of attention mechanisms. In the context of the OASIS dataset, the encoder progressively downsamples the input MRI slices, extracting hierarchical features from low-level edges to high-level anatomical patterns.
The decoder symmetrically upsamples these features back to the original resolution. But critically, before joining encoder features via the skip connections, attention gates are used to dynamically highlight regions with brain tissue while suppressing irrelevant background areas. This is particularly useful for brain segmentation where tissue boundaries can be subtle and structures vary in size.
-The network is trained using a combined Dice and Cross-Entropy loss function, which directly optimises the segmentation quality metric while maintaining stable gradients. It also employs data augmentation via horizontal flipping to improve generalization across the anatomical variability present across patients.
\ No newline at end of file
+The network is trained using a combined Dice and Cross-Entropy loss function, which directly optimises the segmentation quality metric while maintaining stable gradients. It also employs data augmentation via horizontal flipping to improve generalization across the anatomical variability present across patients.
+
+![Attention U-Net architecture](https://www.researchgate.net/publication/347344899/figure/fig6/AS:971357475069952@1608601077414/The-architecture-of-Attention-U-Net-Attention-gate-selects-features-by-using-the.png)
+*Figure 1: Attention U-Net architecture. Source: Hwang et al., 2020. Licensed under CC BY-NC 4.0.*

From 8aa65b9ea30f9e3f91d386e5bb4408932562f87b Mon Sep 17 00:00:00 2001
From: Brian
Date: Fri, 7 Nov 2025 09:53:24 +1000
Subject: [PATCH 06/18] Add AttentionUNet class to represent full model architecture

---
 recognition/AttUNetOASIS/unet.py | 83 ++++++++++++++++++++++++++++++++
 1 file changed, 83 insertions(+)

diff --git a/recognition/AttUNetOASIS/unet.py b/recognition/AttUNetOASIS/unet.py
index e5e46c0d1..104363141 100644
--- a/recognition/AttUNetOASIS/unet.py
+++ b/recognition/AttUNetOASIS/unet.py
@@ -140,3 +140,86 @@ def forward(self, g, x):
         # Apply attention mask to suppress irrelevant regions
         return x * psi  # Same shape as x; psi broadcasts over its channels
+
+class AttentionUNet(nn.Module):
+    """
+    Attention U-Net for semantic segmentation
+    Encoder-decoder with attention gates before skip connections
+    """
+
+    def __init__(self, in_channels=1, num_classes=4, init_features=64):
+        super(AttentionUNet, self).__init__()
+
+        features = init_features  # Base number of feature maps
+
+        # Encoder (downsampling path)
+        self.enc1 = ConvBlock(in_channels, features)
+        self.pool1 = nn.MaxPool2d(kernel_size=2, stride=2)  # Downsample by 2x
+
+        self.enc2 = ConvBlock(features, features * 2)
+        self.pool2 = nn.MaxPool2d(kernel_size=2, stride=2)
+
+        self.enc3 = ConvBlock(features * 2, features * 4)
+        self.pool3 = nn.MaxPool2d(kernel_size=2, stride=2)
+
+        self.enc4 = ConvBlock(features * 4, features * 8)
+        self.pool4 = nn.MaxPool2d(kernel_size=2, stride=2)
+
+        # Bottleneck
+        self.bottleneck = ConvBlock(features * 8, features * 16)  # Deepest feature extraction
+
+        # Decoder (upsampling path) with Attention Gates
+        # At each stage, upsample -> apply attention on encoder feature -> concatenate -> refine
+        self.upconv4 = nn.ConvTranspose2d(features * 16, features * 8, kernel_size=2, stride=2)
+        self.att4 = AttentionBlock(F_g=features * 8, F_l=features * 8, F_int=features * 4)
+        self.dec4 = ConvBlock(features * 16, features * 8)
+
+        self.upconv3 = nn.ConvTranspose2d(features * 8, features * 4, kernel_size=2, stride=2)
+        self.att3 = AttentionBlock(F_g=features * 4, F_l=features * 4, F_int=features * 2)
+        self.dec3 = ConvBlock(features * 8, features * 4)
+
+        self.upconv2 = nn.ConvTranspose2d(features * 4, features * 2, kernel_size=2, stride=2)
+        self.att2 = AttentionBlock(F_g=features * 2, F_l=features * 2, F_int=features)
+        self.dec2 = ConvBlock(features * 4, features * 2)
+
+        self.upconv1 = nn.ConvTranspose2d(features * 2, features, kernel_size=2, stride=2)
+        self.att1 = AttentionBlock(F_g=features, F_l=features, F_int=features // 2)
+        self.dec1 = ConvBlock(features * 2, features)
+
+        # Output layer, 1x1 conv for per-pixel class scores
+        self.out = nn.Conv2d(features, num_classes, kernel_size=1)
+
+    def forward(self, x):
+        # Encoder
+        enc1 = self.enc1(x)
+        enc2 = self.enc2(self.pool1(enc1))
+        enc3 = self.enc3(self.pool2(enc2))
+        enc4 = self.enc4(self.pool3(enc3))
+
+        # Bottleneck
+        bottleneck = self.bottleneck(self.pool4(enc4))
+
+        # Decoder with Attention Gates
+        # At each stage, upsample bottleneck -> apply attention on encoder feature
+        # -> concatenate filtered encoder feature with decoder -> refine combined features
+        dec4 = self.upconv4(bottleneck)
+        enc4_att = self.att4(g=dec4, x=enc4)
+        dec4 = torch.cat((enc4_att, dec4), dim=1)
+        dec4 = self.dec4(dec4)
+
+        dec3 = self.upconv3(dec4)
+        enc3_att = self.att3(g=dec3, x=enc3)
+        dec3 = torch.cat((enc3_att, dec3), dim=1)
+        dec3 = self.dec3(dec3)
+
+        dec2 = self.upconv2(dec3)
+        enc2_att = self.att2(g=dec2, x=enc2)
+        dec2 = torch.cat((enc2_att, dec2), dim=1)
+        dec2 = self.dec2(dec2)
+
+        dec1 = self.upconv1(dec2)
+        enc1_att = self.att1(g=dec1, x=enc1)
+        dec1 = torch.cat((enc1_att, dec1), dim=1)
+        dec1 = self.dec1(dec1)
+
+        return self.out(dec1)  # per-pixel class predictions

From 83e826fd9309b63aedafa24ca7f18bcea7e05848 Mon Sep 17 00:00:00 2001
From: Brian
Date: Fri, 7 Nov 2025 10:13:32 +1000
Subject: [PATCH 07/18] Update README.md to include dependencies section

---
 recognition/AttUNetOASIS/README.md | 9 +++++++++
 1 file changed, 9 insertions(+)

diff --git a/recognition/AttUNetOASIS/README.md b/recognition/AttUNetOASIS/README.md
index 96f7015ac..a61561a17 100644
--- a/recognition/AttUNetOASIS/README.md
+++ b/recognition/AttUNetOASIS/README.md
@@ -10,3 +10,12 @@ The network is trained using a combined Dice and Cross-Entropy loss function, wh
 ![Attention U-Net architecture](https://www.researchgate.net/publication/347344899/figure/fig6/AS:971357475069952@1608601077414/The-architecture-of-Attention-U-Net-Attention-gate-selects-features-by-using-the.png)
 *Figure 1: Attention U-Net architecture. Source: Hwang et al., 2020. Licensed under CC BY-NC 4.0.*
+
+## Dependencies
+Python 3.10.19 was used for this implementation. Packages used and their versions are listed below:
+| Package | Version |
+| :------- | :------: |
+| torch+cu118 | 2.7.1 |
+| pillow | 12.0.0 |
+
+Results are highly reproducible for homogeneous data, i.e. MRI scans with unique pixel values for brain regions.

From 372e63387909118233e277e0df3b7ac9455d61dc Mon Sep 17 00:00:00 2001
From: Brian
Date: Fri, 7 Nov 2025 10:22:41 +1000
Subject: [PATCH 08/18] Implement DiceLoss and CombinedLoss for segmentation

Added DiceLoss and CombinedLoss classes for segmentation tasks.
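
For reference, the Dice term implemented below corresponds to the following, with s the smoothing constant, p the softmax probabilities, g the one-hot targets, the inner sums taken over the spatial positions of each slice, and the mean taken over the C classes and the batch:

    \mathcal{L}_{\mathrm{Dice}} = 1 - \operatorname{mean}_{b,c} \frac{2 \sum_{i} p_{b,c,i}\, g_{b,c,i} + s}{\sum_{i} p_{b,c,i} + \sum_{i} g_{b,c,i} + s},
    \qquad \mathcal{L} = \mathcal{L}_{\mathrm{CE}} + \mathcal{L}_{\mathrm{Dice}}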
---
 recognition/AttUNetOASIS/unet.py | 39 +++++++++++++++++++++++++++++++-
 1 file changed, 38 insertions(+), 1 deletion(-)

diff --git a/recognition/AttUNetOASIS/unet.py b/recognition/AttUNetOASIS/unet.py
index 104363141..435fab3f7 100644
--- a/recognition/AttUNetOASIS/unet.py
+++ b/recognition/AttUNetOASIS/unet.py
@@ -222,4 +222,41 @@ def forward(self, x):
         dec1 = torch.cat((enc1_att, dec1), dim=1)
         dec1 = self.dec1(dec1)

-        return self.out(dec1)  # per-pixel class predictions
+        return self.out(dec1)  # Per-pixel class predictions
+
+class DiceLoss(nn.Module):
+    """Dice loss for segmentation"""
+
+    def __init__(self, smooth=1.0):
+        super(DiceLoss, self).__init__()
+        self.smooth = smooth  # Small constant to avoid division by zero
+
+    def forward(self, pred, target):
+        # Apply softmax and convert target labels to one-hot encoding
+        pred = F.softmax(pred, dim=1)
+        target_one_hot = F.one_hot(target, num_classes=NUM_CLASSES).permute(0, 3, 1, 2).float()
+
+        # Compute per-class intersection over spatial dimensions
+        intersection = (pred * target_one_hot).sum(dim=(2, 3))
+        union = pred.sum(dim=(2, 3)) + target_one_hot.sum(dim=(2, 3))  # Compute per-class union
+
+        # Compute Dice coefficient per class and add smoothing
+        dice = (2.0 * intersection + self.smooth) / (union + self.smooth)
+        return 1.0 - dice.mean()
+
+
+class CombinedLoss(nn.Module):
+    """Combined Cross-Entropy + Dice loss"""
+
+    def __init__(self, weight=None):
+        super(CombinedLoss, self).__init__()
+        self.ce_loss = nn.CrossEntropyLoss(weight=weight)  # Standard CE loss
+        self.dice_loss = DiceLoss()  # Dice loss for overlap
+
+    def forward(self, pred, target):
+        # Compute cross-entropy and Dice losses
+        ce = self.ce_loss(pred, target)
+        dice = self.dice_loss(pred, target)
+
+        # Combine and return losses
+        return ce + dice

From f502f1414061cae37325563da4b480eef46ce856 Mon Sep 17 00:00:00 2001
From: Brian
Date: Fri, 7 Nov 2025 10:33:30 +1000
Subject: [PATCH 09/18] Include training data section in README

---
 recognition/AttUNetOASIS/README.md | 5 ++++-
 1 file changed, 4 insertions(+), 1 deletion(-)

diff --git a/recognition/AttUNetOASIS/README.md b/recognition/AttUNetOASIS/README.md
index a61561a17..29a9b358a 100644
--- a/recognition/AttUNetOASIS/README.md
+++ b/recognition/AttUNetOASIS/README.md
@@ -3,7 +3,7 @@
 ## Algorithm Description
 This implementation uses an Attention U-Net architecture to perform semantic segmentation of brain MRI scans from the OASIS dataset. It solves the problem of automatic tissue classification: that is, given a 2D image of an MRI slice, the model predicts a label for every pixel indicating whether it belongs to the background, cerebrospinal fluid (CSF), gray matter (GM), or white matter (WM). This helps avoid the need to rely on traditional methods such as pixel counting and voxel-based morphometry, which are time-consuming and subject to inter-rater variability.

-## How it works
+## How It Works
 The Attention U-Net is an improved version of the traditional U-Net neural network. In line with its predecessor, Attention U-Net features an encoder-decoder architecture that uses convolution blocks to perform feature extraction, performs hierarchical downsampling and upsampling, and produces a pixelwise segmentation output, with the added benefit of attention mechanisms. In the context of the OASIS dataset, the encoder progressively downsamples the input MRI slices, extracting hierarchical features from low-level edges to high-level anatomical patterns.
The decoder symmetrically upsamples these features back to the original resolution. But critically, before joining encoder features via the skip connections, attention gates are used to dynamically highlight regions with brain tissue while suppressing irrelevant background areas. This is particularly useful for brain segmentation where tissue boundaries can be subtle and structures vary in size.
 The network is trained using a combined Dice and Cross-Entropy loss function, which directly optimises the segmentation quality metric while maintaining stable gradients. It also employs data augmentation via horizontal flipping to improve generalization across the anatomical variability present across patients.
@@ -19,3 +19,6 @@
 | pillow | 12.0.0 |

 Results are highly reproducible for homogeneous data, i.e. MRI scans with unique pixel values for brain regions.
+
+## Training Data
+Training used the preprocessed 2D slices from the 3D OASIS MRI volumes, with the corresponding segmentation masks serving as the training targets against which the loss was measured.

From 032bb1060a6e2e8057ab6be2daa30695142bab25 Mon Sep 17 00:00:00 2001
From: Brian
Date: Fri, 7 Nov 2025 10:45:41 +1000
Subject: [PATCH 10/18] Implement dice coefficient and training epoch function

Added dice coefficient calculation for evaluation and a training epoch function.
---
 recognition/AttUNetOASIS/unet.py | 49 ++++++++++++++++++++++++++++++++
 1 file changed, 49 insertions(+)

diff --git a/recognition/AttUNetOASIS/unet.py b/recognition/AttUNetOASIS/unet.py
index 435fab3f7..b32e80c0b 100644
--- a/recognition/AttUNetOASIS/unet.py
+++ b/recognition/AttUNetOASIS/unet.py
@@ -260,3 +260,52 @@ def forward(self, pred, target):
         # Combine and return losses
         return ce + dice
+
+def dice_coefficient(pred, target, num_classes):
+    """Calculate Dice coefficient per class for evaluation"""
+    dice_scores = []  # Dice scores for each class
+
+    # Convert model predictions to discrete class labels (argmax over channels)
+    pred = torch.argmax(pred, dim=1)
+
+    for cls in range(num_classes):
+        # Compute binary masks for current class
+        pred_cls = (pred == cls).float()  # prediction == class ? 1 : 0
+        target_cls = (target == cls).float()  # target == class ? 1 : 0
+
+        # Compute intersection and union for Dice
+        intersection = (pred_cls * target_cls).sum()
+        union = pred_cls.sum() + target_cls.sum()
+
+        # Handle edge cases where union is 0
+        if union == 0:
+            dice = 1.0 if intersection == 0 else 0.0
+        else:
+            dice = (2.0 * intersection) / union
+
+        dice_scores.append(float(dice))  # Store as float (dice may be a tensor or a Python float)
+
+    return dice_scores
+
+def train_epoch(model, loader, criterion, optimizer, device):
+    """Train for one epoch"""
+    model.train()
+    total_loss = 0  # Track batch losses
+
+    for images, masks in tqdm(loader, desc="Training", leave=False):
+        # Move input and target images to device
+        images = images.to(device)
+        masks = masks.to(device)
+
+        # Reset gradients and perform forward pass
+        optimizer.zero_grad()
+        outputs = model(images)
+        loss = criterion(outputs, masks)  # Compute loss
+
+        # Perform backward pass and update model params
+        loss.backward()
+        optimizer.step()
+
+        total_loss += loss.item()  # Accumulate loss for reporting
+
+    return total_loss / len(loader)  # Average loss over all batches

From 703be50523fdf9a17dfaba5b58696a68b7c702dc Mon Sep 17 00:00:00 2001
From: Brian
Date: Fri, 7 Nov 2025 10:51:42 +1000
Subject: [PATCH 11/18] Implement main function for training setup

Added main function to initialize datasets, loaders, and model.
---
 recognition/AttUNetOASIS/unet.py | 22 ++++++++++++++++++++++
 1 file changed, 22 insertions(+)

diff --git a/recognition/AttUNetOASIS/unet.py b/recognition/AttUNetOASIS/unet.py
index b32e80c0b..bd815386e 100644
--- a/recognition/AttUNetOASIS/unet.py
+++ b/recognition/AttUNetOASIS/unet.py
@@ -309,3 +309,25 @@ def train_epoch(model, loader, criterion, optimizer, device):
         total_loss += loss.item()  # Accumulate loss for reporting

     return total_loss / len(loader)  # Average loss over all batches
+
+def main():
+    # Create datasets
+    train_dataset = OASISDataset(TRAIN_IMG_DIR, TRAIN_SEG_DIR, LABEL_MAP, augment=True)
+    val_dataset = OASISDataset(VAL_IMG_DIR, VAL_SEG_DIR, LABEL_MAP, augment=False)
+    test_dataset = OASISDataset(TEST_IMG_DIR, TEST_SEG_DIR, LABEL_MAP, augment=False)
+
+    train_loader = DataLoader(train_dataset, batch_size=BATCH_SIZE, shuffle=True, num_workers=1, pin_memory=True)
+    val_loader = DataLoader(val_dataset, batch_size=BATCH_SIZE, shuffle=False, num_workers=1, pin_memory=True)
+    test_loader = DataLoader(test_dataset, batch_size=BATCH_SIZE, shuffle=False, num_workers=1, pin_memory=True)
+
+    # Print number of samples loaded
+    print(f"Train: {len(train_dataset)} samples")
+    print(f"Val: {len(val_dataset)} samples")
+    print(f"Test: {len(test_dataset)} samples")
+
+    # Create model
+    model = AttentionUNet(in_channels=1, num_classes=NUM_CLASSES, init_features=64)
+    model = model.to(device)
+
+if __name__ == '__main__':
+    main()

From 8b23edd41b34c24c3d9ac81d41c986260768d053 Mon Sep 17 00:00:00 2001
From: Brian
Date: Fri, 7 Nov 2025 10:54:08 +1000
Subject: [PATCH 12/18] Update README to mention usage of AdamW.

---
 recognition/AttUNetOASIS/README.md | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/recognition/AttUNetOASIS/README.md b/recognition/AttUNetOASIS/README.md
index 29a9b358a..a25ca0606 100644
--- a/recognition/AttUNetOASIS/README.md
+++ b/recognition/AttUNetOASIS/README.md
@@ -6,7 +6,7 @@ This implementation uses an Attention U-Net architecture to perform semantic seg
 ## How It Works
 The Attention U-Net is an improved version of the traditional U-Net neural network.
In line with its predecessor, Attention U-Net features an encoder-decoder architecture that uses convolution blocks to perform feature extraction, performs hierarchical downsampling and upsampling, and produces a pixelwise segmentation output, with the added benefit of attention mechanisms. In the context of the OASIS dataset, the encoder progressively downsamples the input MRI slices, extracting hierarchical features from low-level edges to high-level anatomical patterns. The decoder symmetrically upsamples these features back to the original resolution. But critically, before joining encoder features via the skip connections, attention gates are used to dynamically highlight regions with brain tissue while suppressing irrelevant background areas. This is particularly useful for brain segmentation where tissue boundaries can be subtle and structures vary in size.
-The network is trained using a combined Dice and Cross-Entropy loss function, which directly optimises the segmentation quality metric while maintaining stable gradients. It also employs data augmentation via horizontal flipping to improve generalization across the anatomical variability present across patients.
+The network is trained using a combined Dice and Cross-Entropy loss function, which directly optimises the segmentation quality metric while maintaining stable gradients. It also employs data augmentation via horizontal flipping to improve generalization across the anatomical variability present across patients. The AdamW optimiser is also used, providing adaptive per-parameter step sizes with decoupled weight decay.

 ![Attention U-Net architecture](https://www.researchgate.net/publication/347344899/figure/fig6/AS:971357475069952@1608601077414/The-architecture-of-Attention-U-Net-Attention-gate-selects-features-by-using-the.png)
 *Figure 1: Attention U-Net architecture. Source: Hwang et al., 2020. Licensed under CC BY-NC 4.0.*

From b4b0e2759956e7e7fe285dfcd92bcdee1d3ee28e Mon Sep 17 00:00:00 2001
From: Brian
Date: Fri, 7 Nov 2025 10:59:24 +1000
Subject: [PATCH 13/18] Define hyperparameters for training

---
 recognition/AttUNetOASIS/unet.py | 8 ++++++++
 1 file changed, 8 insertions(+)

diff --git a/recognition/AttUNetOASIS/unet.py b/recognition/AttUNetOASIS/unet.py
index bd815386e..746fe7bae 100644
--- a/recognition/AttUNetOASIS/unet.py
+++ b/recognition/AttUNetOASIS/unet.py
@@ -23,6 +23,14 @@
 # Expect 4 unique PNG values -> classes from OASIS dataset.
 LABEL_MAP = None

+# Hyperparameters
+NUM_CLASSES = 4  # Number of unique pixel values -> brain tissue classes in MRI slices
+NUM_EPOCHS = 5  # Small for testing
+LEARNING_RATE = 1.0e-4  # Standard for AdamW in segmentation tasks
+BATCH_SIZE = 32  # Rangpur uses A100
+IMG_SIZE = None  # Extracted from data at runtime
+WEIGHT_DECAY = 1.0e-5  # Standard for AdamW

From 301ebedbefc965f26c4f02e44955e9c1dff6081b Mon Sep 17 00:00:00 2001
From: Brian
Date: Fri, 7 Nov 2025 11:12:22 +1000
Subject: [PATCH 14/18] Implement auto-detection of label mapping and classes

Added a function to auto-detect label mapping, number of classes, and image size from segmentation masks. Updated NUM_CLASSES to be determined dynamically.
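
As a quick sanity check (illustrative only, assuming the masks use the raw values noted earlier, i.e. [0, 85, 170, 255], and square slices):

    from unet import detect_label_mapping, TRAIN_SEG_DIR

    label_map, num_classes, img_size = detect_label_mapping(TRAIN_SEG_DIR)
    print(label_map)    # expected: {0: 0, 85: 1, 170: 2, 255: 3}
    print(num_classes)  # expected: 4
    print(img_size)     # side length of the (square) slices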
---
 recognition/AttUNetOASIS/unet.py | 19 ++++++++++++++++++-
 1 file changed, 18 insertions(+), 1 deletion(-)

diff --git a/recognition/AttUNetOASIS/unet.py b/recognition/AttUNetOASIS/unet.py
index 746fe7bae..16a75f142 100644
--- a/recognition/AttUNetOASIS/unet.py
+++ b/recognition/AttUNetOASIS/unet.py
@@ -24,13 +24,25 @@ LABEL_MAP = None

 # Hyperparameters
-NUM_CLASSES = 4  # Number of unique pixel values -> brain tissue classes in MRI slices
+NUM_CLASSES = None  # Number of unique pixel values -> brain tissue classes in MRI slices
 NUM_EPOCHS = 5  # Small for testing
 LEARNING_RATE = 1.0e-4  # Standard for AdamW in segmentation tasks
 BATCH_SIZE = 32  # Rangpur uses A100
 IMG_SIZE = None  # Extracted from data at runtime
 WEIGHT_DECAY = 1.0e-5  # Standard for AdamW

+def detect_label_mapping(seg_dir):
+    """Auto-detect label values, number of classes, and image size from first segmentation mask"""
+    seg_file = sorted([f for f in os.listdir(seg_dir) if f.endswith('.png')])[0]
+    seg = np.array(Image.open(os.path.join(seg_dir, seg_file)))
+
+    unique_values = sorted(np.unique(seg).tolist())
+    label_map = {val: idx for idx, val in enumerate(unique_values)}
+    num_classes = len(unique_values)
+    img_size = seg.shape[0]  # Assumes square slices
+
+    return label_map, num_classes, img_size
+
 class OASISDataset(Dataset):
     """Custom dataset for OASIS brain MRI slices and segmentation masks"""
@@ -319,6 +331,11 @@ def train_epoch(model, loader, criterion, optimizer, device):
     return total_loss / len(loader)  # Average loss over all batches

 def main():
+    global LABEL_MAP, NUM_CLASSES, IMG_SIZE
+
+    # Auto-detect label mapping, number of classes, and image size
+    LABEL_MAP, NUM_CLASSES, IMG_SIZE = detect_label_mapping(TRAIN_SEG_DIR)
+
     # Create datasets
     train_dataset = OASISDataset(TRAIN_IMG_DIR, TRAIN_SEG_DIR, LABEL_MAP, augment=True)
     val_dataset = OASISDataset(VAL_IMG_DIR, VAL_SEG_DIR, LABEL_MAP, augment=False)
     test_dataset = OASISDataset(TEST_IMG_DIR, TEST_SEG_DIR, LABEL_MAP, augment=False)

From d24d00aa6be319731ba70107af0864bb6d893059 Mon Sep 17 00:00:00 2001
From: Brian
Date: Fri, 7 Nov 2025 11:19:43 +1000
Subject: [PATCH 15/18] Implement validation function for model evaluation

---
 recognition/AttUNetOASIS/unet.py | 38 ++++++++++++++++++++++++++++++++
 1 file changed, 38 insertions(+)

diff --git a/recognition/AttUNetOASIS/unet.py b/recognition/AttUNetOASIS/unet.py
index 16a75f142..b5f6d1b16 100644
--- a/recognition/AttUNetOASIS/unet.py
+++ b/recognition/AttUNetOASIS/unet.py
@@ -329,6 +329,38 @@ def train_epoch(model, loader, criterion, optimizer, device):
         total_loss += loss.item()  # Accumulate loss for reporting

     return total_loss / len(loader)  # Average loss over all batches
+
+def validate(model, loader, criterion, device, phase="Validation"):
+    """Validate on validation/test set"""
+    model.eval()
+    total_loss = 0
+    dice_per_class = [[] for _ in range(NUM_CLASSES)]
+    num_batches = len(loader)
+
+    with torch.no_grad():
+        for i, (images, masks) in enumerate(loader):
+            images = images.to(device)
+            masks = masks.to(device)
+
+            outputs = model(images)
+            loss = criterion(outputs, masks)
+            total_loss += loss.item()
+
+            # Calculate Dice per class
+            dice_scores = dice_coefficient(outputs, masks, NUM_CLASSES)
+            for cls in range(NUM_CLASSES):
+                dice_per_class[cls].append(dice_scores[cls])
+
+            # Print progress every 20% of validation
+            if (i + 1) % max(1, num_batches // 5) == 0:
+                progress = (i + 1) / num_batches
+                print(f"  {phase}: {progress:.1%} ({i+1}/{num_batches})")
+
+    avg_loss = total_loss / len(loader)
+    avg_dice_per_class = [np.mean(scores) for scores in dice_per_class]
+    avg_dice = np.mean(avg_dice_per_class)
+
+    return avg_loss, avg_dice, avg_dice_per_class

 def main():
     global LABEL_MAP, NUM_CLASSES, IMG_SIZE
@@ -354,5 +386,11 @@ def main():
     model = AttentionUNet(in_channels=1, num_classes=NUM_CLASSES, init_features=64)
     model = model.to(device)
+
+    # Loss and optimizer
+    criterion = CombinedLoss()
+    optimizer = torch.optim.AdamW(model.parameters(), lr=LEARNING_RATE, weight_decay=WEIGHT_DECAY)
+    scheduler = torch.optim.lr_scheduler.ReduceLROnPlateau(optimizer, mode='max', factor=0.5, patience=10)
+

 if __name__ == '__main__':
     main()

From 6fb3ff7a913e0021184be8576d22f3bca5f7c4b8 Mon Sep 17 00:00:00 2001
From: Brian
Date: Fri, 7 Nov 2025 11:57:24 +1000
Subject: [PATCH 16/18] Add training loop, checkpointing, and test evaluation

---
 recognition/AttUNetOASIS/unet.py | 45 ++++++++++++++++++++++++++++++++
 1 file changed, 45 insertions(+)

diff --git a/recognition/AttUNetOASIS/unet.py b/recognition/AttUNetOASIS/unet.py
index b5f6d1b16..734d3374e 100644
--- a/recognition/AttUNetOASIS/unet.py
+++ b/recognition/AttUNetOASIS/unet.py
@@ -390,7 +390,52 @@ def main():
     criterion = CombinedLoss()
     optimizer = torch.optim.AdamW(model.parameters(), lr=LEARNING_RATE, weight_decay=WEIGHT_DECAY)
     scheduler = torch.optim.lr_scheduler.ReduceLROnPlateau(optimizer, mode='max', factor=0.5, patience=10)
+
+    # Training loop
+    best_dice = 0.0
+    patience_counter = 0
+
+    start_time = time.time()
+    for epoch in range(NUM_EPOCHS):
+        train_loss = train_epoch(model, train_loader, criterion, optimizer, device)
+        val_loss, val_dice, val_dice_per_class = validate(model, val_loader, criterion, device)
+
+        print(f"Epoch {epoch+1}: Train Loss={train_loss:.4f}, Val Loss={val_loss:.4f}, Val Dice={val_dice:.4f}")
+
+        scheduler.step(val_dice)
+
+        if val_dice > best_dice:
+            best_dice = val_dice
+            torch.save({'model_state_dict': model.state_dict(), 'best_dice': best_dice}, 'best_attention_unet.pth')
+            print(f"Saved (Dice: {best_dice:.4f})")
+            patience_counter = 0
+        else:
+            patience_counter += 1
+
+        if patience_counter >= 30:
+            print(f"Early stopping at epoch {epoch+1}")
+            break
+
+    elapsed = time.time() - start_time
+    print(f"Training done in {elapsed/60:.2f} min. Best Dice: {best_dice:.4f}")
Best Dice: {best_dice:.4f}") + + # Test evaluation + print("\nTesting...") + checkpoint = torch.load('best_attention_unet.pth', weights_only=False) + model.load_state_dict(checkpoint['model_state_dict']) + + test_loss, test_dice, test_dice_per_class = validate(model, test_loader, criterion, device) + + class_names = ['Background', 'CSF', 'Gray Matter', 'White Matter'] + print(f"\nTest Loss: {test_loss:.4f}, Test Dice: {test_dice:.4f}") + for i, (name, dice) in enumerate(zip(class_names, test_dice_per_class)): + print(f" {name}: {dice:.4f}") + + min_dice = min(test_dice_per_class[1:]) + print( + f"\nMin Dice (no bg): {min_dice:.4f} {'Dice threshold of 0.9 met' if min_dice >= 0.9 else 'Dice threshold of 0.9 not met'}" + ) if __name__ == '__main__': main() From 150e5cc83c4e5b43f467fb69fff042365093c562 Mon Sep 17 00:00:00 2001 From: Brian Date: Fri, 7 Nov 2025 11:59:18 +1000 Subject: [PATCH 17/18] Add SLURM script for running UNET model --- recognition/AttUNetOASIS/unetrunner | 14 ++++++++++++++ 1 file changed, 14 insertions(+) create mode 100644 recognition/AttUNetOASIS/unetrunner diff --git a/recognition/AttUNetOASIS/unetrunner b/recognition/AttUNetOASIS/unetrunner new file mode 100644 index 000000000..79df21f03 --- /dev/null +++ b/recognition/AttUNetOASIS/unetrunner @@ -0,0 +1,14 @@ +#!/bin/bash +#SBATCH --time=00:20:00 +#SBATCH --nodes=1 +#SBATCH --ntasks-per-node=1 +#SBATCH --gres=gpu:1 +#SBATCH --job-name=unet +#SBATCH -o unet.out + +module load cuda/11.4 + +source ~/miniconda3/etc/profile.d/conda.sh +conda activate unet + +python unet.py From bf8aa9cc0e99d6ef646f5ceb17cd2a28def95ba9 Mon Sep 17 00:00:00 2001 From: Brian Date: Sat, 8 Nov 2025 21:00:51 +1000 Subject: [PATCH 18/18] Add student ID, hopefully its not too late --- recognition/AttUNetOASIS/README.md | 2 ++ 1 file changed, 2 insertions(+) diff --git a/recognition/AttUNetOASIS/README.md b/recognition/AttUNetOASIS/README.md index a25ca0606..0119955d7 100644 --- a/recognition/AttUNetOASIS/README.md +++ b/recognition/AttUNetOASIS/README.md @@ -22,3 +22,5 @@ Results are highly reproducible for homogenous data, i.e. MRI scans with unique ## Training data The training process involved using preprocessed slices from 3D OASIS MRI volumes, which served as training targets and were used to measure the loss from the original input. + +**SID** 48915768