Skip to content

Preprocess Data #12

@hoai-thuong

Description

@hoai-thuong

To reproduce the preprocessing of the BTCV data, I followed your paper, which states: "re-sample all the CT scans to the voxel spacing [1.5×1.5×2.0] mm³ and normalize them to have zero mean and unit variance".

I wrote the code below and got an image shape that matches your preprocessed data (for example, 0001.h5).

But when I plot the histogram of my data and the histogram of yours, they are different.

I don't know whether your team applies any additional steps, such as clipping or removing values.

I hope you will reply or suggest some ideas. Thank you so much.

image

This is my code:

import os
import nibabel as nib
import numpy as np
import h5py
from scipy.ndimage import zoom

# Root folder of the dataset and its image / label sub-folders.
data_dir = "data"
images_dir, labels_dir = (
    os.path.join(data_dir, subfolder) for subfolder in ("imagesTr", "labelsTr")
)

# Spacing every volume is resampled to, one value per axis.
target_spacing = [1.5, 1.5, 2.0]

def load_nifti_file(filepath):
    """Read the NIfTI file at *filepath*.

    Returns a ``(data, spacing)`` pair: the array produced by
    ``get_fdata()`` and the zooms reported by the image header.
    """
    img = nib.load(filepath)
    return img.get_fdata(), img.header.get_zooms()

def resample_volume(volume, original_spacing, target_spacing, order=3):
    """Resample *volume* from *original_spacing* to *target_spacing*.

    Every axis is scaled by ``original / target`` spacing and the result is
    interpolated with ``scipy.ndimage.zoom``.

    Args:
        volume: Array to resample.
        original_spacing: Per-axis spacing of *volume*.
        target_spacing: Desired per-axis spacing.
        order: Spline interpolation order handed to ``zoom``. Defaults to 3
            (cubic), which is what this function always used before the
            parameter existed. Pass 0 (nearest neighbour) for categorical
            data such as label maps so no new values are interpolated in.

    Returns:
        The resampled array.
    """
    zoom_factors = [
        src / dst for src, dst in zip(original_spacing, target_spacing)
    ]
    return zoom(volume, zoom_factors, order=order)

def normalize_volume(volume):
    """Return *volume* shifted to zero mean and scaled to unit variance.

    The mean and standard deviation are computed over the whole array.
    A constant volume has zero standard deviation; in that case only the
    mean is subtracted, so the result is all zeros instead of NaN.
    """
    mean = np.mean(volume)
    std = np.std(volume)
    centered = volume - mean
    if std == 0:
        # Dividing by zero would fill the output with NaN.
        return centered
    return centered / std

def preprocess_and_save(image_path, label_path, output_path):
    """Resample an image/label pair, normalize the image, and write an HDF5 file.

    Args:
        image_path: Path of the image file, read with ``load_nifti_file``.
        label_path: Path of the label file, read with ``load_nifti_file``.
        output_path: Destination ``.h5`` file; it is opened in ``"w"`` mode.

    The output file holds two gzip-compressed datasets, ``"image"`` and
    ``"label"``. Both volumes are resampled to the module-level
    ``target_spacing``; only the image is normalized.
    """
    image, image_spacing = load_nifti_file(image_path)
    label, label_spacing = load_nifti_file(label_path)

    resampled_image = resample_volume(image, image_spacing, target_spacing)

    # Labels are categorical, so spline interpolation would blend neighbouring
    # class ids into values that are not valid labels. Nearest-neighbour
    # (order=0) only ever copies existing values.
    label_factors = [
        src / dst for src, dst in zip(label_spacing, target_spacing)
    ]
    resampled_label = zoom(label, label_factors, order=0)

    normalized_image = normalize_volume(resampled_image)

    # Save as .h5
    with h5py.File(output_path, "w") as f:
        f.create_dataset("image", data=normalized_image, compression="gzip")
        f.create_dataset("label", data=resampled_label, compression="gzip")

# Preprocess one sample case and write it next to the script.
sample_image = os.path.join(images_dir, "img0001.nii.gz")
sample_label = os.path.join(labels_dir, "label0001.nii.gz")
output_file = "0001.h5"

preprocess_and_save(sample_image, sample_label, output_file)

# Read the result back fully into memory, then report basic statistics.
with h5py.File(output_file, "r") as f:
    image = f["image"][:]
    label = f["label"][:]

for caption, value in (
    ("Image shape:", image.shape),
    ("Label shape:", label.shape),
    ("Image mean:", np.mean(image)),
    ("Image std:", np.std(image)),
):
    print(caption, value)

Metadata

Metadata

Assignees

No one assigned

    Labels

    No labels
    No labels

    Type

    No type

    Projects

    No projects

    Milestone

    No milestone

    Relationships

    None yet

    Development

    No branches or pull requests

    Issue actions