
[BUG] Inconsistencies between image crop sizes for dataset Platy CBG #36

@postnubilaphoebus


Describe the bug
When running the data Jupyter notebook from the examples as a Python script, I get crops of different sizes (roughly half are 16 × 80 × 80, the other half 24 × 80 × 80). This causes torch.stack to fail when I attempt training. The sizes also differ from those reported in the paper (https://www.sciencedirect.com/science/article/pii/S1361841522001700), which lists 32 × 136 × 136 for this dataset. Can you comment on what might be going wrong?
My apologies if I missed something.
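
For reference, torch.stack requires all tensors in a batch to have identical shapes, so mixing the two crop sizes above reproduces the training error; here is a minimal illustration, independent of EmbedSeg:

import torch

# torch.stack refuses tensors of unequal size, which is exactly what mixed
# crop sizes produce once the dataloader collates a batch.
a = torch.zeros(16, 80, 80)
b = torch.zeros(24, 80, 80)
try:
    torch.stack([a, b])
except RuntimeError as e:
    print(e)  # complains that stack expects each tensor to be equal size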

To Reproduce

from tqdm import tqdm
from glob import glob
import tifffile
import numpy as np
import os
from EmbedSeg.utils.preprocess_data import extract_data, split_train_val, split_train_test, get_data_properties
from EmbedSeg.utils.generate_crops import *
from EmbedSeg.utils.visualize import visualize_crop_3d
import json
from matplotlib.colors import ListedColormap

data_dir = '../../../data'
project_name = 'Platynereis-Nuclei-CBG'

extract_data(
    zip_url = 'https://github.com/juglab/EmbedSeg/releases/download/v0.1.0/Platynereis-Nuclei-CBG.zip',
    data_dir = data_dir,
    project_name = project_name,
)

split_train_test(
    data_dir = data_dir,
    project_name = project_name, 
    train_test_name = 'train',
    subset = 2, 
    by_fraction = False,
    seed = 0)

split_train_val(
    data_dir = data_dir,
    project_name = project_name, 
    train_val_name = 'train',
    subset = 2,
    by_fraction = False,
    seed = 0)

data_properties_dir = get_data_properties(data_dir, project_name, train_val_name=['train', 'val'], 
                                          test_name=['test'], mode='3d')

data_properties_dir['data_type']='16-bit'
data_properties_dir['pixel_size_x_microns']=0.406 # set equal to voxel size (microns) in x dimension
data_properties_dir['pixel_size_y_microns']=0.406 # set equal to voxel size (microns) in y dimension
data_properties_dir['pixel_size_z_microns']=2.031 # set equal to voxel size (microns) in z dimension

with open('data_properties.json', 'w') as outfile:
    json.dump(data_properties_dir, outfile)
    print("Dataset properies of the `{}` dataset is saved to `data_properties.json`".format(project_name))

center = 'medoid' # 'medoid', 'centroid'
try:
    assert center in {'medoid', 'centroid'}
    print("Spatial Embedding Location chosen as : {}".format(center))
except AssertionError as e:
    e.args += ('Please specify center as one of : {"medoid", "centroid"}', 42)
    raise

n_sigma = 5

# Round the integer part of x up to the nearest multiple of 8.
def round_up_8(x):
    return (x.astype(int) + 7) & (-8)

crops_dir = './crops'
data_subsets = ['train', 'val']
# Crop size per axis = average object size + n_sigma * standard deviation, rounded up to a multiple of 8.
crop_size_z = round_up_8(data_properties_dir['avg_object_size_z'] + n_sigma * data_properties_dir['stdev_object_size_z'])
crop_size_y = np.maximum(
    round_up_8(data_properties_dir['avg_object_size_y'] + n_sigma * data_properties_dir['stdev_object_size_y']),
    round_up_8(data_properties_dir['avg_object_size_x'] + n_sigma * data_properties_dir['stdev_object_size_x']))
crop_size_x = crop_size_y
print("Crop size in x and y will be set equal to {}. Crop size in z is set equal to {}".format(crop_size_x, crop_size_z))

anisotropy_factor = data_properties_dir['pixel_size_z_microns']/data_properties_dir['pixel_size_x_microns']
speed_up = 2

norm = 'min-max-percentile'
try:
    assert norm in {'min-max-percentile', 'mean-std', 'absolute'}
    print("Normalization chosen as : {}".format(norm))
except AssertionError as e:
    e.args += ('Please specify norm as one of : {"min-max-percentile", "mean-std", "absolute"}', 42)
    raise

for data_subset in data_subsets:
    image_dir = os.path.join(data_dir, project_name, data_subset, 'images')
    instance_dir = os.path.join(data_dir, project_name, data_subset, 'masks')
    image_names = sorted(glob(os.path.join(image_dir, '*.tif')))
    instance_names = sorted(glob(os.path.join(instance_dir, '*.tif')))
    for i in tqdm(np.arange(len(image_names))):
        process_3d(image_names[i], instance_names[i], os.path.join(crops_dir, project_name), data_subset,
                   crop_size_x=crop_size_x, crop_size_y=crop_size_y, crop_size_z=crop_size_z,
                   center=center, anisotropy_factor=anisotropy_factor, speed_up=speed_up,
                   norm=norm, data_type=data_properties_dir['data_type'])
    print("Cropping of images, instances and centre_images for data_subset = `{}` done!".format(data_subset))

normalization = {}
normalization['data_type']=data_properties_dir['data_type']
normalization['norm']=norm
with open('normalization.json', 'w') as outfile:
    json.dump(normalization, outfile)
    print("Normalization properties of the `{}` dataset is saved to `normalization.json`".format(project_name))

Expected behavior
Consistent crop sizes across the whole dataset, matching the dimensions reported in the paper.

Desktop (please complete the following information):

  • OS: SUSE Linux Enterprise Server 15 SP6
