-
Notifications
You must be signed in to change notification settings - Fork 23
Open
Labels
bug — Something isn't working
Description
Describe the bug
When running the data jupyter notebook from examples as a python script, I get images with different crop sizes (about half being 16 x 80 x 80, the other half being 24 x 80 x 80). This results in an error during torch.stack when attempting training. Also, this seems different from the crop sizes in the paper (https://www.sciencedirect.com/science/article/pii/S1361841522001700), which lists 32 * 136 * 136 for this dataset. Can you comment on what might be going wrong?
My apologies if I missed something.
To Reproduce
from tqdm import tqdm
from glob import glob
import tifffile
import numpy as np
import os
from EmbedSeg.utils.preprocess_data import extract_data, split_train_val, split_train_test, get_data_properties
from EmbedSeg.utils.generate_crops import *
from EmbedSeg.utils.visualize import visualize_crop_3d
import json
from matplotlib.colors import ListedColormap
data_dir = '../../../data'
project_name = 'Platynereis-Nuclei-CBG'
# Download and unpack the demo dataset into `<data_dir>/<project_name>`.
extract_data(
zip_url = 'https://github.com/juglab/EmbedSeg/releases/download/v0.1.0/Platynereis-Nuclei-CBG.zip',
data_dir = data_dir,
project_name = project_name,
)
# Hold out 2 images as the test subset (`by_fraction=False` means `subset`
# is an absolute image count, not a fraction of the dataset).
split_train_test(
data_dir = data_dir,
project_name = project_name,
train_test_name = 'train',
subset = 2,
by_fraction = False,
seed = 0)
# From the remaining training images, carve out 2 images for validation
# (same count-based `subset` semantics as above).
split_train_val(
data_dir = data_dir,
project_name = project_name,
train_val_name = 'train',
subset = 2,
by_fraction = False,
seed = 0)
# Collect dataset statistics (average/stdev object sizes, etc.) over the
# train/val and test splits; `mode='3d'` treats each tif as a z-stack.
data_properties_dir = get_data_properties(data_dir, project_name, train_val_name=['train', 'val'],
                                          test_name=['test'], mode='3d')
data_properties_dir['data_type'] = '16-bit'
data_properties_dir['pixel_size_x_microns'] = 0.406  # set equal to voxel size (microns) in x dimension
data_properties_dir['pixel_size_y_microns'] = 0.406  # set equal to voxel size (microns) in y dimension
data_properties_dir['pixel_size_z_microns'] = 2.031  # set equal to voxel size (microns) in z dimension
# Persist the properties so later notebooks/scripts can reuse them.
with open('data_properties.json', 'w') as outfile:
    json.dump(data_properties_dir, outfile)
# Fix: message previously read "Dataset properies ... is saved".
print("Dataset properties of the `{}` dataset are saved to `data_properties.json`".format(project_name))
center = 'medoid'  # one of: 'medoid', 'centroid'
# Validate explicitly: `assert` is stripped when Python runs with -O, so it
# must not be used for input validation (the original try/assert pattern
# would silently accept bad values under optimized mode).
if center not in {'medoid', 'centroid'}:
    raise ValueError('Please specify center as one of : {"medoid", "centroid"}')
print("Spatial Embedding Location chosen as : {}".format(center))
n_sigma = 5  # number of object-size standard deviations added to the mean extent


def round_up_8(x):
    """Round `x` up to the nearest multiple of 8.

    Bug fix: the original did `x.astype(int)` first, truncating the
    fractional part before rounding — e.g. 16.1 was "rounded up" to 16
    instead of 24, which under-sizes the computed crop extents. Taking
    the ceiling first preserves the round-*up* semantics the name
    promises. Works for numpy scalars and arrays alike.
    """
    # (ceil(x) + 7) & -8 clears the low 3 bits after biasing, i.e. rounds
    # the integer ceiling up to the next multiple of 8.
    return (np.ceil(x).astype(int) + 7) & (-8)
crops_dir = './crops'
data_subsets = ['train', 'val']


def _extent(axis):
    # Mean object extent plus `n_sigma` standard deviations along `axis`,
    # rounded up to a multiple of 8.
    mean_key = 'avg_object_size_' + axis
    std_key = 'stdev_object_size_' + axis
    return round_up_8(data_properties_dir[mean_key] + n_sigma * data_properties_dir[std_key])


# z gets its own extent; x and y share the larger of their two extents so
# the in-plane crop is square.
crop_size_z = _extent('z')
crop_size_y = np.maximum(_extent('y'), _extent('x'))
crop_size_x = crop_size_y
print("Crop size in x and y will be set equal to {}. Crop size in z is set equal to {}".format(crop_size_x, crop_size_z))
# Ratio of axial to lateral voxel size; used by the cropper to compensate
# for anisotropic sampling.
anisotropy_factor = data_properties_dir['pixel_size_z_microns'] / data_properties_dir['pixel_size_x_microns']
speed_up = 2
norm = 'min-max-percentile'  # one of: 'min-max-percentile', 'mean-std', 'absolute'
# Validate explicitly: `assert` is stripped when Python runs with -O, so it
# must not be used for input validation (the original try/assert pattern
# would silently accept bad values under optimized mode).
if norm not in {'min-max-percentile', 'mean-std', 'absolute'}:
    raise ValueError('Please specify norm as one of : {"min-max-percentile", "mean-std", "absolute"}')
print("Normalization chosen as : {}".format(norm))
# Generate crops (images, instances, centre-images) for each data subset.
for data_subset in data_subsets:
    image_dir = os.path.join(data_dir, project_name, data_subset, 'images')
    instance_dir = os.path.join(data_dir, project_name, data_subset, 'masks')
    # Sorting both listings keeps image/mask files paired by filename.
    image_names = sorted(glob(os.path.join(image_dir, '*.tif')))
    instance_names = sorted(glob(os.path.join(instance_dir, '*.tif')))
    # Idiomatic pairing via zip instead of indexing through np.arange(len(...)).
    for image_name, instance_name in tqdm(zip(image_names, instance_names), total=len(image_names)):
        process_3d(image_name, instance_name, os.path.join(crops_dir, project_name), data_subset,
                   crop_size_x=crop_size_x, crop_size_y=crop_size_y, crop_size_z=crop_size_z,
                   center=center, anisotropy_factor=anisotropy_factor, speed_up=speed_up,
                   norm=norm, data_type=data_properties_dir['data_type'])
    print("Cropping of images, instances and centre_images for data_subset = `{}` done!".format(data_subset))
# Record how crops must be normalized at train/inference time, so the
# downstream pipeline applies the same scheme.
normalization = {
    'data_type': data_properties_dir['data_type'],
    'norm': norm,
}
with open('normalization.json', 'w') as outfile:
    json.dump(normalization, outfile)
print("Normalization properties of the `{}` dataset is saved to `normalization.json`".format(project_name))
Expected behavior
Equal crop sizes for the whole dataset, as well as crop sizes matching the dimensions mentioned in the paper.
Desktop (please complete the following information):
- OS: SUSE Linux Enterprise Server 15 SP6
Metadata
Metadata
Assignees
Labels
bug — Something isn't working