From c64fbffec3e335233737915d8a12c82f9b375136 Mon Sep 17 00:00:00 2001 From: Luc Elliott Date: Wed, 19 Jun 2024 16:08:03 +0000 Subject: [PATCH 01/56] Updates to gitignore --- .gitignore | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/.gitignore b/.gitignore index 25cf9a4..d459a02 100644 --- a/.gitignore +++ b/.gitignore @@ -156,3 +156,7 @@ Thumbs.db # Common editor files *~ *.swp + + +# IDE specific files +.vscode/ \ No newline at end of file From 2f4d58fb20d25fdd5d50e684a7fd3009c88350fd Mon Sep 17 00:00:00 2001 From: Luc Elliott Date: Wed, 19 Jun 2024 16:08:41 +0000 Subject: [PATCH 02/56] basic setup config for pip install --- setup.cfg | 36 ++++++++++++++++++++++++++++++++++++ setup.py | 3 +++ 2 files changed, 39 insertions(+) create mode 100644 setup.cfg create mode 100644 setup.py diff --git a/setup.cfg b/setup.cfg new file mode 100644 index 0000000..a9f9cf7 --- /dev/null +++ b/setup.cfg @@ -0,0 +1,36 @@ +# Setup configuration for the package +[metadata] +name = caked + + +# Options for the package + +[options] + +packages = find: +python_requires = >=3.8 + + + + + +[options.packages.find] +where = src +exclude = + tests + .github + .gitignore + .gitattributes + .pytest_cache + .git + .vscode + .history + *.egg + *.egg-info + docs + site + mkdocs.yml + *.ipynb + .mypy_cache + .ruff_cache + \ No newline at end of file diff --git a/setup.py b/setup.py new file mode 100644 index 0000000..6068493 --- /dev/null +++ b/setup.py @@ -0,0 +1,3 @@ +from setuptools import setup + +setup() From 6afd99e9157ff57c2a9a87e21510fc7006aab698 Mon Sep 17 00:00:00 2001 From: Luc Elliott Date: Wed, 19 Jun 2024 16:14:07 +0000 Subject: [PATCH 03/56] Added MapDataset and MapDataLoader --- src/caked/dataloader.py | 421 +++++++++++++++++++++++++++++++++++++++- 1 file changed, 420 insertions(+), 1 deletion(-) diff --git a/src/caked/dataloader.py b/src/caked/dataloader.py index 95ab944..4f35a17 100644 --- a/src/caked/dataloader.py +++ b/src/caked/dataloader.py @@ -14,10 +14,14 @@ import mrcfile import numpy as np import torch +from ccpem_utils.map.parse_mrcmapobj import get_mapobjhandle from scipy.ndimage import zoom -from torch.utils.data import DataLoader, Subset +from torch.utils.data import ConcatDataset, DataLoader, Subset from torchvision import transforms +from caked.Transforms.augments import ComposeAugment +from caked.Transforms.transforms import ComposeTransform, DecomposeToSlices, Transforms + from .base import AbstractDataLoader, AbstractDataset np.random.seed(42) @@ -246,6 +250,255 @@ def get_loader( ) +class MapDataLoader(AbstractDataLoader): + def __init__( + self, + dataset_size: int | None = None, + save_to_disk: bool = False, + training: bool = True, + classes: list[str] | None = None, + pipeline: str = "disk", + transformations: list[str] | None = None, + augmentations: list[str] | None = None, + ) -> None: + """ + DataLoader implementation for loading map data from disk. + """ + self.dataset_size = dataset_size + self.save_to_disk = save_to_disk + self.training = training + self.pipeline = pipeline + self.transformations = transformations + self.augmentations = augmentations + self.debug = False + self.classes = classes + + if self.classes is None: + self.classes = [] + if self.transformations is None: + self.transformations = [] + if self.augmentations is None: + self.augmentations = [] + + def __add__(self, other): + if not isinstance(other, MapDataLoader): + msg = "Can only add two MapDataLoader objects together." 
+            raise TypeError(msg)
+        if self.pipeline != other.pipeline:
+            msg = "Both MapDataLoader objects must use the same pipeline."
+            raise ValueError(msg)
+        if self.transformations != other.transformations:
+            msg = "Both MapDataLoader objects must use the same transformations."
+            raise ValueError(msg)
+        if self.augmentations != other.augmentations:
+            msg = "Both MapDataLoader objects must use the same augmentations."
+            raise ValueError(msg)
+        if self.classes != other.classes:
+            msg = "Both MapDataLoader objects must use the same classes."
+            raise ValueError(msg)
+        if self.dataset_size != other.dataset_size:
+            msg = "Both MapDataLoader objects must use the same dataset size."
+            raise ValueError(msg)
+        if self.save_to_disk != other.save_to_disk:
+            msg = "Both MapDataLoader objects must use the same save to disk option."
+            raise ValueError(msg)
+        if self.training != other.training:
+            msg = "Both MapDataLoader objects must use the same training option."
+            raise ValueError(msg)
+
+        new_loader = MapDataLoader(
+            dataset_size=self.dataset_size,
+            save_to_disk=self.save_to_disk,
+            training=self.training,
+            classes=self.classes,
+            pipeline=self.pipeline,
+            transformations=self.transformations,
+            augmentations=self.augmentations,
+        )
+        new_loader.dataset = ConcatDataset([self.dataset, other.dataset])
+        return new_loader
+
+    def load(self, datapath, datatype, label_path=None, weight_path=None) -> None:
+        """
+        Load the data from the specified path and data type.
+
+        Args:
+            datapath (str): The path to the directory containing the data.
+            datatype (str): The file extension of the data to load.
+            label_path (str, optional): Directory containing label maps whose
+                file names mirror those in datapath. Defaults to None.
+            weight_path (str, optional): Directory containing weight maps whose
+                file names mirror those in datapath. Defaults to None.
+
+        Returns:
+            None
+        """
+        paths = list(Path(datapath).rglob(f"*.{datatype}"))
+        label_paths = (
+            list(Path(label_path).rglob(f"*.{datatype}"))
+            if label_path is not None
+            else None
+        )
+        weight_paths = (
+            list(Path(weight_path).rglob(f"*.{datatype}"))
+            if weight_path is not None
+            else None
+        )
+
+        if not self.debug:
+            random.shuffle(paths)
+
+        # ids currently depend on the data being saved in a specific format
+        # (the id is the first part of the file name, separated by "_")
+        # TODO: make this more general/document in the README
+        ids = np.unique([file.name.split("_")[0] for file in paths])
+        if len(self.classes) == 0:
+            self.classes = ids
+        else:
+            class_check = np.in1d(self.classes, ids)
+            if not np.all(class_check):
+                msg = "Not all classes in the list are present in the directory. Missing classes: {}".format(
+                    np.asarray(self.classes)[~class_check]
+                )
+                raise RuntimeError(msg)
+            class_check = np.in1d(ids, self.classes)
+            if not np.all(class_check):
+                logging.basicConfig(format="%(message)s", level=logging.INFO)
+                logging.info(
+                    "Not all classes in the directory are present in the "
+                    "classes list. Missing classes: %s. They will be ignored.",
+                    (np.asarray(ids)[~class_check]),
+                )
+
+        paths = [
+            Path(datapath) / p.name
+            for p in paths
+            for c in self.classes
+            if c in p.name.split("_")[0]
+        ]
+        label_paths = (
+            [
+                Path(label_path) / p.name
+                for p in label_paths
+                for c in self.classes
+                if c in p.name.split("_")[0]
+            ]
+            if label_path is not None
+            else None
+        )
+        weight_paths = (
+            [
+                Path(weight_path) / p.name
+                for p in weight_paths
+                for c in self.classes
+                if c in p.name.split("_")[0]
+            ]
+            if weight_path is not None
+            else None
+        )
+        if self.dataset_size is not None:
+            paths = paths[: self.dataset_size]
+
+        if label_paths is not None and len(label_paths) != len(paths):
+            msg = "Label paths and data paths do not match."
+            raise RuntimeError(msg)
+        if weight_paths is not None and len(weight_paths) != len(paths):
+            msg = "Weight paths and data paths do not match."
+            raise RuntimeError(msg)
+        label_paths = label_paths if label_paths is not None else [None] * len(paths)
+        weight_paths = weight_paths if weight_paths is not None else [None] * len(paths)
+        self.dataset = ConcatDataset(
+            [
+                MapDataset(
+                    path=path,
+                    label_path=label_path,
+                    weight_path=weight_path,
+                    transforms=self.transformations,
+                    augments=self.augmentations,
+                )
+                for path, label_path, weight_path in zip(
+                    paths, label_paths, weight_paths
+                )
+            ]
+        )
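+    # Usage sketch (illustrative; the directory names and ids below are
+    # hypothetical): file names must start with a class id followed by "_",
+    # e.g. "1abc_map.mrc", and label/weight directories mirror those names.
+    #
+    #   loader = MapDataLoader(classes=["1abc", "2xyz"])
+    #   loader.load("data/maps", "mrc", label_path="data/labels")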
+
+    def process(self, paths: list[str], datatype: str):
+        """
+        Process the loaded data with the specified transformations.
+
+        Args:
+            paths (list[str]): List of file paths to the data.
+            datatype (str): Type of data being processed.
+
+        Raises:
+            NotImplementedError: processing is not implemented for this
+                loader; transformations are applied inside MapDataset.
+        """
+
+        raise NotImplementedError
+
+    def get_loader(
+        self,
+        batch_size: int,
+        split_size: float | None = None,
+        no_val_drop: bool = False,
+    ):
+        """
+        Retrieve the data loader.
+
+        Args:
+            batch_size (int): The batch size for the data loader.
+            split_size (float | None, optional): The percentage of data to be used for the validation set.
+                If None, the entire dataset will be used for training. Defaults to None.
+            no_val_drop (bool, optional): If True, the last batch of validation data will not be dropped if it is smaller than the batch size. Defaults to False.
+
+        Returns:
+            DataLoader or Tuple[DataLoader, DataLoader]: The data loader(s) for testing or training/validation, according to whether training is True or False.
+
+        Raises:
+            RuntimeError: If split_size is None and the method is called for training.
+            RuntimeError: If the train and validation sets are smaller than 2 samples.
+        """
+        if self.training:
+            if split_size is None:
+                msg = "Split size must be provided for training."
+                raise RuntimeError(msg)
+            # split into train / val sets
+            idx = np.random.permutation(len(self.dataset))
+            if split_size < 1:
+                split_size = split_size * 100
+
+            s = int(np.ceil(len(self.dataset) * int(split_size) / 100))
+            if s < 2:
+                msg = "Train and validation sets must be larger than 1 sample, train: {}, val: {}.".format(
+                    len(idx[:-s]), len(idx[-s:])
+                )
+                raise RuntimeError(msg)
+            train_data = Subset(self.dataset, indices=idx[:-s])
+            val_data = Subset(self.dataset, indices=idx[-s:])
+
+            loader_train = DataLoader(
+                train_data,
+                batch_size=batch_size,
+                num_workers=0,
+                shuffle=True,
+                drop_last=True,
+            )
+            loader_val = DataLoader(
+                val_data,
+                batch_size=batch_size,
+                num_workers=0,
+                shuffle=True,
+                drop_last=(not no_val_drop),
+            )
+            return loader_train, loader_val
+
+        return DataLoader(
+            self.dataset,
+            batch_size=batch_size,
+            num_workers=0,
+            shuffle=True,
+        )
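+    # Worked example of the split: with 100 samples and split_size=0.2, the
+    # value is rescaled to 20 (percent), so s = ceil(100 * 20 / 100) = 20,
+    # giving an 80/20 train/validation split.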
+
+
 class DiskDataset(AbstractDataset):
     """
     A dataset class for loading data from disk.
@@ -361,3 +614,169 @@ def transformation(self, x):
 
     def augment(self, augment):
         raise NotImplementedError
+
+
+class MapDataset(AbstractDataset):
+    """
+    A dataset class for loading map data, alongside the corresponding class labels and weights.
+    The map data is loaded from disk and decomposed into a set of tiles. These tiles are
+    then returned when indexing the dataset.
+
+    Args:
+        path (str | Path): Path to the map file.
+        label_path (str | Path, optional): Path to the corresponding label map.
+        weight_path (str | Path, optional): Path to the corresponding weight map.
+        transforms (list[str], optional): Names of the transforms to apply.
+        augments (list[str], optional): Names of the augmentations to apply.
+        decompose_kwargs (dict[str, int], optional): Tiling parameters
+            (defaults to cshape=64, margin=8).
+
+    Note: I'm not sure if shuffling will be used, but the method I'm currently using lazily
+    loads the data from disk, so the map file will be loaded, transformed and then the tile
+    will be extracted. It might be good to include a cache option to store map data in memory.
+    This could be useful to reduce the number of times the map data is loaded from disk...
+    Perhaps saving them as hdf5 files would be a good idea?
+    """
+
+    def __init__(
+        self,
+        path: str | Path,
+        label_path: str | Path | None = None,
+        weight_path: str | Path | None = None,
+        transforms: list[str] | None = None,
+        augments: list[str] | None = None,
+        decompose_kwargs: dict[str, int] | None = None,
+    ) -> None:
+        self.path = Path(path)
+        self.label_path = Path(label_path) if label_path is not None else None
+        self.weight_path = Path(weight_path) if weight_path is not None else None
+        self.mapobj = None
+        self.label_mapobj = None
+        self.weight_mapobj = None
+        self.slices = None
+        self.tiles = None
+        self.tiles_count: int = 0
+        self.transforms = transforms
+        self.augments = augments
+        if decompose_kwargs is None:
+            decompose_kwargs = {"cshape": 64, "margin": 8}
+        # store the kwargs; _transform_keywords_builder reads them later
+        self.decompose_kwargs = decompose_kwargs
+
+    def __len__(self):
+        # TODO: The tile counts need to be calculated before __getitem__ is called.
+        # The number of tiles is linked to the transformations applied to the map data,
+        # so the best place to calculate the tile count would be in the __init__
+        # method, and consequently the transform method would need to be called there too.
+        return self.tiles_count
+
+    def __getitem__(
+        self, idx
+    ) -> tuple[torch.Tensor, torch.Tensor | None, torch.Tensor | None]:
+        # start by loading the map data
+        self.load_map_objects()
+
+        transforms_keywords = self.transform()
+        _ = self.augment()
+        # SEND TO HDF5 FILE to be saved, some will be duplicates so need to keep track of the duplicates
+
+        if (self.slices is None) or (self.tiles is None):
+            decompose = DecomposeToSlices(
+                self.mapobj,
+                step=transforms_keywords.get("step"),
+                cshape=transforms_keywords.get("cshape"),
+                margin=transforms_keywords.get("margin"),
+            )  # TODO: move this
+            self.slices = decompose.slices
+            self.tiles = decompose.tiles
+            self.tiles_count = len(self.tiles)
+
+        map_slice = self.mapobj.data[self.slices[idx]]
+        label_slice = (
+            self.label_mapobj.data[self.slices[idx]]
+            if self.label_mapobj is not None
+            else None
+        )
+        weight_slice = (
+            self.weight_mapobj.data[self.slices[idx]]
+            if self.weight_mapobj is not None
+            else None
+        )
+
+        # Close the map objects (the method only closes what it is passed)
+        self.close_map_objects(self.mapobj, self.label_mapobj, self.weight_mapobj)
+
+        return (
+            torch.tensor(map_slice),
+            torch.tensor(label_slice) if label_slice is not None else None,
+            torch.tensor(weight_slice) if weight_slice is not None else None,
+        )
+
+    def _transform_keywords_builder(self):
+        keywords = {}
+        keywords.update(self.decompose_kwargs)
+        keywords["step"] = self.decompose_kwargs.get(
+            "step", (keywords.get("cshape") - (2 * keywords.get("margin")))
+        )
+        for transform in self.transforms:
+            if transform == Transforms.MASKCROP.value:
+                keywords["mask"] = self.label_mapobj
+            if transform == Transforms.NORM.value:
+                keywords["ext_dim"] = (0, 0, 0)
+                keywords["fill_padding"] = (0, 0, 0)
+            if transform == Transforms.VOXNORM.value:
+                keywords["vox"] = self.decompose_kwargs.get("vox", 1.0)
+                keywords["vox_lim"] = self.decompose_kwargs.get("vox_lim", (0.95, 1.05))
+
+        return keywords
+
+    def _augment_keywords_builder(self):
+        keywords = {}
+        for augment in self.augments:
+            # augments are string identifiers, not instances
+            if augment == "random":  # Augments.RANDOMROT.value
+                # TODO: ax and an are not set anywhere yet; once a rotation has
+                # been recorded they allow the same rotation to be replayed
+                if hasattr(self, "ax") and hasattr(self, "an"):
+                    keywords["ax"] = self.ax
+                    keywords["an"] = self.an
+
+        return keywords
+
+    def load_map_objects(
+        self,
+    ) -> None:
+        self.mapobj = get_mapobjhandle(self.path)
+        if self.label_path is not None:
+            if not self.label_path.exists():
+                msg = f"Label file {self.label_path} not found."
+                raise FileNotFoundError(msg)
+            self.label_mapobj = get_mapobjhandle(self.label_path)
+        if self.weight_path is not None:
+            if not self.weight_path.exists():
+                msg = f"Weight file {self.weight_path} not found."
+                raise FileNotFoundError(msg)
+            self.weight_mapobj = get_mapobjhandle(self.weight_path)
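+    # Tiling sketch (assumes the default decompose_kwargs): cshape=64 and
+    # margin=8 give step = 64 - 2 * 8 = 48, so a 96^3 map produces slice
+    # origins {0, 48} per axis, i.e. 2^3 = 8 overlapping tiles, the first
+    # being data[0:64, 0:64, 0:64].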
= f"Label file {self.label_path} not found." + raise FileNotFoundError(msg) + self.label_mapobj = get_mapobjhandle(self.label_path) + if self.weight_path is not None: + if not self.weight_path.exists(): + msg = f"Weight file {self.weight_path} not found." + raise FileNotFoundError(msg) + self.weight_mapobj = get_mapobjhandle(self.weight_path) + + def close_map_objects(self, *args): + for arg in args: + if arg is not None: + arg.close() + + def augment(self) -> None: + augment_kwargs = self._augment_keywords_builder() + augment_kwargs["retall"] = True + if len(self.augments) == 0: + return {} + + self.mapobj, extra_kwargs = ComposeAugment(self.augments)( + self.mapobj, **augment_kwargs + ) + augment_kwargs["retall"] = False + augment_kwargs.update( + extra_kwargs + ) # update the kwargs with the returned values + + self.label_mapobj = ComposeAugment(self.augments)( + self.label_mapobj, **augment_kwargs + ) + self.weight_mapobj = ComposeAugment(self.augments)( + self.weight_mapobj, **augment_kwargs + ) + + return augment_kwargs + + def transform(self) -> dict: + # TODO: Need to see if same transforms are applied to all map objects, maybe just voxel space normalisation + transform_kwargs = self._transform_keywords_builder() + if len(self.transforms) == 0: + return transform_kwargs + + return ComposeTransform(self.transforms)(self.mapobj, **transform_kwargs) From ef73dd26e4033d836f1216bc3ca270ce5d027027 Mon Sep 17 00:00:00 2001 From: Luc Elliott Date: Wed, 19 Jun 2024 16:14:29 +0000 Subject: [PATCH 04/56] Added custom augments and transfroms as well as tiling --- src/caked/Transforms/augments.py | 123 ++++++++++++++++++++ src/caked/Transforms/base.py | 58 ++++++++++ src/caked/Transforms/transforms.py | 178 +++++++++++++++++++++++++++++ 3 files changed, 359 insertions(+) create mode 100644 src/caked/Transforms/augments.py create mode 100644 src/caked/Transforms/base.py create mode 100644 src/caked/Transforms/transforms.py diff --git a/src/caked/Transforms/augments.py b/src/caked/Transforms/augments.py new file mode 100644 index 0000000..c241e73 --- /dev/null +++ b/src/caked/Transforms/augments.py @@ -0,0 +1,123 @@ +from __future__ import annotations + +import random +from typing import Union + +import numpy as np +from ccpem_utils.map.array_utils import rotate_array + +from .base import AugmentBase +from enum import Enum +from ccpem_utils.map.parse_mrcmapobj import MapObjHandle + + +class Augments(Enum): + """ """ + + RANDOMROT = "random" + ROT90 = "rot90" + + +def get_augment(augment: str, random_seed) -> AugmentBase: + """ """ + + if augment == Augments.RANDOMROT.value: + return RandomRotationAugment(random_seed=random_seed) + elif augment == Augments.ROT90.value: + return Rotation90Augment(random_seed=random_seed) + else: + msg = f"Unknown Augmentation: {augment}" + raise ValueError(msg) + + +class ComposeAugment: + """ + Compose multiple Augments together. + + :param augments: (list) list of augments to compose + + :return: (MapObjHandle) transformed MapObjHandle + """ + + def __init__(self, augments: list[str], random_seed: int = 42): + self.random_seed = random_seed + self.augments = augments + + def __call__(self, mapobj: MapObjHandle, **kwargs) -> MapObjHandle: + for augment in self.augments: + mapobj = get_augment(augment, random_seed=self.random_seed)( + mapobj, **kwargs + ) + + +class RandomRotationAugment(AugmentBase): + """ + Random or controlled rotation (if ax and an kwargs provided). 
+
+
+class RandomRotationAugment(AugmentBase):
+    """
+    Random or controlled rotation (if ax and an kwargs provided).
+
+    :param data: (np.ndarray) 3d volume
+    :param return_all: (bool) if True, also return the parameters of the rotation (ax, an)
+    :param interp: (bool) if True, will interpolate the rotation
+    :param ax: (int) 0 for yaw, 1 for pitch, 2 for roll
+    :param an: (int) rotation angle in degrees
+
+    :return: (np.ndarray) rotated volume, or (np.ndarray, dict) rotated volume and the rotation parameters
+    """
+
+    def __init__(self, random_seed: int = 42):
+        super().__init__(random_seed)
+
+    def __call__(
+        self,
+        data: np.ndarray,
+        **kwargs,
+    ) -> np.ndarray | tuple[np.ndarray, dict]:
+        ax = kwargs.get("ax", None)
+        an = kwargs.get("an", None)
+        interp = kwargs.get("interp", True)
+        return_all = kwargs.get("return_all", False)
+
+        if (ax is not None and an is None) or (ax is None and an is not None):
+            msg = "When specifying rotation, please use both arguments to specify the axis and angle."
+            raise RuntimeError(msg)
+        rotations = [(0, 1), (0, 2), (1, 2)]  # yaw, pitch, roll
+        if ax is None and an is None:
+            # draw from the seeded RandomState so runs are reproducible
+            # (note: RandomState.randint's upper bound is exclusive)
+            axes = self.random_state.randint(0, 3)
+            set_angles = [30, 60, 90]
+            angler = self.random_state.randint(0, 3)
+            angle = set_angles[angler]
+        else:
+            axes = ax
+            angle = an
+
+        r = rotations[axes]
+
+        data = rotate_array(data, angle, axes=r, interpolate=interp, reshape=False)
+
+        if return_all:
+            return data, {"ax": axes, "an": angle}
+
+        return data
+
+
+class Rotation90Augment(AugmentBase):
+    """
+    Rotate the volume by 90 degrees.
+
+    :param data: (np.ndarray) 3d volume
+
+    :return: (np.ndarray) rotated volume
+    """
+
+    def __init__(self, random_seed: int = 42):
+        super().__init__(random_seed)
+
+    def __call__(
+        self,
+        data: np.ndarray,
+        **kwargs,
+    ) -> np.ndarray:
+        raise NotImplementedError("Rotation90Augment not implemented yet.")
diff --git a/src/caked/Transforms/base.py b/src/caked/Transforms/base.py
new file mode 100644
index 0000000..fe84760
--- /dev/null
+++ b/src/caked/Transforms/base.py
@@ -0,0 +1,58 @@
+from __future__ import annotations
+
+from abc import ABC, abstractmethod
+
+import numpy as np
+from ccpem_utils.map.parse_mrcmapobj import MapObjHandle
+
+
+class TransformBase(ABC):
+    """
+    Base class for transformations.
+    """
+
+    @abstractmethod
+    def __init__(self):
+        pass
+
+    @abstractmethod
+    def __call__(self, data):
+        msg = "The __call__ method must be implemented in the subclass"
+        raise NotImplementedError(msg)
+
+
+class AugmentBase(ABC):
+    """
+    Base class for augmentations.
+    """
+
+    # This will need to take the hyper parameters for the augmentations
+
+    @abstractmethod
+    def __init__(self, random_seed: int = 42):
+        self.random_state = np.random.RandomState(random_seed)
+
+    @abstractmethod
+    def __call__(self, data, **kwargs):
+        msg = "The __call__ method must be implemented in the subclass"
+        raise NotImplementedError(msg)
+
+
+class MapObjTransformBase(TransformBase):
+    """
+    Base class for transformations that operate on MapObjHandle objects.
+    """
+
+    @abstractmethod
+    def __init__(self):
+        super().__init__()
+
+    def __call__(self, mapobj: MapObjHandle, **kwargs) -> MapObjHandle:
+        if not isinstance(mapobj, MapObjHandle):
+            msg = "mapobj must be an instance of MapObjHandle"
+            raise TypeError(msg)
+        # Proceed with the method implementation after the check
+        msg = "The __call__ method must be implemented in the subclass"
+        raise NotImplementedError(msg)
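+# Subclassing sketch (the class name below is hypothetical): a concrete
+# transform overrides both methods, e.g.
+#
+#   class Identity(MapObjTransformBase):
+#       def __init__(self):
+#           super().__init__()
+#
+#       def __call__(self, mapobj, **kwargs):
+#           return mapobj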
+ + """ + + @abstractmethod + def __init__(self): + super().__init__() + + def __call__(self, mapobj: MapObjHandle, **kwargs) -> MapObjHandle: + if not isinstance(mapobj, MapObjHandle): + msg = "mapobj must be an instance of MapObjHandle" + raise TypeError(msg) + # Proceed with the method implementation after the check + msg = "The __call__ method must be implemented in the subclass" + raise NotImplementedError(msg) diff --git a/src/caked/Transforms/transforms.py b/src/caked/Transforms/transforms.py new file mode 100644 index 0000000..35d6a35 --- /dev/null +++ b/src/caked/Transforms/transforms.py @@ -0,0 +1,178 @@ +from __future__ import annotations + +from enum import Enum + +from ccpem_utils.map.mrc_map_utils import ( + crop_map_grid, + normalise_mapobj, +) +from ccpem_utils.map.parse_mrcmapobj import MapObjHandle +from mlproteintoolbox.proteins.map_utils import voxel_normalisation + +from .base import MapObjTransformBase +from .utils import divx, mask_from_labelobj, pad_map_grid_sample + + +class Transforms(Enum): + """ """ + + VOXNORM = "voxnorm" + NORM = "norm" + MASKCROP = "maskcrop" + PADDING = "padding" + + +def get_transform(transform: str) -> MapObjTransformBase: + """ """ + + if transform == Transforms.VOXNORM.value: + return MapObjectVoxelNormalisation() + if transform == Transforms.NORM.value: + return MapObjectNormalisation() + if transform == Transforms.MASKCROP.value: + return MapObjectMaskCrop() + if transform == Transforms.PADDING.value: + return MapObjectPadding() + msg = f"Unknown transform: {transform}" + raise ValueError(msg) + + +class ComposeTransform: + """ + Compose multiple transformations together. + + :param transforms: (list) list of transformations to compose + + :return: (MapObjHandle) transformed MapObjHandle + """ + + def __init__(self, transforms: list[str]): + self.transforms = transforms + + def __call__(self, mapobj: MapObjHandle, **kwargs) -> MapObjHandle: + for transform in self.transforms: + mapobj = get_transform(transform)(mapobj, **kwargs) + if transform == Transforms.MASKCROP.value: + kwargs["ext_dim"] = [ + divx(d, kwargs.get("step", 1)) for d in mapobj.shape + ] + return kwargs + + +class DecomposeToSlices: + """ """ + + def __init__(self, mapobj: MapObjHandle, **kwargs): + step = kwargs.get("step", 1) + cshape = kwargs.get("cshape", 1) + slices, tiles = [], [] + for i in range(0, mapobj.data.shape[0], step): + for j in range(0, mapobj.data.shape[1], step): + for k in range(0, mapobj.data.shape[2], step): + slices.append( + ( + slice(i, i + cshape), + slice(j, j + cshape), + slice(k, k + cshape), + ) + ) + tiles.append((i, j, k)) + + self.slices = slices + self.tiles = tiles + + +class MapObjectVoxelNormalisation(MapObjTransformBase): + """ """ + + def __init__(self): + super().__init__() + + def __call__( + self, + mapobj: MapObjHandle, + **kwargs, + ): + norm_vox = kwargs.get("vox", None) + norm_vox_lim = kwargs.get("vox_lim", None) + + voxel_normalisation( + mapobj, + vox=norm_vox, + vox_min=norm_vox_lim[0], + vox_max=norm_vox_lim[1], + inplace=True, + ) + + return mapobj + + +class MapObjectNormalisation(MapObjTransformBase): + """ + Normalise the voxel values of a 3D volume. + + """ + + def __init__(self): + super().__init__() + + def __call__( + self, + mapobj: MapObjHandle, + **kwargs, + ): + normalise_mapobj( + mapobj, + inplace=True, + ) + + return mapobj + + +class MapObjectMaskCrop(MapObjTransformBase): + """ + Crop a Map Object using a mask. 
+ """ + + def __init__(self): + super().__init__() + + def __call__( + self, + mapobj: MapObjHandle, + **kwargs, + ): + mask = kwargs.get("mask", None) + if mask is None: + msg = "Please provide a mask to crop the map object." + raise ValueError(msg) + mask = mask_from_labelobj(mask) + + crop_map_grid(mapobj, input_maskobj=mask, inplace=True) + + return mapobj + + +class MapObjectPadding(MapObjTransformBase): + """ """ + + def __init__(self): + super().__init__() + + def __call__( + self, + mapobj: MapObjHandle, + **kwargs, + ): + ext_dim = kwargs.get("ext_dim", None) + left = kwargs.get("left", True) + + pad_map_grid_sample( + mapobj, + ext_dim=ext_dim, + fill_padding=0.0, + left=left, + inplace=True, + ) + + return mapobj From 2ab426029603358eddeb72cb0826eb83d1bed119 Mon Sep 17 00:00:00 2001 From: Luc Elliott Date: Wed, 19 Jun 2024 16:14:41 +0000 Subject: [PATCH 05/56] Added tests for new code --- src/caked/Transforms/utils.py | 112 +++++++++++++++++++++++++++++ tests/test_map_dataset.py | 61 ++++++++++++++++ tests/testdata_mrc/mrc/realmap.mrc | Bin 0 -> 433792 bytes 3 files changed, 173 insertions(+) create mode 100644 src/caked/Transforms/utils.py create mode 100644 tests/test_map_dataset.py create mode 100644 tests/testdata_mrc/mrc/realmap.mrc diff --git a/src/caked/Transforms/utils.py b/src/caked/Transforms/utils.py new file mode 100644 index 0000000..c579f71 --- /dev/null +++ b/src/caked/Transforms/utils.py @@ -0,0 +1,112 @@ +from __future__ import annotations + +import math + +import numpy as np +from ccpem_utils.map.parse_mrcmapobj import MapObjHandle + + +def pad_map_grid_sample( + mapobj: MapObjHandle, + ext_dim: tuple, + inplace: bool = False, + fill_padding: float | None = None, + left: bool = True, +) -> MapObjHandle | None: + """Takes an input map object and pads it with zeros to the specified extent. + + :param mapobj: (MapObjHandle) map object to be padded + :param ext_dim: (tuple) the extent of the padding in each dimension (X, Y, Z) + :param inplace: (bool) whether to modify the input map object or return a new one + :param fill_padding: (float) value to fill the padding with + :param left: (bool) if there is an odd number of slices to pad, whether to pad more on the left or right + + :return: (MapObjHandle) the padded map object + """ + + def even_odd_split(n): + if n % 2 == 0: + return n // 2, n // 2 + + return n // 2, n - n // 2 + + nx, ny, nz = ext_dim[::-1] + nx1, nx2 = even_odd_split(nx) + ny1, ny2 = even_odd_split(ny) + nz1, nz2 = even_odd_split(nz) + + padded_array = pad_array_numpy( + mapobj.data, nx1, nx2, ny1, ny2, nz1, nz2, fill_padding=fill_padding, left=left + ) + # the start is at the base of the xyz grid + # I want to move the origin to the base of the padded grid + start = (nx1, ny1, nz1) + + ox = mapobj.origin[0] - start[0] * mapobj.apix[0] + oy = mapobj.origin[1] - start[1] * mapobj.apix[1] + oz = mapobj.origin[2] - start[2] * mapobj.apix[2] + nstx = mapobj.nstart[0] - start[0] + nsty = mapobj.nstart[1] - start[1] + nstz = mapobj.nstart[2] - start[2] + if not inplace: + newmap = mapobj.copy() + newmap.origin = (ox, oy, oz) + newmap.data = padded_array + newmap.nstart = (nstx, nsty, nstz) + newmap.update_header_by_data() + return newmap + + mapobj.origin = (ox, oy, oz) + mapobj.data = padded_array + mapobj.nstart = (nstx, nsty, nstz) + mapobj.update_header_by_data() + + return None + + +def pad_array_numpy(arr, nx1, nx2, ny1, ny2, nz1, nz2, fill_padding=None, left=True): + """ + + Pad an array with specified increments along each dimension. 
+
+
+def pad_array_numpy(arr, nx1, nx2, ny1, ny2, nz1, nz2, fill_padding=None, left=True):
+    """
+    Pad an array with the specified increments along each dimension.
+
+    :param arr: (np.ndarray) array to pad
+    :param nx1, nx2, ny1, ny2, nz1, nz2: (int) number of slices to add on
+        either side of each dimension
+    :param fill_padding: (float) value to fill the padding with
+    :param left: (bool) swap the per-side counts so the larger half goes right
+
+    :return: (np.ndarray) padded array
+    """
+
+    # the nx, ny, nz values should be the total number of slices to add, split as evenly as possible
+
+    if not left:
+        nx1, nx2 = nx2, nx1
+        ny1, ny2 = ny2, ny1
+        nz1, nz2 = nz2, nz1
+
+    return np.pad(
+        arr,
+        ((nz1, nz2), (ny1, ny2), (nx1, nx2)),
+        mode="constant",
+        constant_values=fill_padding,
+    )
+
+
+def mask_from_labelobj(label_mapobj: MapObjHandle):
+    """
+    Create a mask from a label object: a binary array with 1 for labelled
+    voxels and 0 for the background.
+    """
+    mask_obj = label_mapobj.copy(deep=True)
+    arr = mask_obj.data
+    arr[arr > 1] = 1
+    arr[arr < 0] = 0
+    mask_obj.data = arr
+    return mask_obj
+
+
+def divx(x, d=8):
+    """Round x up to the nearest multiple of d, so the result stays divisible
+    by d (which lets a network pool and concatenate up to 3 times, 2^3)."""
+    if x % d != 0:
+        y = math.ceil(x / d)
+        x = y * d
+    return x
diff --git a/tests/test_map_dataset.py b/tests/test_map_dataset.py
new file mode 100644
index 0000000..2c56523
--- /dev/null
+++ b/tests/test_map_dataset.py
@@ -0,0 +1,61 @@
+from __future__ import annotations
+
+from pathlib import Path
+
+import testdata_mrc
+import testdata_npy
+import torch
+
+from caked.dataloader import MapDataLoader, MapDataset
+
+ORIG_DIR = Path.cwd()
+TEST_DATA_MRC = Path(testdata_mrc.__file__).parent.joinpath("mrc")
+TEST_DATA_NPY = Path(testdata_npy.__file__).parent
+
+
+DISK_CLASSES_NONE = None
+DATATYPE_MRC = "mrc"
+VOXNORM = "voxnorm"
+NORM = "norm"
+MASKCROP = "maskcrop"
+PADDING = "padding"
+TRANSFORM_ALL = [VOXNORM, NORM, PADDING]
+
+
+def test_map_dataloader():
+    test_loader = MapDataLoader()
+
+    assert test_loader is not None
+    assert isinstance(test_loader, MapDataLoader)
+
+
+def test_map_dataset():
+    test_map_dataset = MapDataset(path=next(TEST_DATA_MRC.glob(f"*{DATATYPE_MRC}")))
+    assert test_map_dataset is not None
+    assert isinstance(test_map_dataset, MapDataset)
+
+
+def test_slices():
+    test_map_dataset = MapDataset(
+        path=next(TEST_DATA_MRC.glob(f"*{DATATYPE_MRC}")), transforms=[], augments=[]
+    )
+    slice_, _, _ = test_map_dataset.__getitem__(0)
+
+    assert isinstance(slice_, torch.Tensor)
+    assert len(test_map_dataset) == 2
+    assert slice_.shape == (49, 46, 48)
+
+
+def test_transforms():
+    test_map_dataset = MapDataset(
+        path=next(TEST_DATA_MRC.glob(f"*{DATATYPE_MRC}")),
+        transforms=TRANSFORM_ALL,
+        augments=[],
+    )
+    slice_, _, _ = test_map_dataset.__getitem__(0)
+
+    assert len(test_map_dataset) == 8
+    assert slice_.shape == (64, 64, 64)
diff --git a/tests/testdata_mrc/mrc/realmap.mrc b/tests/testdata_mrc/mrc/realmap.mrc
new file mode 100644
index 0000000000000000000000000000000000000000..f1f92660ebdb7c7dcc2479937f10d7947f15260b
GIT binary patch
literal 433792
[433792 bytes of base85-encoded MRC map data omitted]
zfCWtag1*>g@A{=b{Z)8>{mw52elhTifnN;#V&E48zZm$%!2kUWwA&k4=d4t+3%Wgr zmy)8x@7Xklw?yo|ZAD%4Ut-VsyBRd8B3xPBRKbO=gCZ*>E*0ABAWDwE&sT71>X$9~ z@5cMv@88ioXI&t>&(1nULpV8|qH{A}GUE%)Sbak!Mi90|bjHE^Kc*G~HheIGQ4r1g#yv67fThCNm!jn<7|vpfTo2m6s>2OUZ1I~iD3 zn%tKo>656$V3Xkc%XfW|TyOrZ{d#!ZXHKQEjnMQ>4SoJn2fHnJ9a<(Xj+V{YgVK&% z$77#P2Jub*3@?V_`-Yaqjw7% zt$l)Tt}_C&i{kLw&2!MLh;JpPtGb}hSy9Qa~Tyn6td^0}}5JXIvz$VpH#`5g}Y zSd4E!nhyhuU*X68m(la z$=#x@2AV%L=3coRYctVcg>akb1as!dHau$EXShhsB7t2?Nl%Up3FzvE3jIJ4{#ZqIrKRqh&Oie@)%czgr>IJFEybG!)F)+V8+ zN-=*?H#}BNcN9$>1#`OQLorZRO>hrH!5Al`|11Outkh)4je*^HaA1z zu7%+5=npyx5)idF5{+?7r8hD*KgECC0wdAutYb`b^f%H599o(o9xpMQj^C|8*o^VUqX`bFVSt#=H)@Ln{l z&66Fhz62*XnSy$}5?HosKtZ_~EU(dmY5AdGd42?`ihTpqtoYzEJpugmmVlrCH>AHM z1?zUahRjvxLF#1;?u}i7e5$XaDT5QB;i-VknFmDShCgYzHI1kj1;fUf#VBQ!E%UM| zU8MD1T3Gm1muERYr(f#^D9xGkpfiYPU^bAp=dVKKojrWKo&m|7!=Z&e2?qm!!c{4zVDa1;M)>Z+@ z4CB!-3Qbg$27bs)2z{81Q%2v%BjUs$dqOAvz)gWSAI6i7TW>(M*d;9MqKzMU96=B0 zfoPa^8+%_=jcToYu$xs0v|Z(2NKV;#=yNu1^PjbQ{lWuyfyd!FzGX@|2o`%J`B%ysuK0fos z6MesXj5YVuKzie(;e?(vxqPA#JS1ko(soHO2v^1~wubnYZ!3Cs-xOakeu_UEDw3ky zWu&>I6c%|6!@(n;;5^Iku#QnAP1{6pGA;Ta+fhSo49y^^@~4F|sNn*#SV zcY%@3Yq&8<7p^GZ#plhg;gyDtaL=h0=oM4RjRhl!UW^;4>V3cxfnp%xdl$S;D3epu zM?zWm0dU`t1FzrP;avwTnF)7N_*9;1zvf<6H-|Gi`hc)yzz$~a)bHrkRzpZNUIdPV zJfYHm4=D9)fnF7Hc%5{M8!oY^z8&IYaKGK-jYlh=LZX%I&njuB&51Ngdxu- zkP+S!$+#*tvToHWaK6YTF&p;7ZnG53PFI8`nPqsqR3z4k-j7WenWODVYv^f{2M9Zo zGyAo6lcOYW<2QXLe`MTVYK^V~8>R68Nj&G{+wzBT)sX$rZD>vQSl2=J%iGw=zAqQ# z3kTAZ>OmZ>#u2(zhcw<$0HdU*cxSW;tb0a*f=?tIkGF?-J$1N#C>O5FxR4E3PJwks z8FIMS&T_1#uzTbT*yC+pZ1hGIcGs60+Pis(sI2R1zs63d8xVy}_ry&VuwLFTCFQ0~_fZ#mv3nPea-TW>4v4 z_UXe4rn!6~{jNz}G(4$RSVt`twh5%DPqWrgB|c6f`Ns$N5$BzMS}&NAAI^PdGF7-? z)*kBk#JP0u#V&gD_${ny_ET2-rXy=Wk7Vj5l(Xl@NPrVZkKmbRL4g5Iu27Y~0 z0Z(tk06Pn-!k@Oc)V0pCJ#f*2-y~cr%(yc|B#>(mJ(v(fpF4b&emwaqRUKkR`7+1Y z$>C3Nu2wF5HSq<%WMdqEe<=G(>@a&IErQie>7jGhfr877fkTi0@cFK#qj_+d_* zaLC>90^|BO!YRqFOm*W*ls<1TzI1O3igEB@*KD87G~S)h_+*(e7N7Q0-=;+htQur^ zA9xY`y*dL07Y{Dx+iS*f#l_3GNA#t6@5W@kI!l8f{CrnbVfT zU6pPjNWF%HIU&-*tdU-#D5ryT$L@8^bcHx3I{PD&wRj7wt+12z2(e)t_r4ZsX|xDZ z_SJEfI9E6kg>P-Mr4nsB7j@dW2UJ__80%~!lGtO@&_i3_*x_VhS8!|iKfm*@JRjO& zX}9kDR6#+t6m_+A`tFTnyHgFA{5ck) zb1Rl}FRBSSu3bm|Hg5k&J!@y{aqR~074Evdm7ba>$ENuYWQRm=r+1w46fJkpqnOYZ zS|$BF{qcM)eKX=c{eEE-Wt$!=IJI`3%}dGuEcSoY&z!f#&U^ZHVcYaWbVVh@w(C!1 zD4WSa`yr|wVRnh!58~)ywv;C4`{wStD zQ!bCU-m=Wikb9k;Ew~{iD>A>PE=rjdC$L?(og3`2hUa-)Q=q*T3&q9+3$L|X2yoOS z?(zNi{?hUA&*1&NvR8f6tPQ8M+J0@^!7WGoxl?Wpv0E`m+QzHK$2yWzZ_DcEao@a< z;>~x=w+qJ?)$WX@fXF-lSSBzM1-O+w1Z(j7khR#d$RSF?U$qR(?loI=^=Qa-P4=HQRaj zkN&hh=Vx&Jt7ik%$8z2mYx3r<3>55?vlGe5B~xTa8Fe5eRTMAB2&|o!_RAjouipDd z?tfP(HG8vHXtgFv$0mNazs;HiBkL3|dkf#R63)fyEZ(HH!NLxgNum{Nf+^W!icI1> zSvF1g2`f9;08RJC?Bz-?dbxHnmA49s%nGCVS2a_9+BWz{aQ{~3q~{~6@Plr)Mbvyx;3XzhU|_$AS$ZoVS7EylHMP_>R{{2s%{P^Iyk^i!xk# zC}k5*wtlfC7CwB8C(P`?=6iCWsfs3XOSojvYY#H*bqb_&0-&dHE6CKX!or(EcEyhE)yV2J_vdtEe4 zxhX+bQiq`=SBW%xOd@j^DHCU3PhwS|MPBUDBtc)w>|+Z)8zPN3 zBfQ632G(ATgEg+HAURBvs8(Et+mU6^Fh2_n4^M_qEn?7Okc^XNUq;EhPvBXblCegC zCKAnOY57(|%CJ6&YO1|L@!Els+Mgqg|5CVaj= za(E)?4Ieh!fXpf%JX#}xUB5g7IeF}Xi78Ixc9b!B(;Elp!b)MDo*Yq&ng~ZI?*74~x(NSpBxjF%pU0rR)vK(BA?i^sie&BnoKxRw*D>kvbEtr5(e!emj>NQ<9p zTdGrz+scZW3U_SV#AJ!aVsk+bXdF-@;p_+k4TKoX34{-$bI@7dDSU2LFnl#qC9mqn z67I)TNZpqT7r9-)GxUQXF$;*4YQfCM$9R%pH*9;TN#_{y8PQZuc0teXjwyr^k?}7w*Kz z)ER6?KR~fED?m+lIH_)E0YU5*sAK!$nZ!k7pGB%LX~-eatU3+r<*KmGg#}PIF&73d z-T+PGMv}!d{0MkXAT22Zvfa{{EXdU(dA)bxv_mkCKb^qDl@I?Z{}X@vkD~VHgjL~8 zz?XYhfy&xg_#!R_PAkO7H?46bPniaEya!1t0dy#gBcsG75x1j>u)yRxy5q}WU*%Ge zzQHB=%aqB;1KRNE+G;pg91CT$FJP@{mto3)$z+tZ2l3fFl6*g34C{t?Lk;}{H*4%e 
zLB0ia)3;Im=6}oi1?E<7IP_NS2;^Dk4+=e&r1(%Td_MIFznFIr@Rk8YP0R?cjb8y4 zQqGWXngqT#p2L!=AYc=(qlaJB@Q~VQ@LFF9%chKn6fPe}xbE!BB{UkA$?XE~;B;VQ zTA?`1isZYEAko395WCD9DL>o6N{-QGifoanx$s86?brCFjdj#GHCjV{4RYP<4_o9Y zQsZkygc|qZ@!3$gY&#OD&ec$QVIUzW0k-Sk!Q1ve1?!VyWU2gB?7X8Hd8$o-uYJy9 zM+aqtFk>Ws>oyK&?>`GJ!N$Za*p)ojvLul51a`hZ3aLk8v8LoCwgy>J_LYN$BW!JX z$<<~3@;|XJi??02ubbaAYAxNPREKVDy$VP7dXu}a1`(Z5OH$)l4QnrH!peL-BBf(P zxZ45bhnHi0_Zmpx^uU!1E%>%;C|>TL)#umO0i5FVAU8GyjEWxNb2{(vvKjjzd<&nH zT#_SE;~qmpRxV6+2CzS~4JD0b=wo4D`Yr1XIzF2d^D3NARCZ7w^GCBb4f;6p^f552 zwIi;pGGP4fso>i64WC?^glEXrz=d&|WNAtx#L1SxKAjxc#Ax*8csYWz2Df5c`&?Mz zw;k&BF2a{8D`H{#8CDlv!FR{)$4QP`AvY=)j2+&=o1UwH6-I;3+8wCv>3F(m(iWb- zp>4nBvnwe<+|I;dB7^HqjPFMmyi;#G9OWpG!r7BZp>r2(e7*^vi|t^mG+(lJGP`j3 z5Iqv4?oSk^P9YzizJb_XTX2fI2BBenIloR;z=xIj&^hxY^HT~OpD#-@a_HG|`rtv9Ue&PuP@-4`@@eU+u#}v}KrUYiM-HD4H zd_c7x3vopGA_(sDZ$30zi$wEmNu2*mFo|t}7fKDV>6|}QgB988K8q+C9R^3?d6@jv znQSOJ3lDPIaq!xqp#N|(-X9Tx2VOpk=jUkPRQqG>gVWEc5q3NKC1blkr@0*}bA*Rw zx6{Gu3P@8k4qGMl`CD~tgE@H&NeP}vOf2?*nPwh-XR8fu8$Lksjs>J=unoB)qerC2 z^5D#-Oc?t86Rfou0an5B5E@?xABqNmcFh#tXD#XIcQWj;3Kj|EU*RR^iox^6 zH+Z+Q3XFE0!%M%F;2QQeNO(>r4cRxKv*sMk4S0=@rKZw4DA>Gf5@XP zuL>*D6VYSKW>=~)VQ1!w=BsJ^lyxre5>S^Bj&pau#@~$E(7jdf*r1_`2wpWY zwr*W)>DKpXS4{z0pR|T`cs7t-(Kkk~;!87G-DgC};}S&A6oQ5GUY7GN6@>I_{P_p& z-~Jk&?%=JP7l`jiI71H0_6$YuwrpcsQdZLmy`$(oODh<;_krv?zh}(Da~tW0D>NCTG?wxF zs7t>Jx+T2s6!O!Y|Aze^$9W|;IC9npc@g^e1kXg_!nv7+f)mp#M3sJP>4Vu(^m^%F z8l~@MXRLHZWi49~wSOY2I%CJapB;`S@BD(cg-E0QT6T=TM5O5Z!ywVn+@e0OtV+tx z{I2LmQ>rNM`B6%8x<55o$wu_9URQ9|t+U_r?my%Hy>l&XMT?-fj=YIyVuho0DQb(@ z3Tmj%4kpF%7+Rk*2`}iLgLe3Kr*x;C5bXOjo{vXv z7WA6x3ntCV<}JA7AvnL)RVZ+)5!_7}&hH2i<0j6j=$E|ndw75E+Je%DwwBI1{7n*- zf`SYc5hZTUoaM8qTI(*Ff3kxexp@WMHZD=5FvX4X*FmdpQ5MYng?LT(1YC1Uj8>gsMjd zi&~$|p(aIRrg70t=37rQUH;u$wEbeIVC`-tp={d`zFSj{9da@KuUUildh5WQD~uLc zZM6~YD;^>8?Tb3PR<2aoxPvc>i(rKA18aEa-3quLAKm9(iw(AGOWyjowuk?X=l|{c zqSumkF$F@t!}S)y3ArADKw>rjQ|~o9%bPoFC%&R=?o6EdU&Hn9)j@Zfrj16ov)%of z61=1eW!{+3XuIkIYyYb_{yp5kUHkFrfcdGpcdbI4!)>k&-(z#PR>itgL+rnr^WU)l zI{sqd7X!Z-_{G3427WQ{i-BJZ{9@q0k%5a&g_fI#-Lq-xSYnIc%(fkW?wpMs+WEKZ zwm-{7_-Klwx_BV3yFXUAkdV0{)%VY?hDx?$}n;j7Zqg1Sw?yxzS3 zY);q7YJ;uHH-~b_Hy0k~QI1e8`W7YqE}ky`aERIa?KE;-yaZ>B-HKn=2w}8U2EJyS z&E}_OFk_Q$(AD)PMXpP=`(+#NFVFw;IfI@toG};9anC>7F1&N`ApNRwH~TDyLb;rI zD4(wb8B@-Ie0$&8V~Hkt?y?<@q)OxK=an!tEJe2Z3mBQEjC#Yf=( zVeh@8qH40WagZpIbCM)EckVvt)Gj~-k)VheFrb171rZFGML-mZk`WO}qJoi(fgp$p z14#^k0Tod}1qB0s%v!(qTeIfQ+?he&duRPV_~&$=({%6sRMoDky`S1!sdYJ2e;EMf zwu|sCxf?BhD-7#nLQwmX1LW>}X=2g-NM73RJWikWyg#)qZ1nierJ@7rx7RLZEj<32 zt#xrNC%Dm*^Vr9g<5j$y`}z`-+%>O&>X4E_hfd`ngY|(x(@leCDRHpfo`!R=8y4=^ z1Ro7@(Y!=`v}EuhX~P#yZ42K?y>fj<#D!nv1l>CGr?tU>b@{XxS0&heN>8|-Vwl8g z-7tb%Uqtj5$da?pFD6!%^za@PpC!+|T#6h5Mc`&JhFKonu#jVjzfW3;y=~O+&0Go= z?R5hmVQW}x8HG-lZG>3O4Y1hvCG}!k2XB3<7RSBoCA)q>9y|PbD0^>%2J0k7^rY># zfBF2+W}&N$#l>1&NsoTQ;~ax(^xsK6Jot?|B@>8-+)AhczGPy9kUuq7FcjjKYGNtf zV_LHF`fu$)~2^;v3ob(K2K4KIO!%BiCr8(T2`yWRi8_{G{cI zVnX0W5b+JW5rT6k@Wi~-*+t?ff63;q7ImRLUn9oaWqF0G&x;`Sw+>RP!*-&3DN{gh zS`_3?u0|r?vq|HCYD#1z19JJD@PjyUoWFY`%$lf)-(45Pasy&GHO?A;Z5fR-JwFW# zy6(Y;*%tV{0}H1qMnSMpJQ}1eMAIu|k?Y>e)I^U|syiK%FR!iSZZZ4zOYwjvrBc?$ zG69}e$RLqY#-cjqKT&mJX(%nE9>P{K@f{y;U{5Tij#2y3Q>$Yjbmt1_1h_-M;)`gl zzBd$K*94P^_aS?mH9qz}7BoM#Kr>ecZ^;gURXr^*@#i4icgclqTl_%NTm;_gq#?1( zGe&DBeIs@Fm3cBzxxX~FXY=B?5q>vFy_#tB^RXdtzKBDxLn9(<#c_kT3;v-M4!y?T zQF^!7=(%w%JV|F3eiau3kG54(>X1ztt}R4cB(K2CIzyZwR|*r3V%&U759g28d2&>X zfkRsrvCWPW2rSqOKh(~ni~SRkVZ~<3M@pV_E2ndtDj@-Uw!d)35yz%EUNUTbxCW%FnFFq|o zPTdIlHRgbrfH=Ig&wy&7xxgoP9%)sFg12)!G==Vmm#d?oU1&KH@pdB18YGGP_YZUB 
zqAOXj#c9m_|3&i)%bh>>@jAq7sOPaUu;Q>LUN?=z4pVq|$gmaWJ$r;sdmKR_n|bi& z^BpAY9)qHW!|KX?Ty`ns9b8&^xT2F8=HO7;D ztnq%63P9-#p+dX|fmTW;oa);Y&ByH1>;^~T|U+x`?K^cek6&$pB?*Jg1Wb$E*N z{3y4)5D<7}gJUZvt3Ue92G*Pr2j-*Dgzd_SI(eZt?~&sGE+XM`bHL}IBiv%kfvtld zcq_NSyry^=U$gg9$z>Y zEs=`@zFbQj*!dhtAQ?2k6~-0YPJa z98#=}gJ!M6%a4rK)=4fy^*@S{-$@ZLxITn#Pgw{zeu&|YK^|VbeL3c%Y2v^+=aILU z8(Afy#G~EwW1ld!9}|B=%qErnddDH+{dYl>Xh?(BkRHg^(Z@?}GjXinHMldP2(NBO zptFm5DQ}}g$oFdooaa1%oK!QsX`K@8h-ZUfhdyo@nv1W0d=0_&+0Z7O2kC*0XkB&= zT2Ou#9UkQdg-cJtca|;3(;RQYbKeN4JJ5|{PUs;iPAj==A&pnFU2IJJf$%*M?B(jA z#HN+6sldwjXil~>6pz{#k5xRJ`qmOR>mLBIWeq55`cA0a77MMx=OJCF1@;OG;JnE% z!1S&hu&NVaq^AV7oEL>dHjg24-75Sx_b42d0h#YS4#FG0fCXm-Y!+_@ zx;sC9>zohQ3_X#5cODU6xpGY0!r=O8KQC(JqOi8mYlgl!Q`u)r)2MyAey&6^@%;I$D$G+@2IdvD9Hq^8aP8LKMV}=4UuZPxWf&r0>fkFW(qOBg>w` z<3*KFz&?YNTT3XHq#E)3hRA^LH)&@(J@^M61W1$7}eQ(v2AWbC98DD3X{qoqDmGpIThxMR^4PS@kfQ+hm4+ zY3=YAJ!-qGI0}aq$jAy;^kR)Ttnctd27$HY;{hk~wjn<-}Y>@=3GBo zHl=|I@i|1aR<>~^%Cb0Dj0Twn_j7)EPhc`dgQXfbz%7Q++E5P7&n+F{2gXM14EBBCC}RyV-;`Z5S9I*6X%41^Ks9?0vWgGbpzs-TZezI-A^)dVd> zM>iy))$i|6!a4#}QIj%Syo85>hMcK95g)?#63O`sZoli#)^8Q9<~E%b5x1GsQ=`O9 zw3p>JB5_hQ;WV}XT`iTq^#YkS$rx!Q+(WDQOVPrZV6>s|3U%;B0Qi)?0KN_t(0!Le z`EY~@zR5ns`HD=kacV5dl{-!(TS}00vq{u+)pAmuo=X@^xzBwpdu~iN^Iy?dFT2#- zzBrCuTzQN4?d~2@Wpf%?G&7QFx#9ssq22INnGfXG8&FdQ^hn*Mqqzo8%s{0s8>#ZO z_lUfY23$2^TTbMYQ(Uc8%Xt&|^mrugH8=3)6`s8hho^k^1jqd3IaU{c(wOF~e?|VG zumK~ttSbyxQiSuxzmPk-=NnHyU@sL@EeYbPdtkEHeso1Vlce2>=QY$8^PX(dChkn4 z@m_2R=A3t^V-D||M;Bab%_<)yAOtxkvwg2@WAUrsQ(o_{}s%~KQoTNI0EAcj3Y3Pz&HZq2#g~zj=(qq;|To!1%WA7Qq8u+`_gh{ zau`d`_%Jmpj4E7!Jg+q;7{*l?WAXV=Sm zDx`}G)N=-?n)5`mxefkJ<&{=AbG~|-IHi(0$cyr_nEzP6O+8VB9%<4mL%d-Aaz+%X)CHEa1ygTg166NgL91D2$k z2dmDqw3M!5>QJ{h_NIkAH(nq~@$W-@JQ6mBS%dk^GeG&Qgw~YHsPM{hDm^@x)G1p@ z`ingw4cu&~zNUCG^7<;C$c2JG&1d)*`P~N7jYlh;Tg+%^qkXg}V+?y$vmEj=IQ&)X zxZ9-9aOMTJa^e(IdH(f6l*|DAQT?#p;pLNqnJHz;OQ5GoOWPR{GK$`+A9JTZ$d!SR}!QPtKg)qEl&G- z7xZlHVfvz0__50!|M*1V^8NXcX&VRczHWtM9TU;(m{o+X|6cCt>5|+dQ8PKJKP@>O zv$|P02SbJ2y0M)F+W5V0x8kNDnrftM2y!kevc#69Wa%+h?``Ac(#*(VmOVR${asoRo> zp(~JDCaL7g_MPb3`-@;IXMypQWMF1*MlHu1sEj3@=yQ?@oNMpYvoGbHx>TJ-2{5NeII`D1$V+PoP$Bj@RB+!}fj?Kt72aW6|slweZkR7K?aag3tNVSjkNeM^)LvJKGwN8CZ-Na&JMf z?l=mMY$7ivE#~dL`j{nS5K43KPd87mHvYHm&kI2XGl{|k+KM*^Sxe5B@DAKerSh(6 zKx5t^*x`2--f#?X@YSE7M^&S3cLhKqLI^)EJ`6U3?@@wt40!Obhij%&pw_Y-eGlFP zJ3biV)3(m|x_~(jdZmCHPmc10lsu+K=9AIz!sL9{4gj7LJEH!r4V@VATdC`1Bzh;y!mkOJXYIv%kQ` zP7ORi^DIbL`6E{qQS#ECm`}j#`Y4P)^zWh{_9t#Ui75a{?T3uRI?fjOu~cqj^%W{CqUr<3TpP(j*q=rE=Eo z2^$l4?3qR$=Y~=axw=&z5@dz3K|~g)?nN;3;Tj0M6#*JsM6vYRS1@UU5vY7e;8m4k;@5M(OV!=ZOmFhWLHrnd#;#dm>nuqXI+D?+c~36$YC83HCw zf{p2g;QNe;nU_C-iFO%0=N?1%gbpM5&1vAcuNipmqF7s74I4gG!?PBdV!wq-ut+ML z#6K5}X&ev={j6Ta54<4pRg}^9cy#cL2qbKdgoX>+*!ztdp2(HOy|;IQRE-u+>6wQG z!t}7My$2pvpN3OC4RP`N=kU;@15Iiko9Lwhp5WHU$t< zCV(CIlyS#(I%b^Kz+c>DaoBkiT)1N5DF0>=IebTFO!A*ORX`7|FJj|O9=sc|YUIn$ zGbpP)S`cMsiwPH3JT(6*h^$wFZE6^D)`UXX^EqJItAdqATyXSNAH1|y7z@2>Mm^Ln zIPNNlQ_)PE@92Vm%A9~>^9N9+=OO5pr{V4j9XxBxN8p=#3uZBlfPVKqqSYJ2th=J< zI=?ev_io~t`p;k1K0o%{w+zG|nfaegxV-a4;~&Exv<^vFrqf{Q-O1Zseu_aFQa2{5S}cXmIK4!3yyDTW^a5%LSCpLZ@o`LJ_HX6>d+)8TeWJZ_oXhny ziX%G>nCQy%Iq3LM8R9dTgl*ru<73QfNc}Pq9AAE-r2F@w49z+e=qiblbxx4Iw-`K^ z+=;xItCh%Gb+akOFhk1sr7CIId5qk@Mu}WpeVX@v?rYBHueX1hbM^OR|I0mEVYJyK zp-5KKo_em%-C!ahF@e-^>Ow(31@QM85v(rd4w?JYPpMDFQm~WT)?LRtW^y2QR+2pYU&&m=r{35A!tOLj^0ik2@8>sCamqT%%ux1u*@3vv#J?47!UR|SY9sHw`d%=WV*zt`hP4x^xX_y#(Ni2&a2`q z-VlEraja6F*mzikhjkWlg)P$AmxrPljGu~QT7UgDUVql>2`5jRWAzfcS)dWCq*$E& z^yn;>u!tIC^}1&?k!wdS?>xC}cE5C;(VzAEgQv6m9eZbs_AmQs|GR$i*Uv6B3Y(^6PPOn|@WV1?l`QQ>yrgABS~DZ4SCMn& 
z%qC**J#C>4+xDI;rH1LV94*1=x8p!4GAv?u0yaO#$X|*zZmR(hpdE8ci zlOtjK4gMLP|L*frk$BFsA1v~I*IaaDB^?|tFM-(l5M*p1NF^M;OX=+p0-4fIRG1rw zRPQH(_Z~SsoAQFb-Xb(rJ{~d!eR0?}37k@S2~O@6!i?zH(R?mI;U0*7YL64creUSw8Ia)i zi7FDwp$-oRA-?6AXt+F%`mlhG%1Q-*@!lPFb2q}C_$*lRg@(5;Y=NhnAHmmUx8cy? zCn%jrq}aM_atvl35aD$`%O-YCGa#>9 zN>K^Ivfx`FiBIo$!|R>Sz?Zx0QS-8L%FH&KnwTh#-tc{*)@8L(OF3UCi8?7TAFb1I z`%x5J(_9Ii+(q{8T!lHFf?%fzklx2YI+JpUFv%#6k=aDH?%rm`j5^a_j=lUv zkG&sCxOm$WQsZqn5?n0`brEOKex5bbIK37Ghm3G{2pc<_SHNAVCrB(n3|5VvaTgs3 z1dk*4k=|x~^x~Q#uns9=r$u&nXmt&2Xx;&reSg3$=Q(g-@DR+MGzk4S22t3TBC>UJ zBzM`91{S*|hOuty+h6wm%2y0p?#>Y+?89}?!P_^Xc9A>on68MqS>dqr^%huDUjY*a zt+1G+2j0K17LxY~Vb;3!c=j?wd^3{;6rVhrlQf7{Z4k#Yj1@R&)d#TLWe8t=Vj=aG z3|J=|hK7Y2IHRBd?C+?cTyq1WsK}iiaV?Dg-FWQzH~#29N3RAjlN#^vju*V5uJgsh zkhCE-FPVzPUU}gQuZ6J7JsLPh6v7z+cO2~g4h;J%!9Y$cNQn%#5u+S!6Rdi zftj5>VIF)yhf(T!i7g~ z(WN#J7c(0^DQp<6-JyWiZJP{EUs$-%g2Y%m{~@HGMR<&V)kh6 zD`86*GBAT#x2+*^l!N$*uMl2Mw}Qa+XFrt;{g#-@J^S#@~ zWP_f@zh%Xr(O!qIdTt)B@y5F4-9tdE1` zwDH?7?J)RCAB-LMPz}eg67HrIJmU&B=l*)FF|CVM-3Ly%_j+=lt}Mjvrf{jaBwn(} z6CNHU;reKMQ??d3`-%a6*O3VNo3A48BRk=3j2i@sSR*&lP>7o?j+1>%;n20w{9^v2 z^=w4GLvWWI+!t&>0r@)-C2|!pXB2_dyw!M0#A3WKz!FoN55Q>o0JQb457N-zMlF#Y z;BmcM#>C6r5~j-exQHSff;tf9pNuP~* z>MU|C+&{Pi=hRQeMW2;1Z^C!**mxYm+9qI={uFquSpm1C3&8QDGZ;S>gbNlX&=@5N za>x6A2y#1F+Ta29O{6VYc>pc-Jok3&Jh~ zUbF%qx@w2lteJ(an32F=@PJ}p&Y||~iYA+`zUK-}Djl;Os;Ug9S(~M^T$?SqGiIa_ zb6%PtO@||}{Y){O_DKf$%_{idA|EilE)BIhd~+?~d`prd zv#dd8&>TN6?1I<~H5hu80r%gk;zx6&akOaz%)KFoyE+-z{$vC2`ECdGv^}u1+zZZW zeL?4^Uw}u}^7z~BqcD=33Zp4Uuu&}yUx^s4Q|Mxfo@`Mkd>aq^vfO{@kVCs~v35lk zW9ElWmZps?XM6VD(Q$PIZP1X%b7m@G?@twI=xjE6x-J-=Ih}$Vr$@QOYJK4JK@fNK zmP2BT7+gPNgMv;fph5l$>c%E1m=f_F<{!<2l@Z^OEZPXuKTd#{UM_Oa&L*E|C364x z?caT+3SR2Z#2;4Z;I3!episIT*~!`= zX21-JC8I)Ch)yRaD76w-YTr|e>pr8W7xK}kI}}ylZ$^C%iy%)N;1Inp4|2~o?i`cN z|5tPsPED{l=(UcmW?#i~`V6mtoH0BnVCOLuw`|q+-|_ zBITF@Z;9p+&NUYXds*yd?wQ6*gd9DJTvu;M&f68k>s!*qZI%?}iq9-$ZE0xy)A=W_ zI}Xqfi3qUQcYoou+|?pNwWBB@v;$Q;$RcXjYijfIYD&Fv6FJ5F1J_i07W?75_l$E( zqAVw-$kTJH_po9e+&L=_-s60DPUCbM_OYVH#MuW6bXolLGxT%SdB3gS|Fibk9c;uf zl%jF!+H!esl%k2Dj{?Mzt>wJW9#uRcl}+49hjLjzQbK4h_fySnJ_i2ZJSZC~o*HhC zKVf>kTHJC+`!xEDHxn4`bT#^C#~h0|>%;%|bN}r3fA#t62N%p6Ln`Tex0$n^y_RN6 zczUs%q^wwC^aNJPJ9lRGy#q9ntr~yV->E*7XyCPKn~8C3g_*K)khumY-mK+~no0i> zx!>+{_*Ff}|9>2TaRkN@7)M|nfpG-J5g12c9D#8J#u4~WM!+ao=ikAC-8?_U}gH?Qt)r@dAUW(qPcas*~35WXXmsVBV_RA=R7qI};T?(lRCZlvu; z_S1J0STdhJ&~_ek_`Cb#UzKGn|37@4qHkNgo-MYX$x8}VB9FcoM6M!S&^}@V^HpC{ ziwf?M^!B@?Yvyx8*CLEp@2|!S6?fr&x}nT=em3)${j~q8%y@^C#bWL(IC~*o z)bcn^zu1${biT2jV_O}`Yf9IqGG6AO)8*;lIh(@Y3xx1f@e3$SqYLeF6oZDj-blXp z5S8q;ifZqPC4cgk@{D(#`qO-Ff2|j7=TD1iS*pws5g|^%vt+Kv&{=NW%b*PtfDm(d4KRYEY08Ha%VeaKnU*hM%B66) z`dGL^d}miE9VOHauTi;S1*m>T5>jvNppGdfAa-aMlKAuxx=yzNd0iZiJTrwpgQw6I z_a02PWI!Hk4s4!C#}2J5%)H@*ZTr?>R$n>X^SMvuZav7WUiyl0e$|QJZlCm?OVQ@) z8#7=3C}u6pQ{x)Y%gJEFb|l`E4wp}D1)GR2)V(wt)*bYQp+Z4;DK`nkud&ca9|}#) z_XWXa8hEFv3U0Sr3_E;=;kD#Tu;!Rz^W_`y#;7(>nx%k z7%rwTLyZTUcV9+LeF2cbPJnO6_`&tbXgQZV6(E_u9QUtO$IeG7U?s&MmHqbAk4YIk zua!qwO~R6Np#?6#&5xVgIFGx+F__fdCyzF;JyB9xALX|*m)gHO6ZJir1y}FV!RtI7 z+^Y^irtn8JF_{4>?_}^mqZ}S7-UZjFNc2X_7Sgv)z&0JPVc)m)FuO<`Z*Gc#8MU2A zHCz#@ZYiQtGX+vPrj#qvHkX}6X8jg>I=oGnuA7&^6EL|<4PIRa$qIMCT(bS?9I9bbANWC7`{Wu z$higz)-J~3J1>IS-E;8NiyyC%VZm~*cT|$566ku*2R@TLv~T1PHO*!=VJVU{wz%*A z*ZCv;3(W5u>M9<~ zA){OZN8lS;2wPNm=+vzOVy%1bFUjvPse-;D+>&FmM~(O#txTDSS;M3dF0QjKf;lDo z5bLWKDmoaB66IGz)M*Z!?~MVWgo#+FQXAF?Sb%~rft^-xvHXs7D1Ph=O~t<8<-ZIL zYP%32oq@E|Fq(a356b-MD> zlLX9>Rf+m&eu_}^b(AlZvp5zuu3ipHlc!*L5qqpXin&Z(9a_3J2Bq7%q7}L(;CJgT zqcBob{hvBC{->> 
zp^`5w$QsS{adn9>c3MN>(BP?9c0LE5rcMX>y;ATf;v6ceB7u=ofbe`Vc;%;rRVPfw z(~hg+=z+=D{h&55bV9&9t`xS|JVO=g!H^kYfrHeaLHUiZz%X?JuhmB=nfNJ`ov$&e zb}pAIv?X>-{Eg2Y3`@Qo306h_4sKKPW#Vj4JDCtdLpfplpdjcGgx(&lJGOK#4s!GY zHgfIbh|PZXz}2bV(SVfZ&UVYegKQ;G0$T++3A+&DlRbXM@!ya z+~vp{+RdbLC-#BzI&9(Vmo5h zq$KvOOW}z^)8PJyGn@&ZfM-11iiFNk#)H_kT?yz|6*1f-lJNy?N3ZLei zuHLrS^2oaj^h}Fu?4Z+q#0}>Rv?fyw%Xq8eTR-HWXIlo6wb%nbSDK)!gMp_^*^Vpc z^n;zGD7X*YLYY(tDw^^ft)CT!zHPY<<&D00cg;ia%e{yO26sC-t9&ehn7uEUP6fPR=g=~FKl=X8C(v+A9@8k{f+xWFOw^JT>33fqmYY_uk!B5>_3Q1$^wM8*eD+ui6y`LaTeNqW?n zY|bD3<-fara-C#ZCRNGS^`aAXLYmaa)KKbB>PD(;gAGdJmY~^po8Y**6t2{f0E%5q z2_~tL3k%D6t~Q6b+Bd^E$;-otD%H7EOE8M&3g7U*ws3Pufgmx+slTw3=Z)XDail=3~aI z8NT#S86_5~mH+uQ)^|?dYSg0WWA-J#&SGECVau#JLYB|2(aoo~Pc@O4-mm|!;y3=e zaRkN@7)M|nfpG-J5g12c9D#8J#t|4t;Qw_59vM$EF4U-{iGGu0ML5*63;X#wAHE%B zeR3?NJ7t;vwz%T&#(9&`u#x|T_2x?!w$jyKrm!`*hP>TM8ANDeJ;B}*%Gi2mkT(XpTX326XoXmBM2oNpS5WOZyL&ofy8F8PHh6P@C-hC`^dwrEz#}rDKY-{4-hq_W9OBwPx2cWLT8TZ0;sKQKBwlGV!`9 zk8ASLkLxU;PP!dRqq1MQqF6Io5Rk5f$JhsV(d)tU#Vn9{qKFrdo*#Bak~oeb37flh z(48Ss%6v3;hzBY1OYz3PmGd9H-w-H650jSSUMO)OlMhWmT1%CYRlo(3KX45>qvs>F z>h8K!I;dnX+J7tREEIbGNMF71oRv{0D z9_A=S@&D;#;i_CUvqwIWC(~0-jra$^Q`aOY&)4=6dJ ziv7$N;7B_)yt1qm-plEM@t4KmZKQ!MUtyfG{sCY;2HcbnpejR1-b|2ZCJX8Rb{)Q? zC|hI_OnT9h%Zvn0KP%PjD=#>_i&E=JhXm2D@cM>3Tvsu`j5Kk)$CnGv%*CMLqYSk- z0UQRa;Hf1)KCkctR-xG-#y1VfrBA?CSr98~C}1M|9>@tSLZUSvi7zszIm5~mfBQIC z|N0=qEh3LCzL?9^77gIexw(WVnEe50*DdTV1j`;g&XRxw4iMkUD z;GJL$>~gL}U(24MMs+@T;8c!2Z(RryUUZC^>QjK%Kx3U^m zhHgQ`YE7~yeln38Da=)=a``QObocc`w5RIDoaV$v;>evX6v5U(+r8q@-u+@Q$9y!d zxFs1BPKAKLOdY&IXeI8S{tdL=j>c4{9fm`!T@dj?4%(s#v_jY(NTh;Kv_`{>wMD2o z+7%2%Mc|Y5ZR9EX37t=~MEQK%P`^+oH5hh@=+cn+Eio7)DKA#?0WTi)tc|>6*hdZD zi9jd4dGI4|5Ugt!;PR7Pe9Ev90wayF_(K{#|I-FLbq|8W7Dt$R`Yx)rSP9Qvx51<& z1l>>Tz;dk@ko^lrbEe3_hKK5C-Jm#C&b&`i7Bm>JsfSf`We8q3K*p(Ye(4Nz$8vSE ztcFC|^si2w1)JqY{2S@1z`@IiFj-v?&jmc&6}Q?dS|V_=~$h;*NEV5!e% z2)xt?pH}9=!j^7`f6)yt-6b$&B>@}U`;bY|2s)u}59ellMyEYWspxbWvg`^Qoe^n* z)&Whta`j|z*k4H==6m}~_-_c*p~bWvW?5Z1&rJ}xO(t5BP~!kGJX5OuViy_IT7=HL`V2pVhoG8$8reS-BX_YMu@mXr&HUyr9ur?W`f?IO z$V`TNN_3dGpgWK9R^EbkTdsp}*E_JB*98%#7m>666Kd0xy~reaA4-bwLq*E*sPIQco6;VacU857hDtY8u^P~{%xYdDF2j;`v-a_=Gyp()4`y^L4?K?ZPEr_|pVBwhT zXcGM*{wBQL!wOun^YJW#Z||7+KY`PNX~Zc7ws^G+FS7Lv5mU8{#M@e^ zQ`{M#(a{X=0z`3|!cXA7S_KmOjzj5dOW1p6BDnND2iuVaP-EMPw(hzL4ohXR)ZE4R zqKgs!vfT{KhgISHReo?ds1H_GMDft@EF6EX9(opsLFR=)^c|;AN~bgl){Zlr=s0zj za?Zpt$^SCzxY>KV47ygjINP~#7FXa-C^74)0eZoV2L3}@xT$6xo>aaBPY4MGuP@4= z-ysK9$Qx4hOz{cX0k|<&3GTeG2TRW@Ag?z9FZv|0#c&kJ?e&D^y<4ID!XXIVkPRbh zOYpH`Ph2>fzbW%%2ZWqzL=7|MkQt>OoW<_)jCT?f#=IWV4A3&~Il`saJxpQUYA@z> zEQ_Wb_P#|biL0T^FBbwzi(vXO4oLi*gd@0;SnRSVEL>uU%LRG(*;Y@uGT#p^S$+z= zUuO$Y*$IPQTflkicN8*T7=L)T7yE55h8c!!Fzc&6X2yA7=eh6TP(ud#yjGs{)D`9E zq;0ooa9{XK=lN~FoHEdh(P8LEM{%z7dJ_J|W#lTbqv(>`(322ZNE{(yz;-HTjL72$ zHfumyN*$XBFT*3sx1g@+GMYa(8R+Z8aX;G#*WHl7tp)oa@oUcL9HotSF4_j-htlAQ zs~Ns4J(`zq)g_p^%sWH6hNbQs{iT6yK>d#K)}LVW=S;+@EQJmT?0bO0A{@u52UkPl~3x zxohFjXzn<+#aj^iejd6!FF{&YEjn80OimmK=d|CL`P=sZf3!j9y$p*)lV+A;`)nTP zzBTE$D4Vph@g!Sfmr(;f7ts$pQGE8ZKAsmU4~ybAp_O_DXy?py>O^WBY261r^`ke5 z<#UUv@M*3HI_9F&9VzIUEFa7hmO{0TiNyB@QGXitf9T+1Z$+=#(ZD*VU&#^4(dCA< z#Bw)GZr~}F7*T$sHF5159Kipg4BBy+AmbCi5@NCnM1uJ+_f||Yd!Q_d(~-81cPM%a zVZ5PZH$*k_)Hvl15hjwsHH1y#MI>IB2(dsZ|)g_2e+~JaLqb_Fm(y z*>sM)kRVAJ&Ad&lP-k-u6sK`qJ}I&VnyZ+S50}%ue(W*#jP_@6+dVmci8Z`O$rZ#- z>kEX`ikG~52VZfj@`HcNzU3du{MYyQhl?1WbdfN>wmro1sB9WNtd)<=T+_peyG?QX zXNYok9JXK{d3*i8%s=hRyl9-4?yR;GC7kZs46e9!G}q|cXU+wz!Tupr_dmz~`Ddfv-A}kFR1=c6KY^I+>CtYDh)PK?U-}vhBkB%cSj=(qq;|Po+ zFpj`D0^F#e9&)&**Q`vW&Ak`%C`7KO4vg 
z`yN_?i72y3x{9@E6TvAIo5xMuGm|@k-<)HjH=onQ*uxQ{E#xGev1A(u9$*M~SpCQB z7tdZfWtsDM7fZlo7x!*5KT*2l2l1J@O6K*>CUcL*5f5r;WWDe_GFWRF*?!c4*uIj( za~Ys>lNT4Tg61XsDgMI0*Ioj0t1RxndMy&XEWN@1 z5x>c{G$G?nvj&-arRm%a^)Gox4+s&(a}ILTVkYsN%C3=;#VY7=)?s9rG#f@H#y|y2 z68n}1!OMCvkdKLmrn!fpL$3rZAM3*hId_<#<&9ppKO=4`KKRr9ZhvVD-3mMN31koF z&21x6(f0|Z(ZG)aZKKJk3L$dOk!6TgRsaM0gz)r5DXBcg` zo`8zPlhM-jrLa-@DvT7CfL@6?#58ZF{2#C5=~bF=K2Lwo+;m;#w~mE6zgQEA$dl%~ z${3al?&;IhbFFx9-U@k5-4JT1jn`=0g?V$8JtpnAyO zYXj`n&7i);AN1Oi(0xi77SX?=$EVJs!hrQq-kkyYcakAjQ6Dtd$H6PRyWmDvLZKgp z8z!jX+Th(f1j!Oq5DAh& z1eByGf(aExqL{#lq9`h6MHG>b{jIgv+Gp;}nVGX;-?{g~KTljQ)xYkiySl5o3a*KU z!@;CcI4tfQ2pJ`z;QkUcPrDjP$cJGDAnF{#C+!=VGXDu}NwWGCh$53p_Bq)EE1eU2`@H~Ie z&idsitPY%m;|m91;(j*}eo}!9+P9&-Q*=SKtrl#(3?XfkIP>MB= zR_)H3ypXHbl*5Y?jK6jsqUDuPJl#h(myJVXIVF&J))ZGqg@a6VAnIDRAHL?##CO*y z;=<-5Q0I69GVW^OGoCb-Jaz+027V3te7pdUc`RHpD;wP7^3dh*vy8#iPt1?+fq;u! zV8G!WdSRPKZ;k(R{wK98A*-?-J6PLPp717VbWsqt34QzJ2RnG?kkwU-9E-j%i=vfa zxxzPCRucl*&7%-1Sp+?wWbo*l!LU8>2~?h+flUwkV4q?U99d4lj2d&0t6vP;zFkF1 zqvTQV+kO0~&t`dOkC4J$nU+}j{Z=rnoQA{>6`}c!w@}?+zy3tZ7B2rh51&s7f{~pT+_-lV z9-IbTqW2l@Sl>lQA8&+H_3NQuP!lb9BZ7pt?L&Nl^{DXWI%e6#4o2`<2D5UV5nV4h zmvDY9|EIoRV(|pqMzv~g)5cuFJ57YL-!+pi_;eX<*r9^=-W%qs~w*qyWc` zT>vs62l6(Gf?DyE!JIoqh^6TUVzo(dvsML{rm5hUX+jVq^%b3HwS%SeE+d(n%OT|c zppQ*+8rtz~6LWws%dDkV)6y@3$a(5)oE%x;a3N3Wmr^N% zNkRORm0%`k04`oO!*Y*=F=?TM51+Kb%~RuGR=y4T73hW(mI$Dfh9^j1?KHUfW-=t1 zM1h>LBYe!C3j)7B!~KNS&@Y|=ORHsZ!u@wpnfwN+^=B|!TI%Q!^;4vDX#(%7tIMAo z!wl@KQcSP1Vm;4rKdWgI+e1H4ogBni7iSaEZVNG3aN!qxs&K*)=f8trrv>gBodY(i3NX{_8XS16gP(lb z1S<8rQEt*RMnmr`{c|@WN6WMRlpS)xoyBG;b@JAtHZ`oCEed>&OmL#z4-hyP3bRv1p(kq-8fQ?7LPCP!YTRMyEYSe(5O0*$+KwdZ%V5iOcRc^7 z3w|uFkB@8$1^p|U(EDl%{McuR$H=PU59|WSvLhiNL=l}c*iXsa4*OHz?{EG8D|sa| ziNz>y z<)<*;$RA=hM1!ucGM+DZ8Y+vvGqZ|I$!RCO|5X3;o9w^;DWzmu8(2tkuBP7M8HIfy zls?R2$l5&+n4pFiq^*JjvEdMPWFjoo(T0R=8=!npFB2XJhHIoIlxrGc>+CsLS-TC6 z`=^7(O%qVndJ8^*J~(=Z5Ef!a;n8RALUe8nJl}5!Kc}r=j;~Q8_47Xunf~^BIet5y z?Fr={)Z^2i7cWLTYK6g9&w@`I+hX}4(S80X_wVQB z8=i4a?nx(hFn1^~nIc*yU;!g;){k~~J%pxu9ULUH0G})o#<{a@0?o3zN5QHF#(A>>iS zTSRmbA?{sN!~$6~vrrc}{ciBQDhRngaHj>wH4VG`znA0phPT=qw` z6I+-CLj4qsPyWm0|E(OKC$F>3Q25H(sr#5GaGcF2PYUu#ZDV$U>BawLzgp^a9gfvg z6~bX|05x}vFWsCKPR|P&^y8jsPZkLE{HbyGcc#O6l+uc@alID6IbQjI7q_vIpQ@rw z1iiS%pX4gVlUFMJb9&Ek3O@4NBLqeWj1U+hFhXF2zzBg60wV-Q2#gT;4A9uB$vW#@%XMw|`#Y=x>SZkivNHao^Rs##J<2*Sc7t0T`u2Otn+v(Hz7`uSl!-8Hk|clEX~`H*5lX6lC=b-o{M3M@*?SV z%B^&5iabLU`Y^a|8PieF$|Qw%(hjTUl4@-D%hnz4#*^$Fr$Tv!iB<%mFp2Qdy~`^} z*-b3G$)Rmc!kHy|ThQqYSsYHtVEwQY@XEyk_FJT*0;h8HBw`nmI=lgm8cRY`*jey* zxPk_f$J3icfFBfilH+t-j?EJQo4|4pOjQpbgT~-0_}w zHSo$?5bg4}qd~ZqnsXtDtUfT7uQ1wU*w%2@d_Bt7RzbAz!T_`~eLB>J*CN5*^Jt;J zGrarN05Nr$kR6~2{zB_uFtH9ywzGuYhynZXxo|Q$5VDhVz~_Js&hJ=;-4iwN5x1wv zzP5pRsHMx~ZDc6N2_gKTsN^A)U$J_wS-88X)uUWB8)NSQp5i-UI$AghRb(~5($2v> z`H!W*B`pU$6YR0M&KNu`K@gJL%Tf2->%duPguNnOfa0vppsU~osf#4=9eW>aUrS>X zr#1MDbtVuC=b*6EUCef^dDPccuX*mi&O^Em7xGBms<`2-?H*OfdzR_IO!u!yv~?*=ubc+q7uwMIXH_UFZ#RU|I+!86uwIM>{<+-_B!6rK zU#So9-Rv8r-I|84o|VEet{2eZhOx~0vx?Mo*-`xB;txY2|Bc!_%QYDnY@eDrb2R69 zaDzsh^PY%klm1;RnBX;Y086s5QIsbRYkdyi<}U=7$P%R%qf!vGgtJ#h)1|G^q(s;fX?fiF{ZJcza{^&*?L<_$@I z8nQUTc7Jddw{VOlQTNV}6ckJ$9=bP?{P#lW`_~pY?dyswu@J6Hl)x9_A3@`ki!edC z7iI}Rg$?FKaOcriRI`B(Vx~s`&6bD8m?~h+(Z`d2S>tV;O4xmW0vHd*a>+mW8BKV% z6IJ=;F}`;qDBFuILt=k4{@%v6ZyZI;^oyo+pGMFI6{57Rwm9V}Wxy2H{z7A}55~`b z{R;)H?m;Ya9ViBvVw=IZ1WPKugYeE35IoTbY6WkBd}u55BpSkOEQ)UkYT+vs^*{W@uIi2G$GS(?;tlz0{eg>Ue-|urnig-^KaZi7OrV1JXHsR zek4QF)2Aq+?>OXjnBrUKR^TaCayZjA32|PBBWgx8FoXF!B$8{8iQZryB2PQ?F;0bX zlgT0LuQ&0pTYTV#sx+`vMNNl9e~@Vq<7&u+l8c?!Gu_jkqTnP74nKGdI{Nc)Ovh$? 
zIg-MA$5%q>n^a`tFcC?su0hxRtx(OEEKoHy#iI|2;0K}CAk=LVJoZ=)TNwt&8Cl}_ z^^@U%kTA42O#}0&&B!-Vkv_3Kn3$J+ns*pvIbv6T4vAm4_1$3ZGesfFF0PBIt-=tf zvlvX)>EO)|7<}rc6kgBv!AFB+v1j!KwB!g6h)i?%vM&@hoQwrQ-T|nOzlTz;=D@>C zT6o~tRapFq!HWW%u=&mfu)L@d?U=U&Os*uM7n-HCwfkY}*&AQ-;GGvdGr6`Q>HpQ= zFXJ5W>LQfR-KB|i4Y^Q0ix?Q@yEg8qA5)p3v6tGKE+XRpkfZPk5_>Q`)kntB`wU{FJqZg zmRG1?cQ*08*x zry5MVR)_Xg5a@MGDU&%}lAd*4pO9PnWJvTMj`I#nWsevZEw+!hN@NmQHRCAN9IU{! zt)-wkg@+aT1n`H=%Ahc9EnGzMxUWwgTgoPbWO6I2say*?o)}|UCn>zU))brtzCpxb z4!?({8SpSaBQI{cA(iAVbcm*P!)FO>H+Q>&j8rer*WmcOi(5s6s@H=nC z4N3p=xAT~szB+zhbSmAQ-i{g#<6)!MU@S5J!JMWBS~$V37`|N+!mrtZ72w zD>J3Az}!Zdky?TVQ^YVYqpu*7)_ZXC++8T?y8}Y=(?GdF7_`ezF{3p4DA|jWr1HV? zA<=#QR`%bn8&E1Y^w(xmlT}QMsD>~e1LOFlF>zJU5?jS#YEK6cn`gcllW;F)3L zaF3KYzU%rH?1Ki7|JZ!GnAyXO_DDxtUvR)nX*4buk-(AcQ*ii6Df-k!(`Ab%^9{Sy zhOF)UZ5fuv{j@#x?Hc!1sseFzb2hmn<`EsHE`YeNFh3qtcB00x>Vhp^6gvkW3Co6g=Z`b+GuDOWbA;hsPE6$doIaPm z^Ifz}JX`<2)cGU=a}QyCBYU(9Of)Is0IHtLpV#QzrmnSZB# z$<2kfr81e^=Q|{cN*4t(F*l90h@VT|+L*#e1CB#7?*2RSjJ!KSV1&R3fe`{D1V#vq z5EvmaLSTfz2!a1o325I)Hhz<(V{vKccx%oiQQM=@HnvR(E!H97k1aAjx|#eZmut?R zF4mFHa=fmKS^UX|D)^VZ6L>pz32~=ipTvF}kYS4pwzFJyU$gA?HQT-pF|a;g*Y&6K za}O1d1uh($_o;QfPZqu8ipCgPTJQ|ry}yHMUhF|$J2Ri3efK@@if|L}?o=7RsYMar zGn7NHcOrh(*^?aasN`YUulD!K_CRWtIdUwtvTK@WV`v#*^P$Yo8eMT=Z^^aeSvNb8 z8obHO$3yRsN(cg9nNvu0{~<=^sRcb>TZQ)el}D*Xt5M`cXL{AAa=NCxm@0Q3O8{Tu zFPpcQ9{OQDv5U*rs(HiN;wR2^??`1=MP~EN7nG7r;y(KAiPcE$)I(6+Iuk2+8DXn! zF>qws8H8@vq7%e!w0cuEL)|}(teHNv>Ed-}-6>n@UIgZEak<2AxuVWrqWYRMyFh11 z>|cMEzupH{^+Xo$*wb(PDb_ZGbDjp@cV0ZvpL>GFErYff&5hu)OcE#E(Z;0YOx#>F z1^;*@in}(B!HXvh?pO9Rgo@X%2lI}I;+Kx~$f##C9Zo->mIpe}OMGIe;O8@lM(KUS zRv)_rmvcBL+xS27zLMsnODH!<73!rPL)}@slVPt_hYt^{!Q{3GHvN$g6*;=NMMw{0 zJs!TRvjBhW@xZ0TJ#aCofl#NxHD4uJDB6CE*=FK|HeYZ3 z-2|N%X81&*4!)Go3Bqf)!o3ZZ;8s%#6G>%wJU0f~B}`$b+HGcG>2AvY_>p0gKU3=l zXV-uxx%5geZMt_2ns_A?Wj2mO_PcLEN!mQD@zV*1SVuv9?IC!F0Dnuc!Ttp|;OC0v z;QsjwnlNV^To1|y_Of;`TW^j=uuyW zd{&-9;d|pDCR!Lbs(*qp#hK7Pb3aVY=|nOO&GgcFm{|Pd?2y<1auixjm#_q^UQCU$ zIWG}myZ*&H8*|$Ygxl<^^o8fyh}#kiRIoL+)YiwQq#ghqK(kx69nc`p2&N>CT&0AD!;y! 
z84~-Q&arEjj~;)q?Hvf=eCZ0}9?j_F7}tuDYcxEWz2bU z6*?zWqVMx)+GWNwe);O#Ln42gwV4g-b>UcBzvQ)R{otQ$c)|a67ZWv-p7dkivFMla zesp*GFSOG75QM#%1XJ5CfaP8%JpP^+9+fDL^Y@JgImtf6ku`>2U(chL>)CLt%L9&O zI>Ve?FNoHcfrQj>IGytjT;4Sz3!XJS;ZQK2yTW})72<;)HU-1)Xqb-%*rLkkT+Qg?nmXJJGa6BZ^;9v+DVWn*97YJ2DsCIFyBY7GTeWV z2La#0;lQk`NW^{|G!A?Q0Y($wi7o>1z8mP#v863u>duG&MR*DEcYQ9ztSD{ z+FZKh&#h)W$-}6dc3M0N$)2iZwtAH@rXq1j?|UJeW7Umd=O6PYtoAsS=LW9t|h&uSA=xpQFW_Sa4tIA<~rcL-Qvo zqujhO+9}hH+#Qt6*Ib&-amsA6y5@QAf0Xy=Z^tXn;z({STasFN{s(g}V?KN_j)eBl zj&LRXCJ5&dSn>T}?kua#AT7EVY1$~l(O4a@TecQu?T!Q0`q5a=NelOnw#VznR)Fdb zTbP)a3io-RVBHxNxbk&7Gf)%CeCy7p^?L|%<5F*K!Kf70DoW9Y{ZRbB*q#1Xu7CCV z%K|apmOGJ@Yhw`dPyPn7jw*PM-WiDCmqMraD=2bs!1LEz;6=_0AtXx@7awbfk5;cy z!NQA!@n~vcXSFt5Z_NM?sZ_|lsSOX$?1bcq4MmnL4F5A6?yt+Y{ zUmqCBy>g2Vi3Dw&=)!-1)4MSq54!NtY~xwkRAtSW*um!q_^xSCPq2lZiF2NAWEKCv#?OWev%E;JXpQ8lCWr*KR6GU7K}+{_bnUtZgeA z^c(+y6gmy?mI4#JV>bf*Q>`m@ut5^uAaVieDQ}wk@yXTbxSeEbad^B>qdLk^)vey>%>k z4P~C&c>!XR{te$VZJfH);f6UVU{*Cny8-BL)Tv;;ggUkwkwDQ*DYJ%73&aOc|QtQ zciQ1i;%vO*a~Zf+>wu2+Vkk@r0ZaJ;G)Ke|Z0cN~$6-6@R?6d78x3*37>0|Ui$O6Y%1#2&DK1 zLfrfZ@XJ&Jm$j&3a^XC1`xt;0C&n`FMfFVTSRb_E$}3dmJ`pUS9d=zyfZb-wXw5)9 zrCsSqy#KDpJFeU?B)Z$*>!Z9A;<%HKtR#J9@@e%|@r>MzFX+XKBKT?I1hZpDf%}y~ zpUxg%n0+K1e(}yg!n{$STg^vrJ~=ZJ!%gU{C}T!rO&McJ>43+#yRadBE8MqCL&`-{A6@mPMu3nB8+4iU!dfh@Qebi-PAMeK<7z#M}#=HM#)m-}yg zUi;K0(yxPawR19m&S3-MxrQJA=Cow)=?Qwgfw#&$mot9c8IJy(tvjUI2aafQUZ+~{ zHA;l&2>~vs>Frdw_4O$18PrFQJPl>68b4?qw2cMCZ^BG z+Q-PlV$+gu|7E}0{1*xq4Ylt$hf3NA`;!E{rg8!!*ZG8L?o47j16rw+pW%N=`}up{ z#=m;+>}+fsaFNZOS#+4sO1Mmv=*E*?JH5!FzGVKh%n|>Sf0g6Ndm{u!2#gRIAuvK< zgun=a5dtFwMhN_Q0`kWq%(vecwUKz%&C0W(IBiX%IJBh!>*LSUL$)UM=cW9|zwmIX zgtb9UD4Q?5hSwW*j(;>PobTX!f{RVOSmX60hGbp&AOH71_hgaYKHDpvl^k+II&Za4 zI3b4`Nw(?{@@uv*vHCy@NATU3VO!hzbCL@PXuGCa-U(UHZJL!u>^m1o1ukaj>#a`o zrK??3LBbMJ;6XI+#hud}D~H?c@+02d*D^U=zorO|+LrgWE###kvA6tB%8-?yXWprC zi?z{r8E=zsBk}RZ82Zr`33S@#-JqLFEPC?AoM~JVMs>(lla3*4$fbVq#J0uBWY_ad z^5%d&A?O^*eHdsrB=)ZViTrUNovkz_6M1^Rid0RZ4&&G-i-PYP!NtY8c$7#x=!N&8 zRPS0w`g#PD*Xqrvm?zVXKARZ+BU6LEG6d0NMkhHVYB zq))~|K$&B+@>V@-_3K7ZVUmN z!M*dbE-k>hauD(teM0RE_n`96^BAY{V03tEC3D}5xD?g zo1oO=GR&3NflE(bps^Y0=+1}5VA<-7gRWWOp6gbS_2oUCxl(u7_7N5ohuGH5(d6yh zN07b#{gm&lX>{PaI_8JeAt1NuV$Dl~zDq^{@aA(X=yVO{pE`CDUi2Yw+!ndu^pt1@=uXG^egT}BfRSZ1uMSTz!VFt3_F;c>N;7ihE z{E0gW*Gzv5r;{_mTfY=+jm|+(!%FxB3Fw)dDJt#hrmb3Ym~|{wfQQTB;(Zx>^_vhb z8j!)t9<#7i$SU|)FpsgBZB53p(uRzGa8QuN+AsMwZCq#e=?|WqJ5}$wetLGKFW-@V zBbdr~_P9aECow#0Fc+Arqd8b;WdgKVV}Tc=uvwNC6viEf`_Y9^=k^dqd@evcIzB*G zxCVAiyA9>r55h-v1x(%|a7U>unB3n&yEumPW9qzztp68rP_p4}!<_at?Yx%6_k72Y zWd4?rcJleH3g+}+9FeaB#_)Ne3am3MM`y&Mkmh((Fqlo@_=*KsBqajAFS-ujo$YbP z!bgxc${2Lj&p<~0B&;=VJPt}e3{Dr0z>jz5L84*_I_?xqDJ1XZMu)`=iLQF#Tcwrt z`33B(Mdx_088!0McQ!Sjv7@YZMbUm+PNM0QAXYl`3HDabhZ|9iNNM^ll#?3=MQ`_OCVH@~dJOF#sS)kcO!J%naAoj;;`2Ix>&Xlb~x|cQ~#cMLmj4~y% zD{K9bvDeOB$~rryo9EouO&aenq1S&Mj1k}^iG(V)BENa2@NC~(&{fgFvOzgu@Hh!I zm!=?VR}7}JTp|065{O0rfaO7&SjaC1gpK&{l#qg#&z_@}+6J^K%ouqTFJbaNs?oE` z%c-hyE~L;A58m>LnM0<_{jAu*;c82eH!b9uUCEab<(dw)&nn^L-c(SYRs$>UzXX*< zH{j8)iSYHdH^fZKfGDds=N)9W3s zq3VQb5VTkoHo;*K{^*SZFS+89-UO)gtOmA;D$d|90Y-5Zh^uMf#FWA4A-p2E6)+#a z>J}JGDxm=Wt)+uL5v!3~j~hDJe2jTH>IPL4Do8lLTE`t#x12Rq728aS>>si@+Pxjd zw%S2XJe%`{r0LJYw0yrB<6!uPp1nU39TUC{kJgD{zcLcGs_cZggR5YJUKTpkvJ1@g zj4<*30DO*I1Wg_tkohGW3WILJG^2V@-z^KDC)+^KooK|4driM`nnx-q81Z8Li#b}M z1FVaC@`r4W7W$&g;!yWS)|c$h+|>nd`HF_C2-6F)l>4hgjCSb-)T~~DL<3%I{$MN!d-%1Fhg%IiaQydsgZW96@bPh-(2#Z; zz4AFnCkF%&C+S9R-R3bIok?{=;`{UaTxRO%cSd^eCYuMn(Xm?4zK2&Q>_NS;OJ|m; z4*Cg9ia;*Cy+}Dw8(xg516GnK7W_8o!{qIaen{S6q`5uJ&KN87++`wo`x|4K70wu& 
zJL7B7bFkadnOI)t3i!yWpps3W$oWchxu1Qr*oJF8hE)FF%kkSW{-Z3fBlJ1xJ)qC5 zlMzO$E5yKbemF#K$wG@h&PG4w?xNt3gGlb#TD0l;dGsPn8okJWNuM|*g*LCyh2n=d zAUdTPULUH0M@iN=-LnVgM9fA)H!$@gaSOLPNs1*}J!MG7u0P8C`?<*sSx)KC4!(Si z0i`pTFHK^vA=-Dm67{BZGGA^gFyhlInJVM0OwFuDW)6263g5DqX_%Nq2^XBCzGl5( z#;6bW{|mcN{;?9sx;Yg)<(!6Y2?><7axYcV_=qp1FU5&1JU%SGKfje>q3CWK!?lw* zZcS-Co7Rg&@ZIOsbdfkZ=0_V9mOGA~lcvJlZdyfeqaIP0qFv~$ty zw3W7dTfyixR53EyIsm0wI9xygAI&(7d{4B{yUmNpz&di+^#0%L;J2fVn})4X-cL@? z+wuHgYh;M(mUzB^u{ud6_K>5OvB_SsEWWkb9llwlA)i+^iMNGU%42WdMRtT=q89Zu zQI_|gGbi5!KvPpbTyi*%J}qly76yvZ8a79VZQl4t`Tuw>weg2lY~)YY_F0R$cTe(o zjb?kei+I4EA(_GQx*yGIS*^j^eA|wV#I<FMij+!*RR8%`Wg2;Jgun=a z5dtFwMhJ`$7$Go1V1&R3f&VuNNUhAbTCrM#t!W^{`|LE4U*RXk-=Ap7{g74tzs<(} z@6^P^ZUa`)u|(e3`+;QS-0PIw*=XvaXE~8Fe3ETYp2x%^<8nEsakV&UnXsH=rJ-c|ZNVm%bpKpd@zQ3S!63?({VRQk zWWVe`kvN42G<63m9oX6~BDDpsHHLoCW z6tC>;OP&hv5>Mx>26qAyWJS9x51Gy7AIbRN-Cu22%blLPo6MMYlRmWe8B zaA9o(OoD637ppRvW96wE=kAhuGc8C?cOzLIpG%5f6C?C?84{o0e>h9Bun#qi>e*we2^O|nxDPpv}I7ZbU}Y8!ulN)^)i1dm^llE_ugtIaSGtpIgP7 zASOpu=_xbo{an!9ZW^vVk;8<)4>q^`0y}nvgLCLU$heQ;L2enksJa`jD?9?-5PN9t zWTQ!k65xsH1YBGyht*pYVWmSOy`@fK*!)jUd_QhcJMh}}-O|ZiD?o8`Z;e^p)Q4Mt%hiLu&rXWVF{8#)oZ zj;dnWhIdfcl7pn8bJ60YB$PC6FG|apfmS>SW{SA;>1`?9jQ;#xFyC4oH;6c5Wg!cE z=Y}L+WWEw&%+E8@o2tnwy9vYA{ylOMSUmZ!+*QmNqJ4TO<$I@!xnCFv*ZrNaS>GbO z{-zi%``&?mn6#kuYr2rLzyQ&%Pw4_y85QDaM%AWXqWOs{kfTiwpqJzEI1eMNWH}Lw zxy#}H?l`2Tn?^lR*BmzfB2$goDFq?CrA7fn)fr{_gVtK){OlOiu4QA<#4)&8OcBem zTi|It11=IFuS2*( z5S-r;2F$OuuxRI2P`T&{W(|Q5##O;dSJbd}T{lc{8-rP!R^fZ*n)ul2!a+X>b9$3Q z4Zk|hW=P_n{Bg``?(sKskhHcoK-)MjquYqoi@Edw^e_kV8I&_`7n)RkkpZnq=(~wF zDAaw2#T`2Mdqgs5_`HGK3>Q4GatdBO8StuSQ?NN}CZ4v|3lFqoylI;}em=evz;Yvc zTd|!=TUX4{d$V9j^t^``RW0tU%(bzr(C3I4z2~XgrxRNrCz8^)Gbp}qKU3b#>c&kIJSPU2IdxChr*%{@ z($ig;M?c%qr+_q=>+}FJjpgyFgY&TT%EeetT^iq=I+y`QWhz8<9f7x-cfp0iNOY^q z1>Rh(g~HXx!Hlj3=YtA3sLB~LYLoFA)j(*y7{x>^@+0}J7kLw$I9!!$)R5{u+(*tV z_Q})+vu$_Ekdc#2pYCa1JB~+f;Yn7#u8?0LgR>Gw&thC zl`V3q;F+1yl(C>2^WcLYQr;m7b&Ixvp`19L87qswq}+k#%4v9mfF6FUAPOv{QaC(i z7e4JU4x=JdJaOS*E|RDMXhncNk2XQs4%^XzF_|cAh8-#{45gRvw;&qT<`0X{fcu9q zt1XfHI1in)h|O0_DMK9v`bcLS-5Gp|Sz~Su^@1ls!tVlXH&()}cOFCX*JgCpeH)CL zV1}P%KZ6t@35++hv3WxQa3&U@sG>x)gi%JzeH41&;2qWr*sf8friNP zVFEgwWsi3H9Y$x~8$ybo1so1K0DJCdzzLIsK>kv~ZL6YTl%g(bJUfO)s*i?M-~7Ez zsX8oW2Y4}j1;<#j@kl7;e?Np)+-kv;$Hg+MQvy)p@f39Ca}yI*vxr`Kp@f>1q(ook zP|VoSE|iy84L!X|c={a^%++wl>-Ihb4>>hhdvqN${;d-!(H&%2>=!$%^GMrE-&ke2 zHN5$k<;XoTt`yfulDXKpmyuA+r5}tDVrFic$HcEaP5Z1)pv3!osJYu2>PSTZRlBc% zNm-@~^R>+Ite-a6-aid0+aq9gjwVPR4`4oinMnqVm)P1AhYagH@@mC5n}fbJoZ@OV zzWEp(GVhWWeb+pjmd{D0mLx17yPV#V@_U2G1E;Q&qBDc2?v4s_^c^K)YRUk$VR=59 zD=2}dj}^kzt5V^P_+V^AkzkR*%f|jZir0BX}6=`LBI(r4ueLRJ#_!Y}Lb6b23u#H#WPmUaFavO`^SH;Kr$2@l`L%jT z^@qRx|3h(Yv^d|q+bG@o(;FpLKyNyGy-W>f*2yH!x7iO^Uq?j^iQYUEa*q7Q2!Rm- zBLqeWj1U+hFhXF2zzBg60{{0DXs$1`SuFU2!_qT_5?M%D%W?(OS&fnXTCMfh*Oc z&o7@jiSlq&M7gTd;X;%QWd5L-b0!}Nd6T&R$U0C}%VxExJmSIG5R#R!p9+3O(YHEE zkkZLsFtn_LiDfp39IZ*&#;o8HqIWH>0o!W+FL|roUal6h9iqd!X85nvNm4=<=auI} z;_0*yT0DIpBX%!}iL9bv)iE7>dHXF`v}7FORZx`hsC@31Id^TZOP8>0E=05D<~P{b zJlr#^Yli=d{QT91+;5vb$@Jb)jMacT`o-RdjyWBJMr|8R*B$`5k7>-Lb)%`GKzovz zFqSMBM-m^5Xr5^KOzxD~&U{t6olk8`#uHv9a`_1N86>*`)T`_3%8?~o8eWo+ttV9l~1F{ zsY^%Z`pJTSo4xwZjjPBo0&PL%OE;x0cNk6 zge?Y4a9K|^#O!c~hAA}=d~GtkQrZc#Gk&7|n}eD0D%P~Gq$b)-<{^PRJv82U3?pcG zkIeX`IBfR!-|9kpVW-i+G9z=*&#~6)N;Em#Bfk8Q`De&U+{^S5&N=km_aew&`~XLO zN#Iw4`q+Cr8!wtO9d8^}JZRI_#GU%$c%lAhwA9#+8KaelChxZe<6{SrQI`)RvsjA~ z!!w3e{s~8n%tsSjt=&^aIF62${EZFjRF7jVegB3E%JDvp1h|tyLg6%gK3W8k+KTw) zH7$IlQx<0$@Zg@&QP_RxI~cnxfoUCPsQ%?xw6A*}f{b(~K9x;pj=w{6cuNe6J?Z!F 
zKu5jJGQ^+Fnitc_Gm}cB)GyXECsPYhDvN?KuXtd;sR5la5P_EWy0EwXEBa}B0hK?$ zgiJWyOqIGkIyT`6dYdi?sX10)WHVR~YDvhq*$sUe{hSt9^@Bjd{=?e-4M+d5D%LAw zuQr}eR4sfdy z4sM~<-abIHn$v(Diz^7(dFrVVrqI6tVA* zLO3;E0GlU11(S)9AgEsf%`;xYVZ~EOY%p$))4K+0Q%o~IQE+Hv|E;f?Bot;oQ!mrz z^FTf4f~pX~oHL}F_r=oNq!4rYq$bi)ZAA(4<6xzWEwoMbgz>RNhgs zWD~}+8v6M2VEn7_HLH-w@lg;lup7L34#Feh3us=^Mig$Nz&J(SBwG)w65pgXxfwBz z|0QPq?|t6?>A7sW{(6S+E8s{5?ISN!MgkHNYLXY z{p8a$a{FRuUaLbR+h_ajA)O!0sgbwZP^Hd!_OpfG7L`Udho51#l~$mjaZVuasfN#< zmBw3B@4@kR@_0wuM9^&(hj%j!zF#gk%v8x~DBWEi z+zQp9Ro@>b%gEz3vd`d6TLp4Gm5K6V7r>HTlfZw&O1SZ1Kcp31L)Z5_ryrd!q<3GN z%^;^{#&p>pW@ceIOu~NcA!q|1dw#6|fB@@TTb|cy);Zoal8$3qwR;Md5ptT<8vWH7j7JCl7Rle3+R}OzGMz zX-3GWlj(9GnOW)b)RSceyg=^tVd*c~Z^ezsu6oO|AM0#0?-Z~jcoVph#a_IeMTd#! zpQ@?T?-w(>`s-1ah9USdgJbYfL*y8mOJ~`PWo|A9=FWI2q_nmX6|N6MU#s&N?X@?O8Kyjw}1TZzgAtg@LzD;)>F}jdz+ui??2Q?28K+b zC)Fg=N_NNTO|k}q{*-UYiezzeLkFAuRkeoNs1!xdzM{iCdh?EXX?GC`NqfR}n@Gsm zcLg0jZN}ISI8Y1hbpI0nnV!!RO}qLcES0OOS=9&qcxtwh#0wD>QfzYs;cmtwr;Xl9 zTA$4)-j-bDUA~yWQyz1K7d9@9aBY1>ok~_m`@M??bF^6w=H&?mslz%*@4YEAZL$w_ z^PMICl=jgf8MFS#`yZWOK6uMEYEdZnf=41B8LAV7-{kl?ay*`oSvGHZm^kmU<|6J?6BpjNC$UsZ%pRmEKLE4qv~i!(ILH~yVb;hgLdFp&6*$5FO0AVxo&_b)raA9u}?277ODBmP9ZeQ6H}jO z^R5Z*WT(DS|I5DLeZ=)KhnSdnCcUoRpV8-uC=B7lvRzy^`qK!Zb30h9)A{mMIE5SdlGZt$W20VKum)*(X{&`lC04-=-(DtnW;vO6xoz0&a(~+n{+;WD zwiUde=4q6T10dt&Q^4bX1CsW;jb7`NK~uFin9R7qc*T~Hw&v=8$vW-7Bl~afURv^i z*E3d`o^-4X(W9%O$Y2`y4FPd4$eOALT=gZ0?z$M(d_!IlWk#5#0+%Ii12J|c^E{@4aj)CT9Q}~R6l$EJQx)CFs7+^hKY4<-?j1mQh9I7;D}}A@ZN$=JzeAm^ zH0*pA2%F}!VDE)Mm^SqY8h0~--nw3rjCx;81!q~%0%Fst!YfY+1MWc{X#Icf-FZ|^ z-Tyf7MinZe0a2NnZg)EKv~T9AqC|rsB15K9p^%a^OGPSEG*KBEBt=Csrin*}3L%7K zOoe{@*0R>~v7Ybu_k2E|C*9v#yMN9-w?ps!x_j?)&ff2RZuiV*{8|ndt2(praYsaR zb0(qvOr8@BiJr{f;Cybe&UrBUcpu5t*twF^&O*r<*K%<8&|rya-T_d#rUeY;%>ybi ziR@|}5nc0@Vw5kRVuD6rr=N@zQWdFpaKC_l-QG{_k;^tB-dKu{7ir?*t1ZX_MnzOs zi#xmI<2>NM>wB!O<0Thojg!ziy(Gu-%Yf78p^^>EZ1A>&01M~KgO#Uq*a6P>Xe<99 zx@v9-ofc3>y__Wo?PH-CXHt3B<#^bH8|~zg&m))tMsFBB$J6Y<*+aqW z&J0je6AE5ei~%z?G;x9JnmD7c`rus-#?{xaW09Hv#djQPCIgSK`L zCM=q}ZyhRG)z>iftF!6(Ygxjvsw^I0-bvYsN3zyqR5;@0NU$T>8d|69eoOp`vn5dg=F;+n;FSmUU1WcoY@`oiS?><|kU6f%TlpbIWhE|!%PM_mF= zcYMGh6%!Dk=gh5Myb46k-vLzjOy?>_9${l0v$##`k8;P-CbPjokEmhm69^B71oZWz z!QIw>>ib0U-;aGM-nls&s~V(B6s8O(jXEw+Dm&EKe1raAknR=mAvFs0bE^T(kC%gw zUU6X5)JZGsfGR6-20r9tzGNtA?auYU7NFZ5i3*p73O<7Hpvx9Tqdo6jZt3LH$7B z2YK)=RvxH3UjfC_uY%i~AJj?zQR;Rwil& zEm+h}yrph_U+o|HkAHtbrft3O`pG1;VW^PMws}nMa}A}hv~)5mHDb1XXbJ0FXT%8} zf90;NYXo}-?g4&E>0FDuHb}nMTcSbeNeisC0cVhF9t9YUtpiG;R>B{AfY(vpCuHr#0cXIg~uCin* zs9Wj-p1z_b7L6_7wZkLMY)BZ_G37mwEq0M?*AEAicoyueC3Orbxx(JGRp8uiJZ4ZU zMOy!$CV9Vg0G3o%ExeIe+b!2U&9d8(UY72}8r?gT{}*MZQYM*|nWhDtOa}tZSIL|O z*Tu!Gddh_b1%lMk%Am0RE_+5nkB!)Lg;F#z6jDaMRJ`{J1`CvDQtu5Wo zc&|vNe2r(|8DM9(wbvi@-^Vagd~yAD{L+V4D0#)dKAY*?Vfu9T zt2yMU(mvSW>f_y-gN_)e3FBrcqtRE_;HA<2#8SIEq@JM#{QIr@L9Zt7QQ6P&B zlBnl-^=`ed>>u{kv~pXU;O#|6vE!j&WJ=C{Z0~dvGNttnJ*Id)JE`IsJNddQw?)~2 z)yw{#zntPm*|t(wpo_D9|M^(V%|jNc3IBUb0l#{x&%ks1{rsguDo>Fqhb^n9&dRL3&`VXmfvTeMW* z!LNPD?dK0rEvL6pN}mVO%{rRwYq{PW-M5BSN-bdSR)$a~@^bJ-`8C~sz4>c7c76X2 zl{4?n(QNJenCU4I{#DT#?|V89b5x#-P*au(YuopV{EWAtXIwh47t_a*uG}nUN`NDW zjvo%_;9xF6JA^5nmri}CuOu|kFtls<%YVGz|25`c_xm}Q7+EcuWcI@C`>nfqoaoHV z1)_Z$V+3yf%>M8B^Vk@Y&;RO+&OOkJ(p@aiy3h7sCUN%Oy`$&+8|8A$d1oo*{bjBF*@(j z!ScI^>y^``GX8~fVnL2?I}j*tCZhSx6F!gAxvMe@h&e|!7=H9oix1)u;F zfC5ke3P1rU00p1`6!`y1VCDJEqPWzZ7}Gg`TsL2tx>mJ_EE&niTVAdEZDUmbjaR=# zb?Az~PIC2QKj!J=)og|46=ut{1S;ZFJ=P1|D3$lPZJeAqFSwT(FNw1${jb#Tvv(Zo+xowW5d|$MLW2; zZvsGz;`ezLjZ$VV#V2boWlLopYEIW8s99qevoQ@^(#t2`pHX28Je{i9DLD=tcyg1~ 
zd(=i7q-PT^Ext)*{0HZU5Nf)o81omiz|hr;ByFo3LAcr!Zp?6$YcmdI8y>jR+J<)t zYtzSvQ;Uy~HtdWV`hzofc#F5s`~n5W|leDtc} z%#Jzb+-S`<5LbQ~SmuoZ&aT{o`^N>>?x4A zaDZf4P9T_D+06RBFK0_;9ASre_Tp3@4`J7?J5ALLAA|>-$ibJU8xz%X>i9lGJEU?= zzuz{F$m>WkIq2)gpBz9S_Bt<#XvCjUj8_14O@@*qx3wj6N>n8lsd0cCc8v=#z<~CE z7_Rqc2acMRz($STPEUG55F%TScw&`JY?={^KbqHw)V0S6%r)Zwu{H3&TK{=}NdF-M z+4_g#g3|-B=WSz1ug+(Td@%x+jh`af-9A;aZO2pz@VN`rL;XP4n@8ZXvMfm2wujx1 z%d;C#3+aoE{zTxFZFv3a6ZrDp8Cb?yKO}KJD-89>_{Y9O;?ME^`dTtoRxG!@5WDE~ zk*HYPK@vJUnBvLloVIQns1of3l0-ky&m#91Frn8V7s!qe zLUM}aHt~4LOFVt{TYRba9t;gk7pE;W{Vi+pN|u@<8qs}-oFsxejTqA&Ya^LSg(SD6 zPZn5LttheF(F&q8j3m83t4NOJYJ*6HL2TVdQ>yO5GUC(0eS}@#8uHxWIMVxRInjRR zHW8^Bgxd{{LmV4*eoGsO@sLBxOH1*X26ak%SQf40(84}ZD6^@>BKYsbcUF3I zyZ>xqKNE}9lacvoArWYmM8B?fVzcyygY73ffbX`mz_lU_M6Z|yoJHPXWs)Dbnmmq+ zSvQ@N;d^r???mj-NL#w5K#f^*q8DRO;!h1b5r(TwJA}CQ%M_k@$LqE}ls>uHaFszU zZ)lvXuq5pQ8ax}PE)UXU?^GplL3@&bUhGK82?KlZv7ZmeH>qLwoPWyJ+cdJ8m&~}O zy*C28oT%^5G&;$-JIJv*?X#JZm>T-jxKUJzUvGT(!!+SY@jc#)8FP$&ZEOF{K1TD6 z;tb~U9M8`ajFB{pmTDhHxppCL?H-2*l%Z7R5_R_3{TGdH(U?>qt-r3)cU;MORn@Eeb{(ci)CnKB!!YB|cOjBjP; zT_D*R;V57hs5{=JSV;5~a#M$ddcia3? zt*nBe&QA{cR_TdNp0t2iQAN;G-jy+ttrwX$R^yqSFU~VrR&v~9^Fpq8y&^X?T9v7| zRz^pEu48QyLOH{ene5^4S@f}-aw4ZcLTV$MNt0`t#Ac-e>|3CZ*!97?ZoRLG9i_u> z7ix-SneFI~9kaqAitg2BNzSm;CmiRlC;HZelJ{2FQhrG- z2Ex^&nCFWOQjidq~ug7E%;VOP~3VFMSs5YfxFl396%MdFtZ7&AuirA8k z>&Z>Vd306yVdf&G&L}Y#@*JWp$f|vc6%H6t1|qZ_m-_&A#!%^!gyKol+v3V zT4}g7vuRig{kg@7$`dM6Gqzh2eQxxY%5^0_%9q*6incGx#|WoG+ABepmQkEQ zMd*p>v38m?r|nF}mg-35x{@F9hgJp&>fGeej6)CbVY)mrJjSdLb9a}{t^-eTijuMwNntH|eK9dT_?MYmt?|EDp-e+LDi02F`%Pyh-*0Vn_k zpa2w*Rsqm)Sg;`>AL%?t<7>-}2&?tMxNgP-^nRRQw~w>^6DB+-RiwY6J|WxpFqIn; zO)nk#nkxK|LDWush$ubH@0RiL|2FQ!i*|~Of-~@itr+!kDUVLeuB7xHY-C(@7qMY) zPBX$|fz$@U2`o!@)NdbW`#WXRd6S7LuCAaq4x7t@Y-?_BZ4SHRKqaFweLtJNp^LeR z6;frko!FP%^1o$X@$ba^=VOZ2P9(P6Si@|Kl?7|EIzaDc2ap%Rv2E>^%*10U^z&#x zQd{Rb8q8y)GJZQ#e#%mtrfTbxEnHPuIWP!ta8CjPJX1TyUnWr1i!S$@8a+Kz_zauISkWt{R`t zTCJk#h)Ki9mfI%eO9PVFQgRAyy`C%;eNt^o7Djzjp!eBVvaTH=AT7d5vR8YMWXRz& zpul-ONII6qiGnQId7ls_Yl{|bw^*H8(Xa;(TYk7l#{U0Y_kX;8ICDJWs`ZXo(Xp4w zdT9dk3#UuebIl|j4{rnODt{1>sR3rVj0Mr@OE|TcJO&YaQLUFo5D%yL{q}XIzsCM! 
zzdCHDfL~%y=#yd0{dyj7zcfu^zg9`|c=btO^mq-}p-~OCsnZgVcO-}`I>Y2lD<%&Q ziN|@i22v6KGnFMmZJ$iM^4(>+tJIZSvMdOgubco*KaK)?)4mc9>v58X9fV}Cp9}#0 z=FIU)^N3+arD#?BBGD(>j9>Kr0B?)&)*d^zX*X>b4^5njPm%MZgdQZT=GMT;_u~PV zqI`g8%$CGQ@+3Z*_JA3j&QdqeQK#2#!?q-Li7s3|D8BwBNu01jON6fZ^xOL)<6>J; z_0@80?Yk^;t{Xy|oQq}hHyvXi-?+lfoo@hgFL{AE#jW5*R|eSgaRN6y_%uCg>1$FV zb|bU~UPo6>3c{Yohhpc(d_z=3RlmJ2dMR0nz72Rxp42I0R?8Z3m!IWu^m#2nMXckh zPIj`5*ig>9G@W~|o6Z@GaALnq*J2zHRmS1LB`T&hh>*Q^A9p%ZgU3CWV6~rge`{Y9 z-6z7>!Jrv)OI=7GoHw2$nvQ^IV@1i6>L<{3D~~O1k7AxpieYnik78B!*3zb5WvFPQQMkUFJ{tcB z7kz07>b9JvKll0G8Xe;15rgotZfS&Ddj>Jt_#Cm_U^+GGlOxkK#(*W4`7zIOBB)aq z)^xIM1iK?l$n7tg$X*HWO;;=&Ne>=(oU*t*o-7eA#)B|#WJZ@zRIY*lw)vyakrc9K z!XiA%{WGEJ*hYLaSxdxj-b>!Sb&`IL`!gfa47xpZ0Xb{ZeX>HR$;e+VV8p|2(et=@ zCjZGHcv(vPBCKP}8NG#QT@o{Sqmmc@+^ z+`+x=50Lw|J5UwZBFH=w0bZeZ2eZ-`NT%;DB73<$A$C6Y!)k7a{+2fS=lb&NYa7sC zQ2baVzHf6GDSNmRb@6-seJ%TG9Cws_&ufauliw<1b1k-tWAmeh`5FyknU5C{gB$_U zf9*g~>D6$){LH(LA?bWBsfBFKE0;QgWeL*H#x{#@YGEWlRyf zk#qsu=L_)nW0qrgLu8R-n`Qo`FIj!>wdprw9pSFxW8yN^EaalR0kZa(oTzph_uJ38 zKg%Rqf66SU>5=e5)OPU`bqd*ac~j2eN3mh3`(J>139A07W~{r;~p z{X6^e=Sz(14}LPMz7Z~r+!!g+*(4{rUVfABxpdHP$&>$gVui;-0Vn_kpa2wr0#E=7 zKmjNK1)u;F_&WmqXqIQ+Wh?w+P_6j={qso6%OQyV?FYhT&gwljR`qw{{&~zskR%vV zkdNp`(D*lfBchWE!c+9eqHVbwq;{-in#n0q-u~Oz$>+}GfMK(#rieUpuB`!?Ia-eL zm}yNqnrq=_ea(OC{LoL&zq6gIQ-fL=w3E+k1DGH^Z#KcGhH>FeQ=~gaEs5PpmhGI5 zn|<0Qweg>enTO|91kf{8`f{IJ6o8+nGuKjvFbbLEr_J67m?(glAY z2%}THwYj84+8~5Z=Hi=J_RHfqT03ke9d-O9)s(uQ(9)EV%5_o-Q`cavCP`FMyDb|u z<}{b{+5)uQ&;5?^ncB{ObJ3f|4 zRs4~k_6oc>eKdE0GCA;44n1`m$p-g&!VPOOl~|szmdqFs4%pn6Tw-tnyVawHDq#b# zWu`~|Wsa#QK8N|TFNH71IAgh9&>Up9D5tJN`Fr6EP*K%EZv<%f7<_M!DbeSrj$xE_rm|8EWm~*%UhGGGV3| zfhnutJ$g-1ShyJTZL6bXjY`?|`*5)Jc>=IHtO+px8LVXrO?8=O5_LiLvm%O@OJSNmYMbiN4aK@lOuPMdrTclRFL%WuDzgVMVSOXFBsdyK}EBV{fNU z@*3ZoG`80K$)csr9W;JQyKp9#O$L&;?b^5JgA}uZTaRa&eIq5 z_;tkW#N9$A_fT}8Z4>^MJw}YY`5Hf&nSjm;&&2}KE@HdqA+k0)j~J}PU^i7+Bzt#; z_&M*qP%X!jAM>hjkIz4%XEyNd6c&qC;$bZ02F`%Pyh-*0Vn_kpum4BpmN+w z5K`=kbj*~+dwqD1x6HQ0_eln$#Z^{4IzRMx%k#)=wK!KT0za&)N$on;m-c?Sk_zLA ziNy!qkTXMu^ynPj-zh^l{}Zy}b$_yIWdhyY8pV`)w9)N$Wn_kEDb}Z8x2SE=JF}^N zCf)XY_P-P3|8`9Ixn^X5QD3rkcr0C^7|VR)&t@)apQbY(?5B24nnYRK#*q3YTX3(m ziXNTs`k%@`H|-Pq7F6J!Ny$_;`;^JMu$WtI8p0K=C}87ud$P}W$1x9OeJRoQDva0v zf^eeWC8?@Yk1~geNUgn?&AoTjkYTEv`tcJ$w5AD=FV}#Pi;cl8|Iu9f+^_WF{nv@P zMb&6==vk?1*IMH<_)1FK(c5#9sXF=wJK=;sIFVlgt`w+CK3`Im%&6_)sFi)#vg@97 z)AB-MVOg+L<92H#<>}c zWtZ#Z z#w0N5q}>KPsmOzymGO*!XOI(8-eQhl->{ExIF(2o2r?qpn^#j8^ zRF>PBxPiVoGL<|eY{IOQqD1j;BK~EK^Z(Shb?5s#56Ot*Ti4@H0_~`&qfXIdJ#W(1 zS_9}sHPNJonkw_~)M++udM#^PxQH1YyOP{eu@80k-!5)hZz3%B+1%rO=;!yoh;E-x z1bV%e_Bw9PE({1`XRdp|toMIU<=^a0I@j%|Ixe4~PB~5>8-{yhZBIsH!|(1#BhmuJ zXTDwS@wx0Zn-(Id?mglqeSw~M5U_JRve+3>BiOBVLK^vW5vOl%5^0xVhi zbOl@(7-`kx_5VlTknI?1 zDwpw0uvF)mSgy7Ssh<~uBsE2eu1@Rye|?v&y*_%`H6_zi7Y~^^e97RO@<;F+O?&gO zni)Ob|8!^H2LEp;00p1`6o3Ly017|>C;$bZ02F`%Pyh-*0Vn_kpa2y3?*$B=+n6pb z@D^NtD=&7s|5@C&##(G{IaEkKu#t)}^8Z%}RioGPMf)PfPfN71vAXJbYLgpguZAJ# zjXebmGF7C0oZPbGvmkWo095zNUR+TeL*(bl5ZEap_TI=CVfd<2l`HU9pG9WBCxWtl>;odc&z`f?M+>Fgh&{;ZycD|5R-f%;Nlj-@Q{5P1(Ymbzz2ZTxadcs^ZB&fn|J zBv%gQf?6H8v7KkRoauTXkK}W!4X@Dy-j)-+3Ll}-@ljG&j}%+43roRPd>xueMVJIJ z2`iqm2d)eThGrpP;j#VPeWz{A#DVeDqlq>6t&%{g%fotVG+a16$P2yw=n%1B)=m02 z(#cASvpC`VD9&FlhV8mBhG|s(LOCoG6Zd=@r7nJpx;JKSp+m*DtJdR-O9#`;xEZWb zyMUXZ^qS2YyOJ>;(2E{-rHFJ0Q^Nc7&FsOzphq$mys>0zt}K51HG%x$^P&}mDgVQB}9%hbmN@m0(pIq|dW{$f4EtiZiE5HXF5=d2!-k;Uu^OOapo6WhMz;XKfQQuZId>oi*f_emfu)ZS>c8 z;XV|A0#E=7KmjNK1)u;FfC5ke3P1rU00p1`6o3Ly017|>DDdwId`PY3`M7Ec8^;ui zs{2%lVkcP%b%JoI%N6*aN|m(8QS{Y24*fhF;7*J!-k$ahr6>a=>r07fgXSiIs-Re^ 
z=I?$jhpNT|bXr0P5q`jwlIv$qopfv=?0lc#YO@Qmul=l$3%RaRJ*O0tdjNZzQcs>6 zA4(V9+(*B?RzVRR3iO8*Z7QZS4d(_Q6K`BGz-&KLC{_70KR<_idW*3{+k@o#t&?cw z=3sj8;h7Bns)NZ))?rqAWRT>w%cy$we&N?Olch2*uEshVnHy7qi*_WC0{gyn{kuXY zWvvVQl#F1stA|s1FV0|1WwGMA%@V1|ANcA1bK9`hQ24&o4%NJTo6tU?MBiPn#K=yu zV8)47(eab@$>U?ouw&seh}rlgsf_y2;>zzNa}Llq|pw>1O=b~6o3Ly017|>C;$bZ02F`%Pyh-* z0Vn_kpa2wr0#E=7KmjNqJpxFekCD&W2y!it9Wpz${Li!7zNSm zbtD=)y$N%ih2VkCF@&9P5*ahlhZv2jVpF2kqtYgpP9<(Fsxo)o1 z&%raV+W9k_Q^Y=9WoW=aFYJJ-0jBb10NP7*ShQ?)lGOJ_g(ENVoL_Afc5Y1&PtqNU z{2?+G&!0C;SaW}&SvzNM{A-^jZTm%{zQ+B>oZ)q@yT{LNb`i9<+VOL`a!t(^2N_CR ztS}xZ00p1`6o3Ly017|>C;$bZ02F`%Pyh-*0Vn_kpa2wr0#E=7KmjNK1)u;FfC5ke z3P1rU00p1`6o3Ly017|>C;$bZ02F`%Pyh-*0Vn_kpa2wr0#E=7KmjNK1)u;FfC5ke z3P1rU00p1`6o3Ly017|>C;$bZ02F`%Pyh-*0Vn_kpa2wr0#E=7KmjNK1)u;FfC5ke z3P1rU00p1`6o3Ly017|>C;$bZ02F`%Pyh-*0Vn_kpa2wr0#E=7KmjNK1)u;FfC5ke z3P1rU00p1`6o3Ly017|>C;$bZ02F`%Pyh-*0Vn_kpa2wr0#E=7KmjNK1)u;FfC5ke z3P1rU00p1`6o3Ly017|>C;$bZ02F`%Pyh-*0Vn_kpa2wr0#E=7KmjNK1)u;FfC5ke z3P1rU00p1`6o3Ly017|>C;$bZ02F`%Pyh-*0Vn_kpa2wr0#E=7KmjNK1)u;FfC5lJ HM&|zj@RWZQ literal 0 HcmV?d00001 From 8b9d06d340e40aa294ec4f6dec07addd0c72f294 Mon Sep 17 00:00:00 2001 From: Luc Elliott Date: Tue, 2 Jul 2024 14:19:34 +0000 Subject: [PATCH 06/56] feat: Add HDF5DataStore class for handling hdf5 files --- src/caked/dataloader.py | 126 +++++++++++++++++++++++++++++++--------- src/caked/hdf5_utils.py | 30 ++++++++++ 2 files changed, 128 insertions(+), 28 deletions(-) create mode 100644 src/caked/hdf5_utils.py diff --git a/src/caked/dataloader.py b/src/caked/dataloader.py index 4f35a17..e99f7aa 100644 --- a/src/caked/dataloader.py +++ b/src/caked/dataloader.py @@ -9,6 +9,7 @@ import os import random import typing +from concurrent.futures import ThreadPoolExecutor, as_completed from pathlib import Path import mrcfile @@ -19,6 +20,7 @@ from torch.utils.data import ConcatDataset, DataLoader, Subset from torchvision import transforms +from caked.hdf5_utils import HDF5DataStore from caked.Transforms.augments import ComposeAugment from caked.Transforms.transforms import ComposeTransform, DecomposeToSlices, Transforms @@ -329,14 +331,19 @@ def load(self, datapath, datatype, label_path=None, weight_path=None) -> None: Returns: None """ - paths = list(Path(datapath).rglob(f"*.{datatype}")) + datapath = Path(datapath) + label_path = Path(label_path) if label_path is not None else None + weight_path = Path(weight_path) if weight_path is not None else None + + datasets = [] + num_workers = 6 + + paths = list(datapath.rglob(f"*.{datatype}")) label_paths = ( - list(Path(label_path).rglob(f"*.{datatype}")) - if label_path is not None - else None + list(label_path.rglob(f"*.{datatype}")) if label_path is not None else None ) weight_paths = ( - list(Path(weight_path).rglob(f"*.{datatype}")) + list(weight_path.rglob(f"*.{datatype}")) if weight_path is not None else None ) @@ -366,14 +373,14 @@ def load(self, datapath, datatype, label_path=None, weight_path=None) -> None: ) paths = [ - Path(datapath) / p.name + datapath / p.name for p in paths for c in self.classes if c in p.name.split("_")[0] ] label_paths = ( [ - Path(label_path) / p.name + label_path / p.name for p in label_paths for c in self.classes if c in p.name.split("_")[0] @@ -383,7 +390,7 @@ def load(self, datapath, datatype, label_path=None, weight_path=None) -> None: ) weight_paths = ( [ - Path(weight_path) / p.name + weight_path / p.name for p in weight_paths for c in self.classes if c in p.name.split("_")[0] @@ -402,20 +409,46 @@ def load(self, datapath, datatype, label_path=None, 
@@ -402,20 +409,46 @@ def load(self, datapath, datatype, label_path=None, weight_path=None) -> None:
             raise RuntimeError(msg)
         label_paths = label_paths if label_paths is not None else [None] * len(paths)
         weight_paths = weight_paths if weight_paths is not None else [None] * len(paths)
-        self.dataset = ConcatDataset(
-            [
-                MapDataset(
-                    path=path,
-                    label_path=label_path,
-                    weight_path=weight_path,
-                    transforms=self.transformations,
-                    augments=self.augmentations,
+
+        raw_map_HDF5 = HDF5DataStore(datapath.joinpath("raw_map_data.h5"))
+        label_HDF5 = (
+            HDF5DataStore(label_path.joinpath("label_data.h5"))
+            if label_path is not None
+            else None
+        )
+        weight_HDF5 = (
+            HDF5DataStore(weight_path.joinpath("weight_data.h5"))
+            if weight_path is not None
+            else None
+        )
+
+        with ThreadPoolExecutor(max_workers=num_workers) as executor:
+            futures = [
+                executor.submit(
+                    process_dataset,
+                    path,
+                    label_path,
+                    weight_path,
+                    self.transformations,
+                    self.augmentations,
                 )
                 for path, label_path, weight_path in zip(
                     paths, label_paths, weight_paths
                 )
             ]
-        )
+
+            for future in as_completed(futures):
+                result = future.result()
+                raw_map_HDF5.add_array(*result["map_data"])
+                if result["label_data"] and label_HDF5:
+                    label_HDF5.add_array(*result["label_data"])
+                if result["weight_data"] and weight_HDF5:
+                    weight_HDF5.add_array(*result["weight_data"])
+                datasets.append(result)  # Collect processed datasets
+
+        # Concat datasets if needed
+        concatenated_data = [dataset["map_data"][0] for dataset in datasets]
+        self.dataset = ConcatDataset(concatenated_data)
 
     def process(self, paths: list[str], datatype: str):
         """
@@ -651,15 +684,28 @@ def __init__(
         self.tiles_count: int = 0
         self.transforms = transforms
         self.augments = augments
+        self.transform_kwargs = None
 
         if decompose_kwargs is None:
             decompose_kwargs = {"cshape": 64, "margin": 8}
+        if self.transform_kwargs is None:
+            self.transform_kwargs = {}
+
+        if not decompose_kwargs.get("step", False):
+            decompose_kwargs["step"] = decompose_kwargs.get("cshape", 1) - (
+                2 * decompose_kwargs.get("margin")
+            )
+
+        self.decompose_kwargs = decompose_kwargs
+
     def __len__(self):
         # TODO: The tile counts need to be calculated before __getitem__ is called
         # The amount of tiles is linked to the transformations applied to the map data
         # This would mean the best place to calculate the tile count would be in the __init__
         # method and subsequently the transform method would need to be called there too
-        return self.tiles_count
+
+        # 1 represents the full map
+        return self.tiles_count if self.tiles_count != 0 else 1
 
     def __getitem__(
         self, idx
     ):
         # start by loading the map data
         self.load_map_objects()
-        transforms_keywords = self.transform()
+        self.transform()
         _ = self.augment()
 
         # SEND TO HDF5 FILE to be saved, some will be duplicates so need to keep track of the duplicates
         if (self.slices is None) or (self.tiles is None):
             decompose = DecomposeToSlices(
                 self.mapobj,
-                step=transforms_keywords.get("step"),
-                cshape=transforms_keywords.get("cshape"),
-                margin=transforms_keywords.get("margin"),
+                step=self.decompose_kwargs.get("step"),
+                cshape=self.decompose_kwargs.get("cshape"),
+                margin=self.decompose_kwargs.get("margin"),
             )  # TODO: move this
             self.slices = decompose.slices
             self.tiles = decompose.tiles
@@ -706,9 +752,7 @@ def __getitem__(
 
     def _transform_keywords_builder(self):
         keywords = {}
         keywords.update(self.decompose_kwargs)
-        keywords["step"] = self.decompose_kwargs.get(
-            "step", (keywords.get("cshape") - (2 * keywords.get("margin")))
-        )
+
         for transform in self.transforms:
             if transform == Transforms.MASKCROP.value:
                 keywords["mask"] = self.label_mapobj
@@ -773,10 +817,36 @@ def augment(self) -> None:
 
         return augment_kwargs
 
-    def transform(self) -> dict:
+    def transform(self):
         # TODO: Need to see if same transforms are applied to all map objects, maybe just voxel space normalisation
         transform_kwargs = self._transform_keywords_builder()
         if len(self.transforms) == 0:
-            return transform_kwargs
+            self.transform_kwargs = transform_kwargs
+            return
+        self.transform_kwargs = ComposeTransform(self.transforms)(
+            self.mapobj, **transform_kwargs
+        )
 
-        return ComposeTransform(self.transforms)(self.mapobj, **transform_kwargs)
+
+def process_dataset(path, label_path, weight_path, transformations, augmentations):
+    map_dataset = MapDataset(
+        path,
+        label_path=label_path,
+        weight_path=weight_path,
+        transforms=transformations,
+        augments=augmentations,
+    )
+    map_dataset.load_map_objects()
+    map_dataset.transform()
+    map_dataset.augment()
+    result = {
+        "map_data": (map_dataset.mapobj.data, f"{path.stem}_map"),
+        "label_data": (map_dataset.label_mapobj.data, f"{path.stem}_label")
+        if label_path is not None
+        else None,
+        "weight_data": (map_dataset.weight_mapobj.data, f"{path.stem}_weight")
+        if weight_path is not None
+        else None,
+    }
+    map_dataset.close_map_objects()
+    return result
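
The rewritten load() fans the per-file work out to a thread pool and streams each result into the HDF5 stores as it completes, rather than building every MapDataset up front. A minimal sketch of that submit/as_completed shape, with a made-up worker and made-up inputs:

    from concurrent.futures import ThreadPoolExecutor, as_completed

    def process_one(name):  # stand-in for process_dataset
        return name, name.upper()

    names = ["1b23_map.mrc", "5yvs_map.mrc"]  # hypothetical inputs
    with ThreadPoolExecutor(max_workers=6) as executor:
        futures = [executor.submit(process_one, n) for n in names]
        for future in as_completed(futures):  # yields results in completion order
            key, value = future.result()
            print(key, value)
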
keywords["mask"] = self.label_mapobj @@ -773,10 +817,36 @@ def augment(self) -> None: return augment_kwargs - def transform(self) -> dict: + def transform(self): # TODO: Need to see if same transforms are applied to all map objects, maybe just voxel space normalisation transform_kwargs = self._transform_keywords_builder() if len(self.transforms) == 0: - return transform_kwargs + self.transform_kwargs = transform_kwargs + + self.transform_kwargs = ComposeTransform(self.transforms)( + self.mapobj, **transform_kwargs + ) + - return ComposeTransform(self.transforms)(self.mapobj, **transform_kwargs) +def process_dataset(path, label_path, weight_path, transformations, augmentations): + map_dataset = MapDataset( + path, + label_path=label_path, + weight_path=weight_path, + transforms=transformations, + augments=augmentations, + ) + map_dataset.load_map_objects() + map_dataset.transform() + map_dataset.augment() + result = { + "map_data": (map_dataset.mapobj.data, f"{path.stem}_map"), + "label_data": (map_dataset.label_mapobj.data, f"{path.stem}_label") + if label_path is not None + else None, + "weight_data": (map_dataset.weight_mapobj.data, f"{path.stem}_weight") + if weight_path is not None + else None, + } + map_dataset.close_map_objects() + return result diff --git a/src/caked/hdf5_utils.py b/src/caked/hdf5_utils.py new file mode 100644 index 0000000..9b431bd --- /dev/null +++ b/src/caked/hdf5_utils.py @@ -0,0 +1,30 @@ +# create a class to handle hdf5 files +import h5py +import numpy as np + +# Want to take an input of np arrays and store them in a hdf5 file + +# also + + +class HDF5DataStore: + def __init__(self, save_path): + self.save_path = save_path + + def add_array(self, array, dataset_name, compression="gzip"): + with h5py.File(self.save_path, "a") as f: # Open in append mode + f.create_dataset(dataset_name, data=array, compression=compression) + print(f"Dataset {dataset_name} added to {self.save_path}") + + def save(self, array_list): + for i, array in enumerate(array_list): + self.add_array(array, f"array_{i}") + + +# Assuming raw_map_HDF5, label_HDF5, and weight_HDF5 are instances of BatchHDF5Writer +# Initialize them with the HDF5 file and desired batch size + +# load in map dataset, perform the transformations inside the dataset init + + +# first test of loading the files one by one took 13 minutes 10 seconds From d8729a104d953a5b23ef1a541d3891865c43cea7 Mon Sep 17 00:00:00 2001 From: Luc Elliott Date: Thu, 4 Jul 2024 13:19:28 +0000 Subject: [PATCH 07/56] Add pytest configuration file and conftest.py for test fixtures --- tests/conftest.py | 26 +++++++++++++++ tests/pytest.ini | 3 ++ tests/test_disk_io.py | 78 +++++++++++++++++++++---------------------- 3 files changed, 67 insertions(+), 40 deletions(-) create mode 100644 tests/conftest.py create mode 100644 tests/pytest.ini diff --git a/tests/conftest.py b/tests/conftest.py new file mode 100644 index 0000000..6cbff2b --- /dev/null +++ b/tests/conftest.py @@ -0,0 +1,26 @@ +from __future__ import annotations + +from pathlib import Path + +import pytest + + +@pytest.fixture(scope="session") +def test_data_mrc_dir(): + """Fixture to provide the MRC test data directory.""" + return Path(Path(__file__).parent.joinpath("testdata_mrc")) + + +@pytest.fixture(scope="session") +def test_data_npy_dir(): + """Fixture to provide the NPY test data directory.""" + return Path(Path(__file__).parent.joinpath("testdata_npy")) + + +@pytest.fixture(scope="session") +def test_corrupt_file(): + """Fixture to provide the path to a corrupt file for 
testing.""" + return Path(__file__).parent / "corrupt.mrc" + + + diff --git a/tests/pytest.ini b/tests/pytest.ini new file mode 100644 index 0000000..b0e5a94 --- /dev/null +++ b/tests/pytest.ini @@ -0,0 +1,3 @@ +[pytest] +filterwarnings = + ignore::DeprecationWarning \ No newline at end of file diff --git a/tests/test_disk_io.py b/tests/test_disk_io.py index f9a4a7e..b5729d9 100644 --- a/tests/test_disk_io.py +++ b/tests/test_disk_io.py @@ -5,13 +5,11 @@ import numpy as np import pytest import torch -from tests import testdata_mrc, testdata_npy from caked.dataloader import DiskDataLoader, DiskDataset ORIG_DIR = Path.cwd() -TEST_DATA_MRC = Path(testdata_mrc.__file__).parent -TEST_DATA_NPY = Path(testdata_npy.__file__).parent + TEST_CORRUPT = Path(__file__).parent / "corrupt.mrc" DISK_PIPELINE = "disk" DATASET_SIZE_ALL = None @@ -47,36 +45,36 @@ def test_class_instantiation(): assert test_loader.pipeline == DISK_PIPELINE -def test_dataset_instantiation_mrc(): +def test_dataset_instantiation_mrc(test_data_mrc_dir): """ Test case for instantiating a DiskDataset with MRC data. """ - test_dataset = DiskDataset(paths=TEST_DATA_MRC, datatype=DATATYPE_MRC) + test_dataset = DiskDataset(paths=test_data_mrc_dir, datatype=DATATYPE_MRC) assert isinstance(test_dataset, DiskDataset) -def test_dataset_instantiation_npy(): +def test_dataset_instantiation_npy(test_data_npy_dir): """ Test case for instantiating a DiskDataset with npy datatype. """ - test_dataset = DiskDataset(paths=TEST_DATA_MRC, datatype=DATATYPE_MRC) + test_dataset = DiskDataset(paths=test_data_npy_dir, datatype=DATATYPE_MRC) assert isinstance(test_dataset, DiskDataset) -def test_load_dataset_no_classes(): +def test_load_dataset_no_classes(test_data_mrc_dir): """ Test case for loading dataset without specifying classes. """ test_loader = DiskDataLoader( pipeline=DISK_PIPELINE, classes=DISK_CLASSES_NONE, dataset_size=DATASET_SIZE_ALL ) - test_loader.load(datapath=TEST_DATA_MRC, datatype=DATATYPE_MRC) + test_loader.load(datapath=test_data_mrc_dir, datatype=DATATYPE_MRC) assert isinstance(test_loader.dataset, DiskDataset) assert len(test_loader.classes) == len(DISK_CLASSES_FULL_MRC) assert all(a == b for a, b in zip(test_loader.classes, DISK_CLASSES_FULL_MRC)) -def test_load_dataset_all_classes_mrc(): +def test_load_dataset_all_classes_mrc(test_data_mrc_dir): """ Test case for loading a dataset with all classes using DiskDataLoader. """ @@ -85,13 +83,13 @@ def test_load_dataset_all_classes_mrc(): classes=DISK_CLASSES_FULL_MRC, dataset_size=DATASET_SIZE_ALL, ) - test_loader.load(datapath=TEST_DATA_MRC, datatype=DATATYPE_MRC) + test_loader.load(datapath=test_data_mrc_dir, datatype=DATATYPE_MRC) assert isinstance(test_loader.dataset, DiskDataset) assert len(test_loader.classes) == len(DISK_CLASSES_FULL_MRC) assert all(a == b for a, b in zip(test_loader.classes, DISK_CLASSES_FULL_MRC)) -def test_load_dataset_all_classes_npy(): +def test_load_dataset_all_classes_npy(test_data_npy_dir): """ Test case for loading a dataset with all classes using npy files. 
@@ -106,13 +104,13 @@ def test_load_dataset_all_classes_npy(): classes=DISK_CLASSES_FULL_NPY, dataset_size=DATASET_SIZE_ALL, ) - test_loader.load(datapath=TEST_DATA_NPY, datatype=DATATYPE_NPY) + test_loader.load(datapath=test_data_npy_dir, datatype=DATATYPE_NPY) assert isinstance(test_loader.dataset, DiskDataset) assert len(test_loader.classes) == len(DISK_CLASSES_FULL_NPY) assert all(a == b for a, b in zip(test_loader.classes, DISK_CLASSES_FULL_NPY)) -def test_load_dataset_some_classes(): +def test_load_dataset_some_classes(test_data_mrc_dir): """ Test case for loading a dataset with some specific classes using DiskDataLoader. """ @@ -121,13 +119,13 @@ def test_load_dataset_some_classes(): classes=DISK_CLASSES_SOME_MRC, dataset_size=DATASET_SIZE_ALL, ) - test_loader.load(datapath=TEST_DATA_MRC, datatype=DATATYPE_MRC) + test_loader.load(datapath=test_data_mrc_dir, datatype=DATATYPE_MRC) assert isinstance(test_loader.dataset, DiskDataset) assert len(test_loader.classes) == len(DISK_CLASSES_SOME_MRC) assert all(a == b for a, b in zip(test_loader.classes, DISK_CLASSES_SOME_MRC)) -def test_load_dataset_missing_class(): +def test_load_dataset_missing_class(test_data_mrc_dir): """ Test case for loading dataset with missing classes. """ @@ -137,10 +135,10 @@ def test_load_dataset_missing_class(): dataset_size=DATASET_SIZE_ALL, ) with pytest.raises(Exception, match=r".*Missing classes: .*"): - test_loader.load(datapath=TEST_DATA_MRC, datatype=DATATYPE_MRC) + test_loader.load(datapath=test_data_mrc_dir, datatype=DATATYPE_MRC) -def test_one_image(): +def test_one_image(test_data_mrc_dir): """ Test case for loading one image using DiskDataLoader. @@ -149,14 +147,14 @@ def test_one_image(): test_loader = DiskDataLoader( pipeline=DISK_PIPELINE, classes=DISK_CLASSES_NONE, dataset_size=DATASET_SIZE_ALL ) - test_loader.load(datapath=TEST_DATA_MRC, datatype=DATATYPE_MRC) + test_loader.load(datapath=test_data_mrc_dir, datatype=DATATYPE_MRC) test_dataset = test_loader.dataset test_item_image, test_item_name = test_dataset.__getitem__(1) assert test_item_name in DISK_CLASSES_FULL_MRC assert isinstance(test_item_image, torch.Tensor) -def test_get_loader_training_false(): +def test_get_loader_training_false(test_data_mrc_dir): """ Test case for the `get_loader` method of the `DiskDataLoader` class when `training` is set to False. """ @@ -166,12 +164,12 @@ def test_get_loader_training_false(): dataset_size=DATASET_SIZE_ALL, training=False, ) - test_loader.load(datapath=TEST_DATA_MRC, datatype=DATATYPE_MRC) + test_loader.load(datapath=test_data_mrc_dir, datatype=DATATYPE_MRC) torch_loader = test_loader.get_loader(batch_size=64) assert isinstance(torch_loader, torch.utils.data.DataLoader) -def test_get_loader_training_true(): +def test_get_loader_training_true(test_data_mrc_dir): """ Test case for the `get_loader` method of the `DiskDataLoader` class when training is set to True. 
""" @@ -181,7 +179,7 @@ def test_get_loader_training_true(): dataset_size=DATASET_SIZE_ALL, training=True, ) - test_loader.load(datapath=TEST_DATA_MRC, datatype=DATATYPE_MRC) + test_loader.load(datapath=test_data_mrc_dir, datatype=DATATYPE_MRC) torch_loader_train, torch_loader_val = test_loader.get_loader( split_size=0.8, batch_size=64 ) @@ -189,7 +187,7 @@ def test_get_loader_training_true(): assert isinstance(torch_loader_val, torch.utils.data.DataLoader) -def test_get_loader_training_fail(): +def test_get_loader_training_fail(test_data_mrc_dir): """ Test case for the `get_loader` method of the `DiskDataLoader` class when training fails. @@ -201,14 +199,14 @@ def test_get_loader_training_fail(): dataset_size=DATASET_SIZE_ALL, training=True, ) - test_loader.load(datapath=TEST_DATA_MRC, datatype=DATATYPE_MRC) + test_loader.load(datapath=test_data_mrc_dir, datatype=DATATYPE_MRC) with pytest.raises(Exception, match=r".* sets must be larger than .*"): torch_loader_train, torch_loader_val = test_loader.get_loader( split_size=1, batch_size=64 ) -def test_processing_data_all_transforms(): +def test_processing_data_all_transforms(test_data_mrc_dir): """ Test the processing of data with all transforms applied. @@ -225,7 +223,7 @@ def test_processing_data_all_transforms(): training=True, transformations=TRANSFORM_ALL, ) - test_loader.load(datapath=TEST_DATA_MRC, datatype=DATATYPE_MRC) + test_loader.load(datapath=test_data_mrc_dir, datatype=DATATYPE_MRC) assert test_loader.dataset.normalise assert test_loader.dataset.shiftmin assert test_loader.dataset.gaussianblur @@ -235,7 +233,7 @@ def test_processing_data_all_transforms(): assert label in DISK_CLASSES_FULL_MRC -def test_processing_data_some_transforms_npy(): +def test_processing_data_some_transforms_npy(test_data_npy_dir): """ Test case for processing data with some transformations using the DiskDataLoader class. @@ -256,8 +254,8 @@ def test_processing_data_some_transforms_npy(): dataset_size=DATASET_SIZE_ALL, training=True, ) - test_loader_none.load(datapath=TEST_DATA_NPY, datatype=DATATYPE_NPY) - test_loader_transf.load(datapath=TEST_DATA_NPY, datatype=DATATYPE_NPY) + test_loader_none.load(datapath=test_data_npy_dir, datatype=DATATYPE_NPY) + test_loader_transf.load(datapath=test_data_npy_dir, datatype=DATATYPE_NPY) assert test_loader_transf.dataset.normalise assert not test_loader_transf.dataset.shiftmin assert test_loader_transf.dataset.gaussianblur @@ -273,7 +271,7 @@ def test_processing_data_some_transforms_npy(): assert len(image_none[1]) == len(image_transf[1]) -def test_processing_data_rescale(): +def test_processing_data_rescale(test_data_mrc_dir): """ Test the processing of data with rescaling. 
@@ -288,7 +286,7 @@ def test_processing_data_rescale(): training=True, transformations=TRANSFORM_ALL_RESCALE, ) - test_loader.load(datapath=TEST_DATA_MRC, datatype=DATATYPE_MRC) + test_loader.load(datapath=test_data_mrc_dir, datatype=DATATYPE_MRC) assert test_loader.dataset.normalise assert test_loader.dataset.shiftmin assert test_loader.dataset.gaussianblur @@ -305,7 +303,7 @@ def test_processing_data_rescale(): training=True, transformations=TRANSFORM_RESCALE, ) - test_loader.load(datapath=TEST_DATA_MRC, datatype=DATATYPE_MRC) + test_loader.load(datapath=test_data_mrc_dir, datatype=DATATYPE_MRC) assert not test_loader.dataset.normalise assert not test_loader.dataset.shiftmin assert not test_loader.dataset.gaussianblur @@ -316,7 +314,7 @@ def test_processing_data_rescale(): assert label in DISK_CLASSES_FULL_MRC -def test_processing_after_load(): +def test_processing_after_load(test_data_mrc_dir): """ Test the processing steps after loading data using DiskDataLoader. """ @@ -327,14 +325,14 @@ def test_processing_after_load(): training=False, ) test_loader.debug = True - test_loader.load(datapath=TEST_DATA_MRC, datatype=DATATYPE_MRC) + test_loader.load(datapath=test_data_mrc_dir, datatype=DATATYPE_MRC) assert test_loader.transformations is None assert not test_loader.dataset.normalise assert not test_loader.dataset.shiftmin assert not test_loader.dataset.gaussianblur test_loader.transformations = TRANSFORM_ALL_RESCALE pre_dataset = test_loader.dataset - test_loader.load(datapath=TEST_DATA_MRC, datatype=DATATYPE_MRC) + test_loader.load(datapath=test_data_mrc_dir, datatype=DATATYPE_MRC) post_dataset = test_loader.dataset assert test_loader.dataset.normalise assert test_loader.dataset.shiftmin @@ -346,7 +344,7 @@ def test_processing_after_load(): assert not torch.equal(pre_image, post_image) -def test_drop_last(): +def test_drop_last(test_data_mrc_dir): """ Test the drop_last parameter in the get_loader method of the DiskDataLoader class. """ @@ -356,7 +354,7 @@ def test_drop_last(): dataset_size=DATASET_SIZE_ALL, training=True, ) - test_loader.load(datapath=TEST_DATA_MRC, datatype=DATATYPE_MRC) + test_loader.load(datapath=test_data_mrc_dir, datatype=DATATYPE_MRC) loader_train_true, loader_val_true = test_loader.get_loader( split_size=0.7, batch_size=64, no_val_drop=True ) @@ -369,11 +367,11 @@ def test_drop_last(): assert loader_val_false.drop_last -def test_corrupt_mrcfile(): +def test_corrupt_mrcfile(test_data_mrc_dir): """ Test that corrupt mrcfiles are not loaded and throw an exception. 
""" - test_dataset = DiskDataset(paths=TEST_DATA_MRC, datatype=DATATYPE_MRC) + test_dataset = DiskDataset(paths=test_data_mrc_dir, datatype=DATATYPE_MRC) assert isinstance(test_dataset, DiskDataset) with pytest.raises(Exception, match=r".* corrupted."): test_dataset.read(TEST_CORRUPT) From 1a6ed24864eff89ee313b55051f09fd571b57200 Mon Sep 17 00:00:00 2001 From: Luc Elliott Date: Tue, 9 Jul 2024 16:43:28 +0000 Subject: [PATCH 08/56] Added a dataset config class to have all the defaults in one place --- src/caked/base.py | 26 ++++++++++++++++++++++++++ 1 file changed, 26 insertions(+) diff --git a/src/caked/base.py b/src/caked/base.py index 66e9a72..17a768c 100644 --- a/src/caked/base.py +++ b/src/caked/base.py @@ -1,9 +1,12 @@ from __future__ import annotations from abc import ABC, abstractmethod +from pathlib import Path from torch.utils.data import Dataset +from caked.hdf5 import HDF5DataStore + class AbstractDataLoader(ABC): """ @@ -88,3 +91,26 @@ class AbstractDataset(ABC, Dataset): @abstractmethod def augment(self, augment: bool, aug_type: str): pass + + +class DatasetConfig: + datatype: str = "mrc" + label_path: str | Path | None = None + weight_path: str | Path | None = None + dataset_size: int | None = None + save_to_disk: bool = False + training: bool = True + classes: list[str] | None = None + pipeline: str = "disk" + transforms: list[str] | None = None + augments: list[str] | None = None + decompose: bool = True + decompose_kwargs: dict[str, int] | None = None + transform_kwargs: dict | None = None + augment_kwargs: dict | None = None + map_hdf5_store: HDF5DataStore | None = None + label_hdf5_store: HDF5DataStore | None = None + weight_hdf5_store: HDF5DataStore | None = None + slices: list[tuple[int, int, int]] | None = None + tiles = None + tiles_count: int = 0 From c8486b88a365f1000784dfded5f1305f495798a7 Mon Sep 17 00:00:00 2001 From: Luc Elliott Date: Tue, 9 Jul 2024 16:44:29 +0000 Subject: [PATCH 09/56] Create a class for lazy-loading a hdf5 file --- src/caked/hdf5.py | 65 +++++++++++++++++++++++++++++++++++++++++ src/caked/hdf5_utils.py | 30 ------------------- 2 files changed, 65 insertions(+), 30 deletions(-) create mode 100644 src/caked/hdf5.py delete mode 100644 src/caked/hdf5_utils.py diff --git a/src/caked/hdf5.py b/src/caked/hdf5.py new file mode 100644 index 0000000..709548f --- /dev/null +++ b/src/caked/hdf5.py @@ -0,0 +1,65 @@ +from __future__ import annotations + +import h5py +import numpy as np + + +class HDF5DataStore: + def __init__(self, save_path: str): + self.save_path = save_path + + def __getitem__(self, key: str): + with h5py.File(self.save_path, "r") as f: + return np.array(f[key]) + + def get(self, key: str, default=None): + try: + with h5py.File(self.save_path, "r") as f: + return np.array(f[key]) + except KeyError: + return default + + def __len__(self): + return len(self.keys()) + + def add_array( + self, array: np.ndarray, dataset_name: str, compression: str = "gzip" + ) -> str: + if self.check_name_in_store(dataset_name): + dataset_name = self._add_number_to_dataset_name(dataset_name) + with h5py.File(self.save_path, "a") as f: # Open in append mode + f.create_dataset(dataset_name, data=array, compression=compression) + + return dataset_name + + def save(self, array_list: list[np.ndarray]): + for i, array in enumerate(array_list): + self.add_array(array, f"array_{i}") + + def check_name_in_store(self, dataset_name: str): + if not self.save_path.exists(): + return False + with h5py.File(self.save_path, "r") as f: + return dataset_name in f + + def 
_add_number_to_dataset_name(self, dataset_name: str, delimiter: str = "--"):
+        # prefix the dataset name with the next number, based on the highest existing duplicate
+        existing_names = [name for name in self.keys() if dataset_name in name]
+        last_number = max(
+            (
+                int(name.split(delimiter)[0])
+                for name in existing_names
+                if delimiter in name
+            ),
+            default=0,
+        )
+
+        return f"{last_number + 1}{delimiter}{dataset_name}"
+
+    def keys(self):
+        with h5py.File(self.save_path, "r") as f:
+            return list(f.keys())
diff --git a/src/caked/hdf5_utils.py b/src/caked/hdf5_utils.py
deleted file mode 100644
index 9b431bd..0000000
--- a/src/caked/hdf5_utils.py
+++ /dev/null
@@ -1,30 +0,0 @@
-# create a class to handle hdf5 files
-import h5py
-import numpy as np
-
-# Want to take an input of np arrays and store them in a hdf5 file
-
-# also
-
-
-class HDF5DataStore:
-    def __init__(self, save_path):
-        self.save_path = save_path
-
-    def add_array(self, array, dataset_name, compression="gzip"):
-        with h5py.File(self.save_path, "a") as f:  # Open in append mode
-            f.create_dataset(dataset_name, data=array, compression=compression)
-            print(f"Dataset {dataset_name} added to {self.save_path}")
-
-    def save(self, array_list):
-        for i, array in enumerate(array_list):
-            self.add_array(array, f"array_{i}")
-
-
-# Assuming raw_map_HDF5, label_HDF5, and weight_HDF5 are instances of BatchHDF5Writer
-# Initialize them with the HDF5 file and desired batch size
-
-# load in map dataset, perform the transformations inside the dataset init
-
-
-# first test of loading the files one by one took 13 minutes 10 seconds
From 3779ee4c59ff64026d8cc6d52ee66583ae551ef8 Mon Sep 17 00:00:00 2001
From: Luc Elliott
Date: Tue, 9 Jul 2024 16:46:06 +0000
Subject: [PATCH 10/56] Moved multi-processing to utils, Added an array
 dataset which can load arrays into the hdf5 without a file.
---
 src/caked/dataloader.py | 647 +++++++++++++++++++++++++---------------
 1 file changed, 414 insertions(+), 233 deletions(-)

diff --git a/src/caked/dataloader.py b/src/caked/dataloader.py
index e99f7aa..c63ebb1 100644
--- a/src/caked/dataloader.py
+++ b/src/caked/dataloader.py
@@ -9,22 +9,25 @@
 import os
 import random
 import typing
-from concurrent.futures import ThreadPoolExecutor, as_completed
 from pathlib import Path
 
 import mrcfile
 import numpy as np
 import torch
-from ccpem_utils.map.parse_mrcmapobj import get_mapobjhandle
+from ccpem_utils.map.parse_mrcmapobj import MapObjHandle, get_mapobjhandle
+from ccpem_utils.other.utils import set_gpu
 from scipy.ndimage import zoom
 from torch.utils.data import ConcatDataset, DataLoader, Subset
 from torchvision import transforms
 
-from caked.hdf5_utils import HDF5DataStore
+from caked.base import AbstractDataLoader, AbstractDataset, DatasetConfig
+from caked.hdf5 import HDF5DataStore
 from caked.Transforms.augments import ComposeAugment
 from caked.Transforms.transforms import ComposeTransform, DecomposeToSlices, Transforms
-
-from .base import AbstractDataLoader, AbstractDataset
+from caked.utils import (
+    filter_and_construct_paths,
+    process_datasets,
+)
 
 np.random.seed(42)
 TRANSFORM_OPTIONS = ["normalise", "gaussianblur", "shiftmin"]
@@ -262,9 +265,11 @@ def __init__(
         pipeline: str = "disk",
         transformations: list[str] | None = None,
         augmentations: list[str] | None = None,
+        decompose: bool = True,
     ) -> None:
         """
        DataLoader implementation for loading map data from disk.
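+
+        A minimal usage sketch (paths are illustrative; the transformation
+        names follow ``caked.Transforms.transforms.Transforms``):
+
+        >>> loader = MapDataLoader(transformations=["voxnorm", "norm", "padding"])
+        >>> loader.load(datapath="maps/", datatype="mrc")
+        >>> tiles = len(loader.dataset)  # ConcatDataset of per-map tile datasets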
+
+        """
         self.dataset_size = dataset_size
         self.save_to_disk = save_to_disk
@@ -272,6 +277,7 @@ def __init__(
         self.pipeline = pipeline
         self.transformations = transformations
         self.augmentations = augmentations
+        self.decompose = decompose
         self.debug = False
         self.classes = classes
 
@@ -282,61 +288,51 @@ def __init__(
         if self.augmentations is None:
             self.augmentations = []
 
-    def __add__(self, other):
-        if not isinstance(other, MapDataLoader):
-            msg = "Can only add two MapDataLoader objects together."
-            raise TypeError(msg)
-        if self.pipeline != other.pipeline:
-            msg = "Both MapDataLoader objects must use the same pipeline."
-            raise ValueError(msg)
-        if self.transformations != other.transformations:
-            msg = "Both MapDataLoader objects must use the same transformations."
-            raise ValueError(msg)
-        if self.augmentations != other.augmentations:
-            msg = "Both MapDataLoader objects must use the same augmentations."
-            raise ValueError(msg)
-        if self.classes != other.classes:
-            msg = "Both MapDataLoader objects must use the same classes."
-            raise ValueError(msg)
-        if self.dataset_size != other.dataset_size:
-            msg = "Both MapDataLoader objects must use the same dataset size."
-            raise ValueError(msg)
-        if self.save_to_disk != other.save_to_disk:
-            msg = "Both MapDataLoader objects must use the same save to disk option."
-            raise ValueError(msg)
-        if self.training != other.training:
-            msg = "Both MapDataLoader objects must use the same training option."
-            raise ValueError(msg)
-
-        new_loader = MapDataLoader(
-            dataset_size=self.dataset_size,
-            save_to_disk=self.save_to_disk,
-            training=self.training,
-            classes=self.classes,
-            pipeline=self.pipeline,
-            transformations=self.transformations,
-            augmentations=self.augmentations,
-        )
-        new_loader.dataset = ConcatDataset([self.dataset, other.dataset])
-        return new_loader
-
-    def load(self, datapath, datatype, label_path=None, weight_path=None) -> None:
+    def load(
+        self,
+        datapath: str | Path,
+        datatype: str,
+        label_path: str | Path | None = None,
+        weight_path: str | Path | None = None,
+        use_gpu: bool = False,
+        num_workers: int = 1,
+    ) -> None:
         """
         Load the data from the specified path and data type.
 
         Args:
-            datapath (str): The path to the directory containing the data.
+            datapath (str | Path): The path to the directory containing the data.
             datatype (str): The type of data to load.
+            label_path (str | Path, optional): The path to the directory containing the labels. Defaults to None.
+            weight_path (str | Path, optional): The path to the directory containing the weights. Defaults to None.
+            num_workers (int, optional): Number of worker threads used to load the data. Defaults to 1.
+            use_gpu (bool, optional): Whether to use the GPU. Defaults to False.
 
         Returns:
            None
        """
+        if use_gpu and num_workers > 1:
+            msg = "Cannot use the GPU and multiple workers at the same time."
+ raise ValueError(msg) + if use_gpu: + set_gpu() + datapath = Path(datapath) label_path = Path(label_path) if label_path is not None else None weight_path = Path(weight_path) if weight_path is not None else None + map_hdf5_store = HDF5DataStore(datapath.joinpath("raw_map_data.h5")) + label_hdf5_store = ( + HDF5DataStore(label_path.joinpath("label_data.h5")) + if label_path is not None + else None + ) + weight_hdf5_store = ( + HDF5DataStore(weight_path.joinpath("weight_data.h5")) + if weight_path is not None + else None + ) datasets = [] - num_workers = 6 paths = list(datapath.rglob(f"*.{datatype}")) label_paths = ( @@ -353,6 +349,9 @@ def load(self, datapath, datatype, label_path=None, weight_path=None) -> None: # ids right now depend on the data being saved with a certain format (id in the first part of the name, separated by _) # TODO: make this more general/document in the README + + # TODO: this won't be how it works for multi classifciation tasks I'm guessing so need to include the ID + # generation from mlToolkit ids = np.unique([file.name.split("_")[0] for file in paths]) if len(self.classes) == 0: self.classes = ids @@ -378,25 +377,10 @@ def load(self, datapath, datatype, label_path=None, weight_path=None) -> None: for c in self.classes if c in p.name.split("_")[0] ] - label_paths = ( - [ - label_path / p.name - for p in label_paths - for c in self.classes - if c in p.name.split("_")[0] - ] - if label_path is not None - else None - ) - weight_paths = ( - [ - weight_path / p.name - for p in weight_paths - for c in self.classes - if c in p.name.split("_")[0] - ] - if weight_path is not None - else None + paths = filter_and_construct_paths(datapath, paths, self.classes) + label_paths = filter_and_construct_paths(label_path, label_paths, self.classes) + weight_paths = filter_and_construct_paths( + weight_path, weight_paths, self.classes ) if self.dataset_size is not None: paths = paths[: self.dataset_size] @@ -404,68 +388,34 @@ def load(self, datapath, datatype, label_path=None, weight_path=None) -> None: if label_paths is not None and len(label_paths) != len(paths): msg = "Label paths and data paths do not match." raise RuntimeError(msg) + if weight_paths is not None and len(weight_paths) != len(paths): msg = "Weight paths and data paths do not match." 
raise RuntimeError(msg) + label_paths = label_paths if label_paths is not None else [None] * len(paths) weight_paths = weight_paths if weight_paths is not None else [None] * len(paths) - raw_map_HDF5 = HDF5DataStore(datapath.joinpath("raw_map_data.h5")) - label_HDF5 = ( - HDF5DataStore(label_path.joinpath("label_data.h5")) - if label_paths is not None - else None + # HDF5 store assumes the data is all in one location + + datasets = process_datasets( + num_workers, + paths, + label_paths, + weight_paths, + self.transformations, + self.augmentations, + self.decompose, + map_hdf5_store, + label_hdf5_store, + weight_hdf5_store, ) - weight_HDF5 = ( - HDF5DataStore(weight_path.joinpath("weight_data.h5")) - if weight_paths is not None - else None - ) - - with ThreadPoolExecutor(max_workers=num_workers) as executor: - futures = [ - executor.submit( - process_dataset, - path, - label_path, - weight_path, - self.transformations, - self.augmentations, - ) - for path, label_path, weight_path in zip( - paths, label_paths, weight_paths - ) - ] - - for future in as_completed(futures): - result = future.result() - raw_map_HDF5.add_array(*result["map_data"]) - if result["label_data"] and label_HDF5: - label_HDF5.add_array(*result["label_data"]) - if result["weight_data"] and weight_HDF5: - weight_HDF5.add_array(*result["weight_data"]) - datasets.append(result) # Collect processed datasets - - # Concat datasets if needed - concatenated_data = [dataset["map_data"][0] for dataset in datasets] - self.dataset = ConcatDataset(concatenated_data) - - def process(self, paths: list[str], datatype: str): - """ - Process the loaded data with the specified transformations. - Args: - paths (list[str]): List of file paths to the data. - datatype (str): Type of data being processed. + self.dataset = ConcatDataset(datasets) - Returns: - DiskDataset: Processed dataset object. - - Raises: - RuntimeError: If no transformations were provided. - """ - - raise NotImplementedError + def process(self): + """ """ + raise NotImplementedError() def get_loader( self, @@ -496,6 +446,7 @@ def get_loader( raise RuntimeError(msg) # split into train / val sets idx = np.random.permutation(len(self.dataset)) + if split_size < 1: split_size = split_size * 100 @@ -646,87 +597,110 @@ def transformation(self, x): return x def augment(self, augment): - raise NotImplementedError + raise NotImplementedError() class MapDataset(AbstractDataset): - """ - A dataset class for loading map data, alongside the corresponding class labels and weights. - The map data is loaded from the disk and is decomposed into a set of tiles. These tiles are - then reuturned when indexing the dataset. - - Args: - - Note: I'm not sure if shuffling will be used but the method I'm currently using will lazily - load the data from disk so the map file will be loadeded, transformed and then the tile - will be extracted. It might be good to include a cache option to store map data in memory. - This could be useful to reduce the number of times the map data is loaded from disk... - Perhaps saving them as hdf5 files would be a good idea? - """ - def __init__( self, path: str | Path, - label_path: str | Path | None = None, - weight_path: str | Path | None = None, - transforms: list[str] | None = None, - augments: list[str] | None = None, - decompose_kwargs: dict[str, int] | None = None, + **kwargs, ) -> None: + """ + A dataset class for loading map data, alongside the corresponding class labels and weights. 
+ The map data is loaded from the disk and is decomposed into a set of tiles. These tiles are + then returned when indexing the dataset. + + Args: + path (Union[str, Path]): The path to the map data. + label_path (Optional[Union[str, Path]]): The path to the label data. Defaults to None. + weight_path (Optional[Union[str, Path]]): The path to the weight data. Defaults to None. + map_hdf5_store (Optional[HDF5DataStore]): The HDF5 store for the map data. Defaults to None. + label_hdf5_store (Optional[HDF5DataStore]): The HDF5 store for the label data. Defaults to None. + transforms (Optional[List[str]]): The transformations to apply to the data. + augments (Optional[List[str]]): The augmentations to apply to the data. + decompose (bool): Whether to decompose the data into tiles. Defaults to True. + decompose_kwargs (Optional[Dict[str, int]]): The decomposition parameters. Defaults to None. + transform_kwargs (Optional[Dict]): The transformation parameters. Defaults to None. + + + Attributes: + data_shape (Optional[Tuple]): The shape of the map data. Defaults to None. + mapobj (Optional[MapObjHandle]): The map object handle for the map data. Defaults to None. + label_mapobj (Optional[MapObjHandle]): The map object handle for the label data. Defaults to None. + weight_mapobj (Optional[MapObjHandle]): The map object handle for the weight data. Defaults to None. + slices (Optional[List[Tuple]]): The slices of the data. Defaults to None. + tiles (Optional): The tiles of the data. Defaults to None. + tiles_count (int): The number of tiles. Defaults to 0. + + """ + config = DatasetConfig() + + for key, value in kwargs.items(): + if hasattr(config, key): + setattr(config, key, value) + self.path = Path(path) - self.label_path = Path(label_path) if label_path is not None else None - self.weight_path = Path(weight_path) if weight_path is not None else None - self.mapobj = None - self.label_mapobj = None - self.weight_mapobj = None - self.slices = None + self.id = self.path.stem + self.label_path = ( + Path(config.label_path) if config.label_path is not None else None + ) + self.weight_path = ( + Path(config.weight_path) if config.weight_path is not None else None + ) + + self.map_hdf5_store: HDF5DataStore | None = config.map_hdf5_store + self.label_hdf5_store: HDF5DataStore | None = config.label_hdf5_store + self.weight_hdf5_store: HDF5DataStore | None = config.weight_hdf5_store + self.slices: list[tuple] | None = None self.tiles = None - self.tiles_count: int = 0 - self.transforms = transforms - self.augments = augments - self.transform_kwargs = None - if decompose_kwargs is None: - decompose_kwargs = {"cshape": 64, "margin": 8} + self.tiles_count = config.tiles_count + self.transforms = config.transforms + self.augments = config.augments + self.decompose_kwargs = config.decompose_kwargs + self.transform_kwargs = config.transform_kwargs + self.decompose = config.decompose + self.data_shape: tuple | None = None + + self.mapobj: MapObjHandle | None = None + self.label_mapobj: MapObjHandle | None = None + self.weight_mapobj: MapObjHandle | None = None + + if self.decompose_kwargs is None: + self.decompose_kwargs = {"cshape": 64, "margin": 8} if self.transform_kwargs is None: self.transform_kwargs = {} - if not decompose_kwargs.get("step", False): - decompose_kwargs["step"] = decompose_kwargs.get("cshape", 1) - ( - 2 * decompose_kwargs.get("margin") - ) + if self.augments is None: + self.augments = [] + + if self.transforms is None: + self.transforms = [] - self.decompose_kwargs = decompose_kwargs + if 
not self.decompose_kwargs.get("step", False):
+            self.decompose_kwargs["step"] = self.decompose_kwargs.get("cshape", 1) - (
+                2 * self.decompose_kwargs.get("margin")
+            )
 
     def __len__(self):
-        # TODO: The tile counts need to be calculated before __getitem__ is called
-        # The amount of tiles is linked to the transformations applied to the map data
-        # This would mean the best place to calculate the tile count would be in the __init__
-        # method and subsequently the transform method would need to be called there too
+        if self.tiles_count == 0 and self.decompose:
+            self.generate_tile_indicies()
+        elif self.tiles_count == 0:
+            self.tiles_count = 1
 
-        # 1 represents the full map
-        return self.tiles_count if self.tiles_count != 0 else 1
+        return self.tiles_count
 
     def __getitem__(
         self, idx
     ) -> tuple[torch.Tensor, torch.Tensor | None, torch.Tensor | None]:
-        # start by loading the map data
-        self.load_map_objects()
-
-        self.transform()
-        _ = self.augment()
-        # SEND TO HDF5 FILE to be saved, some will be duplicates so need to keep track of the duplicates
+        # TODO: this needs to be changed to hold where the data is stored
         if (self.slices is None) or (self.tiles is None):
-            decompose = DecomposeToSlices(
-                self.mapobj,
-                step=self.decompose_kwargs.get("step"),
-                cshape=self.decompose_kwargs.get("cshape"),
-                margin=self.decompose_kwargs.get("margin"),
-            )  # TODO: move this
-            self.slices = decompose.slices
-            self.tiles = decompose.tiles
-            self.tiles_count = len(self.tiles)
+            self.generate_tile_indicies()
+
+        if self.mapobj is None:
+            self.load_map_objects()
 
         map_slice = self.mapobj.data[self.slices[idx]]
         label_slice = (
@@ -740,39 +714,13 @@ def __getitem__(
             else None
         )
 
-        # Close the map objects
-        self.close_map_objects()
-
-        return (
-            torch.tensor(map_slice),
-            torch.tensor(label_slice) if label_slice is not None else None,
-            torch.tensor(weight_slice) if weight_slice is not None else None,
-        )
-
-    def _transform_keywords_builder(self):
-        keywords = {}
-        keywords.update(self.decompose_kwargs)
-
-        for transform in self.transforms:
-            if transform == Transforms.MASKCROP.value:
-                keywords["mask"] = self.label_mapobj
-            if transform == Transforms.NORM.value:
-                keywords["ext_dim"] = (0, 0, 0)
-                keywords["fill_padding"] = (0, 0, 0)
-            if transform == Transforms.VOXNORM.value:
-                keywords["vox"] = self.decompose_kwargs.get("vox", 1.0)
-                keywords["vox_lim"] = self.decompose_kwargs.get("vox_lim", (0.95, 1.05))
-
-        return keywords
+        map_tensor = torch.tensor(map_slice)
+        label_tensor = torch.tensor(label_slice) if label_slice is not None else None
+        weight_tensor = torch.tensor(weight_slice) if weight_slice is not None else None
 
-    def _augment_keywords_builder(self):
-        keywords = {}
-        for augment in self.augments:
-            if augment.__class__.__name__ == "RandomRotationAugment":
-                keywords["ax"] = self.ax
-                keywords["an"] = self.an
+        self.close_map_objects(self.mapobj, self.label_mapobj, self.weight_mapobj)
 
-        return keywords
+        return map_tensor, label_tensor, weight_tensor
 
     def load_map_objects(
         self,
@@ -794,16 +742,14 @@ def close_map_objects(self, *args):
         if arg is not None:
             arg.close()
 
-    def augment(self) -> None:
+    def augment(self, close_map_objects: bool = True) -> dict:
         augment_kwargs = self._augment_keywords_builder()
-        augment_kwargs["retall"] = True
         if len(self.augments) == 0:
            return {}
 
         self.mapobj, extra_kwargs = ComposeAugment(self.augments)(
             self.mapobj, **augment_kwargs
         )
-        augment_kwargs["retall"] = False
         augment_kwargs.update(
             extra_kwargs
         )  # update the kwargs with the returned values
@@ -815,10 +761,24 @@ def augment(self) 
-> None: self.weight_mapobj, **augment_kwargs ) + if close_map_objects: + self.close_map_objects(self.mapobj, self.label_mapobj, self.weight_mapobj) + return augment_kwargs - def transform(self): + def transform(self, close_map_objects: bool = True): + """ + Perform the transformations on the map data. + + Note: The final map shape is calculated here, + + Args: + close_map_objects (bool, optional): Whether to close the map objects after transformation. Defaults to True. + + """ # TODO: Need to see if same transforms are applied to all map objects, maybe just voxel space normalisation + if self.mapobj is None: + self.load_map_objects() transform_kwargs = self._transform_keywords_builder() if len(self.transforms) == 0: self.transform_kwargs = transform_kwargs @@ -826,27 +786,248 @@ def transform(self): self.transform_kwargs = ComposeTransform(self.transforms)( self.mapobj, **transform_kwargs ) + # Need to do the transform on all the map objects + self.get_data_shape(close_map_objects=False) + if close_map_objects: + self.close_map_objects(self.mapobj, self.label_mapobj, self.weight_mapobj) + + def get_data_shape(self, close_map_objects: bool = True): + if self.data_shape is not None: + return + + if (self.mapobj is None) or (self.mapobj.data) is None: + self.load_map_objects() + self.data_shape = self.mapobj.data.shape + if self.label_mapobj is not None: + assert ( + self.label_mapobj.data.shape == self.data_shape + ), "Map and label shapes do not match." + if self.weight_mapobj is not None: + assert ( + self.weight_mapobj.data.shape == self.data_shape + ), "Map and weight shapes do not match." + + if close_map_objects: + self.close_map_objects(self.mapobj, self.label_mapobj, self.weight_mapobj) + + def generate_tile_indicies(self): + if self.data_shape is None: + self.get_data_shape() + + decompose = DecomposeToSlices( + self.data_shape, + step=self.decompose_kwargs.get("step"), + cshape=self.decompose_kwargs.get("cshape"), + margin=self.decompose_kwargs.get("margin"), + ) + + self.slices = decompose.slices + self.tiles = decompose.tiles + self.tiles_count = len(self.tiles) + + def _transform_keywords_builder(self): + keywords = {} + keywords.update(self.decompose_kwargs) + + for transform in self.transforms: + if transform == Transforms.MASKCROP.value: + keywords["mask"] = self.label_mapobj + + if transform == Transforms.NORM.value: + keywords["ext_dim"] = (0, 0, 0) + keywords["fill_padding"] = (0, 0, 0) + + if transform == Transforms.VOXNORM.value: + keywords["vox"] = self.decompose_kwargs.get("vox", 1.0) + keywords["vox_lim"] = self.decompose_kwargs.get("vox_lim", (0.95, 1.05)) + + return keywords + + def _augment_keywords_builder(self): + keywords = {} + for augment in self.augments: + if augment.__class__.__name__ == "RandomRotationAugment": + keywords["ax"] = self.ax + keywords["an"] = self.an + + return keywords + + +class ArrayDataset(AbstractDataset): + + """Class to handle loading of data from hdf5 files, to be handled by a DataLoader""" + + # need to add their own and update the dataset id + def __init__( + self, + dataset_id: str, + data_array: np.ndarray, + label_array: np.ndarray | None = None, + weight_array: np.ndarray | None = None, + **kwargs, + ) -> None: + config = DatasetConfig() + for key, value in kwargs.items(): + if hasattr(config, key): + setattr(config, key, value) + self.id = dataset_id + self.data_array = data_array + self.label_array = label_array + self.weight_array = weight_array + + self.slices = config.slices + self.tiles = config.tiles + self.tiles_count = 
config.tiles_count + self.augments = config.augments + self.decompose = config.decompose + self.data_shape: tuple | None = None + self.decompose_kwargs = config.decompose_kwargs + self.map_hdf5_store = config.map_hdf5_store + self.label_hdf5_store = config.label_hdf5_store + self.weight_hdf5_store = config.weight_hdf5_store + if self.decompose_kwargs is None: + self.decompose_kwargs = {"cshape": 64, "margin": 8} + + if not self.decompose_kwargs.get("step", False): + self.decompose_kwargs["step"] = self.decompose_kwargs.get("cshape", 1) - ( + 2 * self.decompose_kwargs.get("margin") + ) + + if self.augments is None: + self.augments = [] + + # create an instance of the map dataset so I can use it's functions using composition + self.__mapdataset = MapDataset( + path=self.id, + # use the attributes from the config object + **config.__dict__, + ) + + def __len__(self): + if self.tiles_count == 0 and self.decompose: + self.generate_tile_indicies() + elif self.tiles_count == 0: + self.tiles_count = 1 + + return self.tiles_count + + def __getitem__(self, idx): + if (self.slices is None) or (self.tiles is None): + self.generate_tile_indicies() + + if self.data_array is None: + self.get_data() + + data_slice = self.data_array[self.slices[idx]] + label_slice = ( + self.label_array[self.slices[idx]] if self.label_array is not None else None + ) + weight_slice = ( + self.weight_array[self.slices[idx]] + if self.weight_array is not None + else None + ) + data_tensor = torch.tensor(data_slice) + label_tensor = torch.tensor(label_slice) if label_slice is not None else None + weight_tensor = torch.tensor(weight_slice) if weight_slice is not None else None + + self.close_data() + return data_tensor, label_tensor, weight_tensor -def process_dataset(path, label_path, weight_path, transformations, augmentations): - map_dataset = MapDataset( - path, - label_path=label_path, - weight_path=weight_path, - transforms=transformations, - augments=augmentations, - ) - map_dataset.load_map_objects() - map_dataset.transform() - map_dataset.augment() - result = { - "map_data": (map_dataset.mapobj.data, f"{path.stem}_map"), - "label_data": (map_dataset.label_mapobj.data, f"{path.stem}_label") - if label_path is not None - else None, - "weight_data": (map_dataset.weight_mapobj.data, f"{path.stem}_weight") - if weight_path is not None - else None, - } - map_dataset.close_map_objects() - return result + def get_data(self): + self.data_array = self.map_hdf5_store.get(self.id + "_map") + if self.label_hdf5_store is not None: + self.label_array = self.label_hdf5_store.get(self.id + "_label") + if self.weight_hdf5_store is not None: + self.weight_array = self.weight_hdf5_store.get(self.id + "_weight") + + def close_data(self): + self.data_array = None + self.label_array = None + self.weight_array = None + + def _augment_keywords_builder(self): + return self.__mapdataset._augment_keywords_builder() + + def _transform_keywords_builder(self): + return self.__mapdataset._transform_keywords_builder() + + # need to do augment + def transform(self) -> None: + msg = "Transforms are not supported for ArrayDataset." 
+ raise NotImplementedError(msg) + + def augment(self) -> None: + augment_kwargs = self._augment_keywords_builder() + if len(self.augments) == 0: + return {} + + self.data_array, extra_kwargs = ComposeAugment(self.augments)( + self.data_array, **augment_kwargs + ) + augment_kwargs.update( + extra_kwargs + ) # update the kwargs with the returned values + if self.label_array is not None: + self.label_array = ComposeAugment(self.augments)( + self.label_array, **augment_kwargs + ) + if self.weight_array is not None: + self.weight_array = ComposeAugment(self.augments)( + self.weight_array, **augment_kwargs + ) + + return augment_kwargs + + def get_data_shape(self, close_data: bool = True): + if self.data_shape is not None: + return + + if self.data_array is None: + self.get_data() + self.data_shape = self.data_array.shape + if self.label_array is not None: + assert ( + self.label_array.shape == self.data_shape + ), "Map and label shapes do not match." + if self.weight_array is not None: + assert ( + self.weight_array.shape == self.data_shape + ), "Map and weight shapes do not match." + + if close_data: + self.close_data() + + def generate_tile_indicies(self): + if self.data_shape is None: + self.get_data_shape() + + decompose = DecomposeToSlices( + self.data_shape, + step=self.decompose_kwargs.get("step"), + cshape=self.decompose_kwargs.get("cshape"), + margin=self.decompose_kwargs.get("margin"), + ) + + self.slices = decompose.slices + self.tiles = decompose.tiles + self.tiles_count = len(self.tiles) + + def save_to_store(self): + self.id = self.map_hdf5_store.add_array( + self.data_array, + self.id + "_map", + ) + self.id = self.id.replace("_map", "") + + if self.label_array is not None: + self.label_hdf5_store.add_array( + self.label_array, + self.id + "_label", + ) + if self.weight_array is not None: + self.weight_hdf5_store.add_array( + self.weight_array, + self.id + "_weight", + ) From b9a91cbf6c412968f139a46ae054c723e64a9e2c Mon Sep 17 00:00:00 2001 From: Luc Elliott Date: Tue, 9 Jul 2024 16:48:09 +0000 Subject: [PATCH 11/56] Moved Multi-processing code here, code here to duplciate arrays from the dataloader, augment them and saves them to array. For a specific usecase in ml-toolkit --- src/caked/utils.py | 240 +++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 240 insertions(+) create mode 100644 src/caked/utils.py diff --git a/src/caked/utils.py b/src/caked/utils.py new file mode 100644 index 0000000..b693d64 --- /dev/null +++ b/src/caked/utils.py @@ -0,0 +1,240 @@ +from __future__ import annotations + +from concurrent.futures import ThreadPoolExecutor, as_completed +from pathlib import Path + +import numpy as np +from torch.utils.data import ConcatDataset + +from caked.hdf5 import HDF5DataStore +from caked.Wrappers import none_return_none + + +def process_datasets( + num_workers: int, + paths: list[str], + label_paths: list[str], + weight_paths: list[str], + transformations, + augmentations, + decompose: bool, + raw_map_HDF5: HDF5DataStore, + label_HDF5: HDF5DataStore | None = None, + weight_HDF5: HDF5DataStore | None = None, +): + """ + Process multiple datasets in parallel. + + Args: + num_workers: Number of workers to use. + paths: List of paths to the map files. + label_paths: List of paths to the label files. + weight_paths: List of paths to the weight files. + raw_map_HDF5: Instance of HDF5DataStore to store map data. + label_HDF5: Instance of HDF5DataStore to store label data. + weight_HDF5: Instance of HDF5DataStore to store weight data. 
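+        transformations: List of transformation names to apply to each map.
+        augmentations: List of augmentation names to apply to each map.
+        decompose: Whether each map is decomposed into tiles.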
+
+    Returns:
+        list[MapDataset]: The processed datasets.
+
+    """
+    datasets = []
+
+    with ThreadPoolExecutor(max_workers=num_workers) as executor:
+        futures = [
+            executor.submit(
+                process_map_dataset,
+                path,
+                label_path,
+                weight_path,
+                transformations,
+                augmentations,
+                decompose,
+                raw_map_HDF5,
+                label_HDF5,
+                weight_HDF5,
+            )
+            for path, label_path, weight_path in zip(paths, label_paths, weight_paths)
+        ]
+
+        for future in as_completed(futures):
+            result, dataset = future.result()
+
+            add_dataset_to_HDF5(
+                *result.values(),
+                dataset.id,
+                raw_map_HDF5,
+                label_HDF5=label_HDF5,
+                weight_HDF5=weight_HDF5,
+            )
+            datasets.append(dataset)  # Collect processed datasets
+
+    return datasets
+
+
+def process_map_dataset(
+    path: str | Path,
+    label_path: str | Path | None,
+    weight_path: str | Path | None,
+    transformations: list[str],
+    augmentations: list[str],
+    decompose: bool,
+    map_hdf5: HDF5DataStore,
+    label_hdf5: HDF5DataStore | None,
+    weight_hdf5: HDF5DataStore | None,
+):
+    """
+    Process a single map dataset, applying transformations and augmentations;
+    the map objects are closed afterwards.
+
+    Args:
+        path: (str | Path) path to the map file.
+        label_path: (str | Path | None) path to the label file.
+        weight_path: (str | Path | None) path to the weight file.
+        transformations: (list[str]) list of transformations to apply.
+        augmentations: (list[str]) list of augmentations to apply.
+        decompose: (bool) whether the dataset is decomposed into tiles.
+        map_hdf5: (HDF5DataStore) store for the map data.
+        label_hdf5: (HDF5DataStore | None) store for the label data.
+        weight_hdf5: (HDF5DataStore | None) store for the weight data.
+
+    Returns:
+        tuple[dict, MapDataset]: dictionary containing map, label, and weight data,
+        and the processed MapDataset object.
+    """
+    from caked.dataloader import MapDataset  # Avoid circular import
+
+    map_dataset = MapDataset(
+        path,
+        label_path=label_path,
+        weight_path=weight_path,
+        transforms=transformations,
+        augments=augmentations,
+        decompose=decompose,
+        map_hdf5_store=map_hdf5,
+        label_hdf5_store=label_hdf5,
+        weight_hdf5_store=weight_hdf5,
+    )
+    map_dataset.transform(close_map_objects=False)
+    map_dataset.augment(close_map_objects=False)
+    result = {
+        "map_data": map_dataset.mapobj.data,
+        "label_data": map_dataset.label_mapobj.data if label_path is not None else None,
+        "weight_data": map_dataset.weight_mapobj.data
+        if weight_path is not None
+        else None,
+    }
+
+    # close_map_objects(*args) only closes what it is given, so pass the handles
+    map_dataset.close_map_objects(
+        map_dataset.mapobj, map_dataset.label_mapobj, map_dataset.weight_mapobj
+    )
+
+    return result, map_dataset
+
+
+def add_dataset_to_HDF5(
+    map_data: np.ndarray,
+    label_data: np.ndarray | None,
+    weight_data: np.ndarray | None,
+    name: str,
+    raw_map_HDF5: HDF5DataStore,
+    label_HDF5: HDF5DataStore | None = None,
+    weight_HDF5: HDF5DataStore | None = None,
+) -> tuple[str, str, str]:
+    """
+    Add map data to the HDF5 stores.
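+    Dataset names are suffixed with ``_map``/``_label``/``_weight``; when a
+    name already exists in a store, ``HDF5DataStore.add_array`` prefixes a
+    counter (e.g. ``1--name_map``) and the renamed id is returned.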
+
+    Args:
+        map_data: (np.ndarray) map data
+        raw_map_HDF5: (HDF5DataStore) instance of HDF5DataStore to store map data
+        name: (str) name of the dataset
+        label_data: (np.ndarray | None) label data
+        weight_data: (np.ndarray | None) weight data
+        label_HDF5: (HDF5DataStore | None) instance of HDF5DataStore to store label data
+        weight_HDF5: (HDF5DataStore | None) instance of HDF5DataStore to store weight data
+
+    Returns:
+        tuple[str, str, str]: map_id, label_id, weight_id
+    """
+    map_id = f"{name}_map"
+    label_id = f"{name}_label"
+    weight_id = f"{name}_weight"
+
+    map_id = raw_map_HDF5.add_array(map_data, map_id)
+    if label_HDF5 is not None:
+        label_id = label_HDF5.add_array(label_data, label_id)
+    if weight_HDF5 is not None:
+        weight_id = weight_HDF5.add_array(weight_data, weight_id)
+
+    return map_id, label_id, weight_id
+
+
+# Functions so I don't need to write out if xxx is None each time
+
+
+@none_return_none
+def filter_and_construct_paths(base_path, paths, classes):
+    return [
+        base_path / p.name for p in paths for c in classes if c in p.name.split("_")[0]
+    ]
+
+
+def duplicate_and_augment_from_hdf5(
+    map_data_loader,
+    ids: list[str],
+    augmentations: list[str] | None = None,
+):
+    """
+    Duplicate datasets already held in the HDF5 store, augment the copies, and
+    append them to the loader's dataset.
+
+    Args:
+        map_data_loader (MapDataLoader): Loader whose datasets and stores are used.
+        ids (list[str]): Dataset ids in the HDF5 store, typically the stem of the original file.
+        augmentations (list[str] | None): Augmentations applied to each duplicate.
+
+    Returns:
+        None
+    """
+    from caked.dataloader import ArrayDataset, MapDataLoader
+
+    if not isinstance(map_data_loader, MapDataLoader):
+        msg = "map_data_loader must be an instance of MapDataLoader."
+        raise TypeError(msg)
+
+    if len(map_data_loader.dataset.datasets) == 0:
+        msg = "No datasets have been loaded yet."
+        raise RuntimeError(msg)
+
+    datasets = map_data_loader.dataset.datasets
+
+    map_hdf5_store, label_hdf5_store, weight_hdf5_store = (
+        map_data_loader.dataset.datasets[0].map_hdf5_store,
+        map_data_loader.dataset.datasets[0].label_hdf5_store,
+        map_data_loader.dataset.datasets[0].weight_hdf5_store,
+    )
+
+    for dataset_id in ids:
+        array = map_hdf5_store[dataset_id + "_map"]
+        label_array = (
+            label_hdf5_store.get(dataset_id + "_label")
+            if label_hdf5_store is not None
+            else None
+        )
+        weight_array = (
+            weight_hdf5_store.get(dataset_id + "_weight")
+            if weight_hdf5_store is not None
+            else None
+        )
+
+        dataset = ArrayDataset(
+            dataset_id=dataset_id,
+            data_array=array,
+            label_array=label_array,
+            weight_array=weight_array,
+            augments=augmentations,
+            map_hdf5_store=map_hdf5_store,
+            label_hdf5_store=label_hdf5_store,
+            weight_hdf5_store=weight_hdf5_store,
+            decompose=map_data_loader.dataset.datasets[0].decompose,
+        )
+
+        dataset.augment()
+        dataset.save_to_store()
+
+        datasets.append(dataset)
+
+    map_data_loader.dataset = ConcatDataset(datasets)
From ab2ce2fe28796f1fbac3bef674d16c87c74f036f Mon Sep 17 00:00:00 2001
From: Luc Elliott
Date: Tue, 9 Jul 2024 16:48:52 +0000
Subject: [PATCH 12/56] Changed augment to take arrays instead of map-objects
 to make it compatible with ArrayDataset
---
 src/caked/Transforms/augments.py | 37 ++++++++++++++++----------------
 1 file changed, 18 insertions(+), 19 deletions(-)

diff --git a/src/caked/Transforms/augments.py b/src/caked/Transforms/augments.py
index c241e73..f701a1a 100644
--- a/src/caked/Transforms/augments.py
+++ b/src/caked/Transforms/augments.py
@@ -1,20 +1,19 @@
 from __future__ import annotations
 
 import random
-from typing import Union
+from enum import Enum
 
 import numpy as np
 from ccpem_utils.map.array_utils import rotate_array
+from ccpem_utils.map.parse_mrcmapobj import MapObjHandle
 
 from .base import AugmentBase
-from enum import Enum
-from ccpem_utils.map.parse_mrcmapobj import MapObjHandle
 
 
 class Augments(Enum):
     """Available augmentation types."""
 
-    RANDOMROT = "random"
+    RANDOMROT = "randrot"
     ROT90 = "rot90"
 
 
@@ -23,11 +22,11 @@ def get_augment(augment: str, random_seed) -> AugmentBase:
 
     if augment == Augments.RANDOMROT.value:
         return RandomRotationAugment(random_seed=random_seed)
-    elif augment == Augments.ROT90.value:
+    if augment == Augments.ROT90.value:
         return Rotation90Augment(random_seed=random_seed)
-    else:
-        msg = f"Unknown Augmentation: {augment}"
-        raise ValueError(msg)
+
+    msg = f"Unknown Augmentation: {augment}, please choose from {Augments.__members__}"
+    raise ValueError(msg)
 
 
 class ComposeAugment:
@@ -36,19 +35,23 @@ class ComposeAugment:
 
     :param augments: (list) list of augments to compose
 
-    :return: (MapObjHandle) transformed MapObjHandle
+    :return: (tuple[np.ndarray, dict]) transformed array and the updated kwargs
     """
 
     def __init__(self, augments: list[str], random_seed: int = 42):
         self.random_seed = random_seed
         self.augments = augments
 
-    def __call__(self, mapobj: MapObjHandle, **kwargs) -> MapObjHandle:
+    def __call__(self, data: np.ndarray, **kwargs) -> tuple[np.ndarray, dict]:
         for augment in self.augments:
-            mapobj = get_augment(augment, random_seed=self.random_seed)(
-                mapobj, **kwargs
+            data, augment_kwargs = get_augment(augment, random_seed=self.random_seed)(
+                data, **kwargs
             )
+            kwargs.update(augment_kwargs)
+
+        return data, kwargs
+
 
 class RandomRotationAugment(AugmentBase):
     """
@@ -74,7 +77,6 @@ def __call__(
         ax = kwargs.get("ax", None)
         an = kwargs.get("an", None)
         interp = kwargs.get("interp", True)
-        return_all = kwargs.get("return_all", False)
 
         if (ax is not None and an is None) or (ax is None and an is not None):
             msg = "When specifying rotation, please use both arguments to specify the axis and angle."
@@ -90,13 +92,9 @@ def __call__(
             angle = an
 
         r = rotations[axes]
-
         data = rotate_array(data, angle, axes=r, interpolate=interp, reshape=False)
 
-        if return_all:
-            return data, {"ax": axes, "an": angle}
-
-        return data
+        return data, {"ax": axes, "an": angle}
 
 
 class Rotation90Augment(AugmentBase):
@@ -120,4 +118,5 @@ def __call__(
         data: np.ndarray,
         **kwargs,
     ) -> np.ndarray:
-        raise NotImplementedError("Rotation90Augment not implemented yet.")
+        msg = "Rotation90Augment not implemented yet."
+        raise NotImplementedError(msg)
From d6eda3afdd78f77c67230b35fe741302589beb98 Mon Sep 17 00:00:00 2001
From: Luc Elliott
Date: Tue, 9 Jul 2024 16:49:20 +0000
Subject: [PATCH 13/56] Removed mapObject transform base
---
 src/caked/Transforms/base.py | 19 -------------------
 1 file changed, 19 deletions(-)

diff --git a/src/caked/Transforms/base.py b/src/caked/Transforms/base.py
index fe84760..d2a400d 100644
--- a/src/caked/Transforms/base.py
+++ b/src/caked/Transforms/base.py
@@ -37,22 +37,3 @@ def __init__(self, random_seed: int = 42):
     def __call__(self, data, **kwargs):
         msg = "The __call__ method must be implemented in the subclass"
         raise NotImplementedError(msg)
-
-
-class MapObjTransformBase(TransformBase):
-    """
-    Base class for transformations that operate on MapObjHandle objects.
-
-    """
-
-    @abstractmethod
-    def __init__(self):
-        super().__init__()
-
-    def __call__(self, mapobj: MapObjHandle, **kwargs) -> MapObjHandle:
-        if not isinstance(mapobj, MapObjHandle):
-            msg = "mapobj must be an instance of MapObjHandle"
-            raise TypeError(msg)
-        # Proceed with the method implementation after the check
-        msg = "The __call__ method must be implemented in the subclass"
-        raise NotImplementedError(msg)
From a86c910c67c6023149bbb09014cdb7a468ab25db Mon Sep 17 00:00:00 2001
From: Luc Elliott
Date: Tue, 9 Jul 2024 16:49:46 +0000
Subject: [PATCH 14/56] chore: Refactor transforms module and update transform
 classes
---
 src/caked/Transforms/transforms.py | 46 +++++++++++++++++++-----------
 1 file changed, 30 insertions(+), 16 deletions(-)

diff --git a/src/caked/Transforms/transforms.py b/src/caked/Transforms/transforms.py
index 35d6a35..f72072e 100644
--- a/src/caked/Transforms/transforms.py
+++ b/src/caked/Transforms/transforms.py
@@ -9,12 +9,15 @@
 from ccpem_utils.map.parse_mrcmapobj import MapObjHandle
 from mlproteintoolbox.proteins.map_utils import voxel_normalisation
 
-from .base import MapObjTransformBase
+from .base import TransformBase
 from .utils import divx, mask_from_labelobj, pad_map_grid_sample
 
 
 class Transforms(Enum):
-    """ """
+    """
+    Enum class for transformations.
+
+    """
 
     VOXNORM = "voxnorm"
     NORM = "norm"
@@ -22,8 +25,14 @@ class Transforms(Enum):
     PADDING = "padding"
 
 
-def get_transform(transform: str) -> MapObjTransformBase:
-    """ """
+def get_transform(transform: str) -> TransformBase:
+    """
+    Get the transformation object.
+
+    :param transform: (str) transformation to apply
+
+    :return: (TransformBase) the transformation instance for the given name
+    """
 
     if transform == Transforms.VOXNORM.value:
         return MapObjectVoxelNormalisation()
@@ -33,7 +42,7 @@
         return MapObjectMaskCrop()
     if transform == Transforms.PADDING.value:
         return MapObjectPadding()
-    msg = f"Unknown transform: {transform}"
+    msg = f"Unknown transform: {transform}, please choose from {Transforms.__members__}"
    raise ValueError(msg)
 
 
@@ -62,13 +71,13 @@ def __call__(self, mapobj: MapObjHandle, **kwargs) -> MapObjHandle:
 class DecomposeToSlices:
     """Decompose a map volume into overlapping tile slices."""
 
-    def __init__(self, mapobj: MapObjHandle, **kwargs):
+    def __init__(self, map_shape: tuple, **kwargs):
         step = kwargs.get("step", 1)
         cshape = kwargs.get("cshape", 1)
         slices, tiles = [], []
-        for i in range(0, mapobj.data.shape[0], step):
-            for j in range(0, mapobj.data.shape[1], step):
-                for k in range(0, mapobj.data.shape[2], step):
+        for i in range(0, map_shape[0], step):
+            for j in range(0, map_shape[1], step):
+                for k in range(0, map_shape[2], step):
                 slices.append(
                     (
                         slice(i, i + cshape),
@@ -82,8 +91,11 @@ def __init__(self, mapobj: MapObjHandle, **kwargs):
         self.tiles = tiles
 
 
-class MapObjectVoxelNormalisation(MapObjTransformBase):
-    """ """
+class MapObjectVoxelNormalisation(TransformBase):
+    """
+    Normalise the spacing of the voxels in a Map Object.
+
+    """
 
     def __init__(self):
         super().__init__()
@@ -107,9 +119,9 @@ def __call__(
         return mapobj
 
 
-class MapObjectNormalisation(MapObjTransformBase):
+class MapObjectNormalisation(TransformBase):
     """
-    Normalise the voxel values of a 3D volume.
+    Normalise the voxel values of a Map Object.
 
     """
 
@@ -129,7 +141,7 @@ def __call__(
         return mapobj
 
 
-class MapObjectMaskCrop(MapObjTransformBase):
+class MapObjectMaskCrop(TransformBase):
     """
     Crop a Map Object using a mask.
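 
     The mask is derived from the label map object via ``mask_from_labelobj``;
     ``MapDataset._transform_keywords_builder`` passes ``mask=self.label_mapobj``
     for this transform.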
""" @@ -153,8 +165,10 @@ def __call__( return mapobj -class MapObjectPadding(MapObjTransformBase): - """ """ +class MapObjectPadding(TransformBase): + """ + Pad a Map Object. + """ def __init__(self): super().__init__() From 41d40b3dfe9a2216c68c54509c8daeb549b0fad3 Mon Sep 17 00:00:00 2001 From: Luc Elliott Date: Tue, 9 Jul 2024 16:50:12 +0000 Subject: [PATCH 15/56] first attempt at reducing if label/weight... is not none repeated code --- src/caked/Wrappers/__init__.py | 11 +++++++++++ 1 file changed, 11 insertions(+) create mode 100644 src/caked/Wrappers/__init__.py diff --git a/src/caked/Wrappers/__init__.py b/src/caked/Wrappers/__init__.py new file mode 100644 index 0000000..b2837ab --- /dev/null +++ b/src/caked/Wrappers/__init__.py @@ -0,0 +1,11 @@ +from functools import wraps + + +def none_return_none(func): + @wraps(func) + def wrapper(*args, **kwargs): + if args[0] is None: + return None + return func(*args, **kwargs) + + return wrapper From 6f73aae0116813f3d06fd312230b6b37cc418c8d Mon Sep 17 00:00:00 2001 From: Luc Elliott Date: Tue, 9 Jul 2024 16:50:39 +0000 Subject: [PATCH 16/56] chore: Add test fixtures and update conftest.py for test setup --- tests/conftest.py | 19 ++++- tests/test_map_dataset.py | 61 --------------- tests/test_map_io.py | 157 ++++++++++++++++++++++++++++++++++++++ 3 files changed, 175 insertions(+), 62 deletions(-) delete mode 100644 tests/test_map_dataset.py create mode 100644 tests/test_map_io.py diff --git a/tests/conftest.py b/tests/conftest.py index 6cbff2b..3bc6d4f 100644 --- a/tests/conftest.py +++ b/tests/conftest.py @@ -1,6 +1,8 @@ from __future__ import annotations +import shutil from pathlib import Path +from tempfile import TemporaryDirectory import pytest @@ -23,4 +25,19 @@ def test_corrupt_file(): return Path(__file__).parent / "corrupt.mrc" - +@pytest.fixture(scope="session") +def test_data_single_mrc_dir(): + """Fixture to provide a single MRC file for testing.""" + return Path(Path(__file__).parent.joinpath("testdata_mrc", "mrc")) + + +@pytest.fixture() +def test_data_single_mrc_temp_dir(): + with TemporaryDirectory() as temp_dir: + temp_dir = Path(temp_dir) + test_data_single_mrc_dir = Path( + Path(__file__).parent.joinpath("testdata_mrc", "mrc") + ) + for file in test_data_single_mrc_dir.glob("*"): + shutil.copy(file, temp_dir) + yield temp_dir diff --git a/tests/test_map_dataset.py b/tests/test_map_dataset.py deleted file mode 100644 index 2c56523..0000000 --- a/tests/test_map_dataset.py +++ /dev/null @@ -1,61 +0,0 @@ -from __future__ import annotations - -from pathlib import Path - -import testdata_mrc -import testdata_npy -import torch - -from caked.dataloader import MapDataLoader, MapDataset - -ORIG_DIR = Path.cwd() -TEST_DATA_MRC = Path(testdata_mrc.__file__).parent.joinpath("mrc") -TEST_DATA_NPY = Path(testdata_npy.__file__).parent - - -DISK_CLASSES_NONE = None -DATATYPE_MRC = "mrc" -VOXNORM = "voxnorm" -NORM = "norm" -MASKCROP = "maskcrop" -PADDING = "padding" -TRANSFORM_ALL = [VOXNORM, NORM, PADDING] - - -def test_map_dataloader(): - test_loader = MapDataLoader() - - assert test_loader is not None - assert isinstance(test_loader, MapDataLoader) - - -def test_map_dataset(): - print() - test_map_dataset = MapDataset(path=next(TEST_DATA_MRC.glob(f"*{DATATYPE_MRC}"))) - assert test_map_dataset is not None - assert isinstance(test_map_dataset, MapDataset) - - -def test_slices(): - test_map_dataset = MapDataset( - path=next(TEST_DATA_MRC.glob(f"*{DATATYPE_MRC}")), transforms=[], augments=[] - ) - slice_, _, _ = 
test_map_dataset.__getitem__(0) - - assert isinstance(slice_, torch.Tensor) - assert len(test_map_dataset) == 2 - assert slice_.shape == (49, 46, 48) - - -def test_transforms(): - test_map_dataset = MapDataset( - path=next(TEST_DATA_MRC.glob(f"*{DATATYPE_MRC}")), - transforms=TRANSFORM_ALL, - augments=[], - ) - slice_, _, _ = test_map_dataset.__getitem__(0) - - assert len(test_map_dataset) == 8 - assert slice_.shape == (64, 64, 64) - - diff --git a/tests/test_map_io.py b/tests/test_map_io.py new file mode 100644 index 0000000..5dcbdc4 --- /dev/null +++ b/tests/test_map_io.py @@ -0,0 +1,157 @@ +from __future__ import annotations + +from pathlib import Path + +import torch + +from caked.dataloader import MapDataLoader, MapDataset +from caked.utils import duplicate_and_augment_from_hdf5 + +ORIG_DIR = Path.cwd() + + +DISK_CLASSES_NONE = None +DATATYPE_MRC = "mrc" +VOXNORM = "voxnorm" +NORM = "norm" +MASKCROP = "maskcrop" +PADDING = "padding" +ROTATION = "randrot" +TRANSFORM_ALL = [VOXNORM, NORM, PADDING] +AUGMENT_ALL = [ROTATION] + + +def test_map_dataloader(): + test_loader = MapDataLoader() + + assert test_loader is not None + assert isinstance(test_loader, MapDataLoader) + + +def test_map_dataset(test_data_single_mrc_dir): + test_map_dataset = MapDataset( + path=next(test_data_single_mrc_dir.glob(f"*{DATATYPE_MRC}")) + ) + assert test_map_dataset is not None + assert isinstance(test_map_dataset, MapDataset) + + +def test_slices(test_data_single_mrc_dir): + test_map_dataset = MapDataset( + path=next(test_data_single_mrc_dir.glob(f"*{DATATYPE_MRC}")), + transforms=[], + augments=[], + ) + slice_, _, _ = test_map_dataset.__getitem__(0) + + assert isinstance(slice_, torch.Tensor) + assert len(test_map_dataset) == 2 + assert slice_.shape == (49, 46, 48) + + +def test_transforms(test_data_single_mrc_dir): + test_map_dataset = MapDataset( + path=next(test_data_single_mrc_dir.glob(f"*{DATATYPE_MRC}")), + transforms=TRANSFORM_ALL, + augments=[], + ) + test_map_dataset.load_map_objects() + test_map_dataset.transform() + slice_, _, _ = test_map_dataset.__getitem__(0) + + assert len(test_map_dataset) == 8 + assert slice_.shape == (64, 64, 64) + + +def test_dataloader_load_to_HDF5_file(test_data_single_mrc_temp_dir): + test_map_dataloader = MapDataLoader() + test_map_dataloader.load( + datapath=test_data_single_mrc_temp_dir, + datatype=DATATYPE_MRC, + ) + + assert test_map_dataloader is not None + assert isinstance(test_map_dataloader, MapDataLoader) + assert test_map_dataloader.dataset is not None + assert test_data_single_mrc_temp_dir.joinpath("raw_map_data.h5").exists() + + +def test_dataloader_load_to_HDF5_file_with_transforms(test_data_single_mrc_temp_dir): + test_map_dataloader = MapDataLoader( + transformations=TRANSFORM_ALL, + ) + test_map_dataloader.load( + datapath=test_data_single_mrc_temp_dir, + datatype=DATATYPE_MRC, + ) + + assert test_map_dataloader is not None + assert isinstance(test_map_dataloader, MapDataLoader) + assert test_map_dataloader.dataset is not None + assert test_data_single_mrc_temp_dir.joinpath("raw_map_data.h5").exists() + + +def test_add_duplicate_dataset_to_dataloader(test_data_single_mrc_temp_dir): + test_map_dataloader = MapDataLoader( + transformations=TRANSFORM_ALL, + ) + test_map_dataloader.load( + datapath=test_data_single_mrc_temp_dir, + datatype=DATATYPE_MRC, + ) + + duplicate_and_augment_from_hdf5( + test_map_dataloader, + ids=[ + next(test_data_single_mrc_temp_dir.glob(f"*{DATATYPE_MRC}")).stem, + 
next(test_data_single_mrc_temp_dir.glob(f"*{DATATYPE_MRC}")).stem, + ], + ) + hdf5_store = test_map_dataloader.dataset.datasets[0].map_hdf5_store + + assert len(hdf5_store.keys()) == 3 + assert "realmap_map" in hdf5_store.keys() # noqa: SIM118 + assert "1--realmap_map" in hdf5_store.keys() # noqa: SIM118 + assert "2--realmap_map" in hdf5_store.keys() # noqa: SIM118 + + +def test_add_duplicate_dataset_to_dataloader_with_augments( + test_data_single_mrc_temp_dir, +): + test_map_dataloader = MapDataLoader( + transformations=TRANSFORM_ALL, + ) + test_map_dataloader.load( + datapath=test_data_single_mrc_temp_dir, + datatype=DATATYPE_MRC, + ) + duplicate_and_augment_from_hdf5( + ids=[next(test_data_single_mrc_temp_dir.glob(f"*{DATATYPE_MRC}")).stem], + map_data_loader=test_map_dataloader, + augmentations=AUGMENT_ALL, + ) + hdf5_store = test_map_dataloader.dataset.datasets[0].map_hdf5_store + assert len(hdf5_store.keys()) == 2 + assert "realmap_map" in hdf5_store.keys() # noqa: SIM118 + assert "1--realmap_map" in hdf5_store.keys() # noqa: SIM118 + + assert len(test_map_dataloader.dataset.datasets[0]) == 8 + assert len(test_map_dataloader.dataset.datasets[1]) == 8 + + assert len(test_map_dataloader.dataset) == 16 + + +def test_dataloader_load_multi_process(test_data_single_mrc_temp_dir): + test_map_dataloader = MapDataLoader() + test_map_dataloader.load( + datapath=test_data_single_mrc_temp_dir, + datatype=DATATYPE_MRC, + num_workers=2, + ) + + assert test_map_dataloader is not None + assert isinstance(test_map_dataloader, MapDataLoader) + assert test_map_dataloader.dataset is not None + assert test_data_single_mrc_temp_dir.joinpath("raw_map_data.h5").exists() + + # test_map_dataloader. From 07e2318bba031c75d52dbc8f903a4e352fa09e26 Mon Sep 17 00:00:00 2001 From: Luc Elliott Date: Wed, 10 Jul 2024 09:57:29 +0000 Subject: [PATCH 17/56] set_gpu is not in the current ccpem-utils so check added JIC --- src/caked/dataloader.py | 9 ++++++++- 1 file changed, 8 insertions(+), 1 deletion(-) diff --git a/src/caked/dataloader.py b/src/caked/dataloader.py index c63ebb1..17425ae 100644 --- a/src/caked/dataloader.py +++ b/src/caked/dataloader.py @@ -15,7 +15,6 @@ import numpy as np import torch from ccpem_utils.map.parse_mrcmapobj import MapObjHandle, get_mapobjhandle -from ccpem_utils.other.utils import set_gpu from scipy.ndimage import zoom from torch.utils.data import ConcatDataset, DataLoader, Subset from torchvision import transforms @@ -29,6 +28,14 @@ process_datasets, ) +try: + from ccpem_utils.other.utils import set_gpu +except ImportError: + + def set_gpu(): + pass + + np.random.seed(42) TRANSFORM_OPTIONS = ["normalise", "gaussianblur", "shiftmin"] From a6cc9b14e818f32047adb86070f80267a1ee9600 Mon Sep 17 00:00:00 2001 From: Luc Elliott Date: Wed, 17 Jul 2024 16:43:19 +0000 Subject: [PATCH 18/56] chore: Refactor HDF5DataStore class and add support for temporary directory storage --- src/caked/hdf5.py | 46 +++++++++++++++++++++++++++++++++++++++++++--- 1 file changed, 43 insertions(+), 3 deletions(-) diff --git a/src/caked/hdf5.py b/src/caked/hdf5.py index 709548f..27fd940 100644 --- a/src/caked/hdf5.py +++ b/src/caked/hdf5.py @@ -1,12 +1,46 @@ from __future__ import annotations +import tempfile +from pathlib import Path + import h5py import numpy as np class HDF5DataStore: - def __init__(self, save_path: str): - self.save_path = save_path + def __init__(self, save_path: str, use_temp_dir: bool = True, batch_size: int = 10): + """ + Object to store data in HDF5 format. 
If use_temp_dir is True, the file is saved
+        in a temporary directory and deleted when the object is deleted. This is useful
+        for temporary storage of data. If use_temp_dir is False, the file is
+        saved in the save_path provided. The file is not deleted when the object is deleted.
+
+        :param save_path: (str) path to save the file
+        :param use_temp_dir: (bool) whether to use a temporary directory
+        :param batch_size: (int) number of items to write to the file before closing
+
+
+        """
+        if use_temp_dir:
+            self.temp_dir_obj = tempfile.TemporaryDirectory()
+            self.temp_dir = Path(self.temp_dir_obj.name)
+            self.save_path = self.temp_dir.joinpath(save_path.name)
+        else:
+            self.save_path = Path(save_path)
+            self.temp_dir = None
+
+        self.batch_size = batch_size
+        self.counter = 0
+        self.file = None
+
+    def open(self, mode: str = "a"):
+        if self.file is None:
+            self.file = h5py.File(self.save_path, mode)
+
+    def close(self):
+        if self.file is not None:
+            self.file.close()
+            self.file = None

     def __getitem__(self, key: str):
         with h5py.File(self.save_path, "r") as f:
@@ -28,7 +62,9 @@ def add_array(
         if self.check_name_in_store(dataset_name):
             dataset_name = self._add_number_to_dataset_name(dataset_name)
         with h5py.File(self.save_path, "a") as f:  # Open in append mode
-            f.create_dataset(dataset_name, data=array, compression=compression)
+            f.create_dataset(
+                dataset_name, data=array, compression=compression, chunks=True
+            )

         return dataset_name

@@ -63,3 +99,7 @@ def _add_number_to_dataset_name(self, dataset_name: str, delimiter: str = "--"):
     def keys(self):
         with h5py.File(self.save_path, "r") as f:
             return list(f.keys())
+
+    def values(self):
+        with h5py.File(self.save_path, "r") as f:
+            return [np.array(f[key]) for key in f.keys()]

From 05f792b89fc6886178cf69b0c95e29b61eba090c Mon Sep 17 00:00:00 2001
From: Luc Elliott
Date: Wed, 17 Jul 2024 16:45:25 +0000
Subject: [PATCH 19/56] Removed "_" id logic in MapDataLoader, might
 reimplement later but not sure if needed for multi-class problems

---
 src/caked/dataloader.py | 124 +++++++++++++++++-----------------
 1 file changed, 53 insertions(+), 71 deletions(-)

diff --git a/src/caked/dataloader.py b/src/caked/dataloader.py
index 17425ae..0d06d2d 100644
--- a/src/caked/dataloader.py
+++ b/src/caked/dataloader.py
@@ -24,7 +24,7 @@
 from caked.Transforms.augments import ComposeAugment
 from caked.Transforms.transforms import ComposeTransform, DecomposeToSlices, Transforms
 from caked.utils import (
-    filter_and_construct_paths,
+    get_sorted_paths,
     process_datasets,
 )

@@ -118,9 +118,7 @@ def load(self, datapath, datatype) -> None:
         else:
             class_check = np.in1d(self.classes, ids)
             if not np.all(class_check):
-                msg = "Not all classes in the list are present in the directory. Missing classes: {}".format(
-                    np.asarray(self.classes)[~class_check]
-                )
+                msg = f"Not all classes in the list are present in the directory. Missing classes: {np.asarray(self.classes)[~class_check]}"
                 raise RuntimeError(msg)
             class_check = np.in1d(ids, self.classes)
             if not np.all(class_check):
@@ -231,9 +229,7 @@ def get_loader(
             s = int(np.ceil(len(self.dataset) * int(split_size) / 100))

             if s < 2:
-                msg = "Train and validation sets must be larger than 1 sample, train: {}, val: {}.".format(
-                    len(idx[:-s]), len(idx[-s:])
-                )
+                msg = f"Train and validation sets must be larger than 1 sample, train: {len(idx[:-s])}, val: {len(idx[-s:])}."
raise RuntimeError(msg) train_data = Subset(self.dataset, indices=idx[:-s]) val_data = Subset(self.dataset, indices=idx[-s:]) @@ -275,7 +271,8 @@ def __init__( decompose: bool = True, ) -> None: """ - DataLoader implementation for loading map data from disk. + DataLoader implementation for loading map data from disk and saving them to a internal HDF5 store. + """ self.dataset_size = dataset_size @@ -318,6 +315,8 @@ def load( Returns: None """ + datasets = [] + if use_gpu and num_workers > 1: msg = "Cannot use GPU and multi-process at the same time." raise ValueError(msg) @@ -328,6 +327,7 @@ def load( label_path = Path(label_path) if label_path is not None else None weight_path = Path(weight_path) if weight_path is not None else None map_hdf5_store = HDF5DataStore(datapath.joinpath("raw_map_data.h5")) + label_hdf5_store = ( HDF5DataStore(label_path.joinpath("label_data.h5")) if label_path is not None @@ -339,58 +339,18 @@ def load( else None ) - datasets = [] - - paths = list(datapath.rglob(f"*.{datatype}")) - label_paths = ( - list(label_path.rglob(f"*.{datatype}")) if label_path is not None else None - ) - weight_paths = ( - list(weight_path.rglob(f"*.{datatype}")) - if weight_path is not None - else None - ) - - if not self.debug: - random.shuffle(paths) - - # ids right now depend on the data being saved with a certain format (id in the first part of the name, separated by _) - # TODO: make this more general/document in the README + paths = get_sorted_paths(datapath, datatype, self.dataset_size) + label_paths = get_sorted_paths(label_path, datatype, self.dataset_size) + weight_paths = get_sorted_paths(weight_path, datatype, self.dataset_size) - # TODO: this won't be how it works for multi classifciation tasks I'm guessing so need to include the ID - # generation from mlToolkit - ids = np.unique([file.name.split("_")[0] for file in paths]) - if len(self.classes) == 0: - self.classes = ids - else: - class_check = np.in1d(self.classes, ids) - if not np.all(class_check): - msg = "Not all classes in the list are present in the directory. Missing classes: {}".format( - np.asarray(self.classes)[~class_check] - ) - raise RuntimeError(msg) - class_check = np.in1d(ids, self.classes) - if not np.all(class_check): - logging.basicConfig(format="%(message)s", level=logging.INFO) - logging.info( - "Not all classes in the directory are present in the " - "classes list. Missing classes: %s. They will be ignored.", - (np.asarray(ids)[~class_check]), - ) - - paths = [ - datapath / p.name - for p in paths - for c in self.classes - if c in p.name.split("_")[0] - ] - paths = filter_and_construct_paths(datapath, paths, self.classes) - label_paths = filter_and_construct_paths(label_path, label_paths, self.classes) - weight_paths = filter_and_construct_paths( - weight_path, weight_paths, self.classes - ) if self.dataset_size is not None: paths = paths[: self.dataset_size] + label_paths = ( + label_paths[: self.dataset_size] if label_paths is not None else None + ) + weight_paths = ( + weight_paths[: self.dataset_size] if weight_paths is not None else None + ) if label_paths is not None and len(label_paths) != len(paths): msg = "Label paths and data paths do not match." @@ -400,8 +360,8 @@ def load( msg = "Weight paths and data paths do not match." 
raise RuntimeError(msg) - label_paths = label_paths if label_paths is not None else [None] * len(paths) - weight_paths = weight_paths if weight_paths is not None else [None] * len(paths) + label_paths = label_paths or [None] * len(paths) + weight_paths = weight_paths or [None] * len(paths) # HDF5 store assumes the data is all in one location @@ -420,10 +380,29 @@ def load( self.dataset = ConcatDataset(datasets) + # TODO: I think this should be removed in favour of user input for classes + if not self.classes and label_hdf5_store is not None: + unique_labels = [ + np.unique(label_data) for label_data in label_hdf5_store.values() + ] + self.classes = np.unique(np.concatenate(unique_labels).flatten()).tolist() + def process(self): """ """ raise NotImplementedError() + def get_hdf5_store( + self, + ) -> tuple[HDF5DataStore, HDF5DataStore | None, HDF5DataStore | None]: + if self.dataset is None: + msg = "The dataset has not been loaded yet." + raise RuntimeError(msg) + return ( + self.dataset.datasets[0].map_hdf5_store, + self.dataset.datasets[0].label_hdf5_store, + self.dataset.datasets[0].weight_hdf5_store, + ) + def get_loader( self, batch_size: int, @@ -459,9 +438,7 @@ def get_loader( s = int(np.ceil(len(self.dataset) * int(split_size) / 100)) if s < 2: - msg = "Train and validation sets must be larger than 1 sample, train: {}, val: {}.".format( - len(idx[:-s]), len(idx[-s:]) - ) + msg = f"Train and validation sets must be larger than 1 sample, train: {len(idx[:-s])}, val: {len(idx[-s:])}." raise RuntimeError(msg) train_data = Subset(self.dataset, indices=idx[:-s]) val_data = Subset(self.dataset, indices=idx[-s:]) @@ -674,7 +651,7 @@ def __init__( self.weight_mapobj: MapObjHandle | None = None if self.decompose_kwargs is None: - self.decompose_kwargs = {"cshape": 64, "margin": 8} + self.decompose_kwargs = {"cshape": 32, "margin": 8} if self.transform_kwargs is None: self.transform_kwargs = {} @@ -733,16 +710,19 @@ def load_map_objects( self, ) -> None: self.mapobj = get_mapobjhandle(self.path) + self.mapobj.all_transforms = True if self.label_path is not None: if not self.label_path.exists(): msg = f"Label file {self.label_path} not found." raise FileNotFoundError(msg) self.label_mapobj = get_mapobjhandle(self.label_path) + self.label_mapobj.all_transforms = False if self.weight_path is not None: if not self.weight_path.exists(): msg = f"Weight file {self.weight_path} not found." raise FileNotFoundError(msg) self.weight_mapobj = get_mapobjhandle(self.weight_path) + self.weight_mapobj.all_transforms = False def close_map_objects(self, *args): for arg in args: @@ -791,10 +771,10 @@ def transform(self, close_map_objects: bool = True): self.transform_kwargs = transform_kwargs self.transform_kwargs = ComposeTransform(self.transforms)( - self.mapobj, **transform_kwargs + self.mapobj, self.label_mapobj, self.weight_mapobj, **transform_kwargs ) - # Need to do the transform on all the map objects self.get_data_shape(close_map_objects=False) + if close_map_objects: self.close_map_objects(self.mapobj, self.label_mapobj, self.weight_mapobj) @@ -808,11 +788,11 @@ def get_data_shape(self, close_map_objects: bool = True): if self.label_mapobj is not None: assert ( self.label_mapobj.data.shape == self.data_shape - ), "Map and label shapes do not match." + ), f"Map and label shapes do not match for {self.id}." if self.weight_mapobj is not None: assert ( self.weight_mapobj.data.shape == self.data_shape - ), "Map and weight shapes do not match." + ), f"Map and weight shapes do not match for {self.id}." 
if close_map_objects: self.close_map_objects(self.mapobj, self.label_mapobj, self.weight_mapobj) @@ -861,7 +841,6 @@ def _augment_keywords_builder(self): class ArrayDataset(AbstractDataset): - """Class to handle loading of data from hdf5 files, to be handled by a DataLoader""" # need to add their own and update the dataset id @@ -973,15 +952,16 @@ def augment(self) -> None: self.data_array, extra_kwargs = ComposeAugment(self.augments)( self.data_array, **augment_kwargs ) + augment_kwargs.update( extra_kwargs ) # update the kwargs with the returned values if self.label_array is not None: - self.label_array = ComposeAugment(self.augments)( + self.label_array, _ = ComposeAugment(self.augments)( self.label_array, **augment_kwargs ) if self.weight_array is not None: - self.weight_array = ComposeAugment(self.augments)( + self.weight_array, _ = ComposeAugment(self.augments)( self.weight_array, **augment_kwargs ) @@ -1021,7 +1001,7 @@ def generate_tile_indicies(self): self.tiles = decompose.tiles self.tiles_count = len(self.tiles) - def save_to_store(self): + def save_to_store(self, close_data: bool = True): self.id = self.map_hdf5_store.add_array( self.data_array, self.id + "_map", @@ -1038,3 +1018,5 @@ def save_to_store(self): self.weight_array, self.id + "_weight", ) + if close_data: + self.close_data() From 5a770f70324df5c1161f00d9cfbc573cf90532ef Mon Sep 17 00:00:00 2001 From: Luc Elliott Date: Wed, 17 Jul 2024 16:45:57 +0000 Subject: [PATCH 20/56] Refactor process_datasets function and remove unnecessary code --- src/caked/utils.py | 17 ++++++++++++++--- 1 file changed, 14 insertions(+), 3 deletions(-) diff --git a/src/caked/utils.py b/src/caked/utils.py index b693d64..959419f 100644 --- a/src/caked/utils.py +++ b/src/caked/utils.py @@ -59,7 +59,6 @@ def process_datasets( for future in as_completed(futures): result, dataset = future.result() - add_dataset_to_HDF5( *result.values(), dataset.id, @@ -100,7 +99,6 @@ def process_map_dataset( """ from caked.dataloader import MapDataset # Avoid circular import - map_dataset = MapDataset( path, label_path=label_path, @@ -121,6 +119,7 @@ def process_map_dataset( if weight_path is not None else None, } + map_dataset.close_map_objects() @@ -232,9 +231,21 @@ def duplicate_and_augment_from_hdf5( decompose=map_data_loader.dataset.datasets[0].decompose, ) - dataset.augment() + dataset.augment() # Augment, flagged off when prediction mode selected dataset.save_to_store() datasets.append(dataset) map_data_loader.dataset = ConcatDataset(datasets) + + + + +@none_return_none +def get_sorted_paths(path: Path, datatype: str, dataset_size: int| None =None, ): + """ + Sort paths by the stem of the file name. 
+    """
+    paths = sorted(path.rglob(f"*.{datatype}"), key=lambda x: x.stem.split("_")[0])
+    return paths[:dataset_size] if dataset_size is not None else paths
+
\ No newline at end of file

From 0a99fd7072b070715ddd0fbb1d6bb7e124043cad Mon Sep 17 00:00:00 2001
From: Luc Elliott
Date: Wed, 17 Jul 2024 16:46:28 +0000
Subject: [PATCH 21/56] Refactor transforms module and update transform classes

---
 src/caked/Transforms/transforms.py | 93 +++++++++++++++++++----------
 1 file changed, 67 insertions(+), 26 deletions(-)

diff --git a/src/caked/Transforms/transforms.py b/src/caked/Transforms/transforms.py
index f72072e..966b928 100644
--- a/src/caked/Transforms/transforms.py
+++ b/src/caked/Transforms/transforms.py
@@ -2,15 +2,16 @@

 from enum import Enum

+import numpy as np
 from ccpem_utils.map.mrc_map_utils import (
-    crop_map_grid,
+    interpolate_to_grid,
     normalise_mapobj,
+    pad_map_grid_split_distribution,
 )
 from ccpem_utils.map.parse_mrcmapobj import MapObjHandle
-from mlproteintoolbox.proteins.map_utils import voxel_normalisation

 from .base import TransformBase
-from .utils import divx, mask_from_labelobj, pad_map_grid_sample
+from .utils import divx, mask_from_labelobj


 class Transforms(Enum):
@@ -58,13 +59,14 @@ class ComposeTransform:
     def __init__(self, transforms: list[str]):
         self.transforms = transforms

-    def __call__(self, mapobj: MapObjHandle, **kwargs) -> MapObjHandle:
+    def __call__(self, *args: list[MapObjHandle], **kwargs) -> MapObjHandle:
         for transform in self.transforms:
-            mapobj = get_transform(transform)(mapobj, **kwargs)
-            if transform == Transforms.MASKCROP.value:
-                kwargs["ext_dim"] = [
-                    divx(d, kwargs.get("step", 1)) for d in mapobj.shape
-                ]
+            for mapobj in args:
+                if mapobj is None:
+                    continue
+
+                mapobj, kwargs = get_transform(transform)(mapobj, **kwargs)
+
         return kwargs

@@ -93,7 +95,8 @@ class MapObjectVoxelNormalisation(TransformBase):
     """
-    Normalise the spacing of the voxels in a Map Object.
+    Resamples a map object to a desired voxel size if outside of vox_min and
+    vox_max.

     """

@@ -104,19 +107,39 @@ def __call__(
         self,
         mapobj: MapObjHandle,
         **kwargs,
-    ):
-        norm_vox = kwargs.get("vox", None)
-        norm_vox_lim = kwargs.get("vox_lim", None)
+    ) -> tuple[MapObjHandle, dict]:
+        # This is needed to do the normalisation but I need to check if label obj is affected by this
+
+        vox = kwargs.get("vox", 1)
+        vox_min = kwargs.get("vox_min", 0.95)
+        vox_max = kwargs.get("vox_max", 1.05)
+
+        if not vox_min < vox < vox_max:
+            msg = f"Voxel size must be within the range of {vox_min} and {vox_max}."
+ raise ValueError(msg) - voxel_normalisation( + voxx, voxy, voxz = mapobj.apix + sample = np.array(mapobj.shape) + if voxx > vox_max or voxx < vox_min: + sample[2] = int(mapobj.dim[0] / vox) + if voxy > vox_max or voxy < vox_min: + sample[1] = int(mapobj.dim[1] / vox) + if voxz > vox_max or voxz < vox_min: + sample[0] = int(mapobj.dim[2] / vox) + sample = tuple(sample) + + interpolate_to_grid( mapobj, - vox=norm_vox, - vox_min=norm_vox_lim[0], - vox_max=norm_vox_lim[1], + sample, + vox, + mapobj.origin, inplace=True, + prefilter_input=mapobj.all_transforms, ) - return mapobj + mapobj.update_header_by_data() + + return mapobj, kwargs class MapObjectNormalisation(TransformBase): @@ -132,13 +155,15 @@ def __call__( self, mapobj: MapObjHandle, **kwargs, - ): + ) -> tuple[MapObjHandle, dict]: + if not mapobj.all_transforms: + return mapobj, kwargs normalise_mapobj( mapobj, inplace=True, ) - return mapobj + return mapobj, kwargs class MapObjectMaskCrop(TransformBase): @@ -153,16 +178,17 @@ def __call__( self, mapobj: MapObjHandle, **kwargs, - ): + ) -> tuple[MapObjHandle, dict]: mask = kwargs.get("mask", None) if mask is None: msg = "Please provide a mask to crop the map object." raise ValueError(msg) + mask = mask_from_labelobj(mask) - crop_map_grid(mapobj, input_maskobj=mask, inplace=True) + kwargs["ext_dim"] = [divx(d, kwargs.get("step", 1)) - d for d in mapobj.shape] - return mapobj + return mapobj, kwargs class MapObjectPadding(TransformBase): @@ -177,16 +203,31 @@ def __call__( self, mapobj: MapObjHandle, **kwargs, - ): + ) -> tuple[MapObjHandle, dict]: ext_dim = kwargs.get("ext_dim", None) left = kwargs.get("left", True) - pad_map_grid_sample( + pad_map_grid_split_distribution( mapobj, ext_dim=ext_dim, fill_padding=0.0, left=left, inplace=True, ) + return mapobj, kwargs + + +# def data_scale(mapobj: MapObjHandle, desired_shape: tuple, inplace=True): +# """ +# Resamples image to desired shape. 
+ +# :param mapobj: (MapObjHandle) map object +# :param desired_shape: (tuple(int, int, int)) desired shape +# :param inplace: (bool) perform operation in place +# :return: mapobj: (MapObjHandle) updated map object +# """ +# interpolate_to_grid(mapobj, desired_shape, mapobj.apix, mapobj.origin, inplace=True) +# if not inplace: +# return mapobj - return mapobj +# mapobj.update_header_by_data() From 8772fa6a8b6bdbac3664aec3bc6694cd6b4d77ec Mon Sep 17 00:00:00 2001 From: Luc Elliott Date: Wed, 17 Jul 2024 17:32:42 +0000 Subject: [PATCH 22/56] re-added __del__ method to HDF5Store --- src/caked/hdf5.py | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/src/caked/hdf5.py b/src/caked/hdf5.py index 27fd940..036264f 100644 --- a/src/caked/hdf5.py +++ b/src/caked/hdf5.py @@ -42,6 +42,11 @@ def close(self): self.file.close() self.file = None + def __del__(self): + self.close() + if self.temp_dir is not None: + self.temp_dir_obj.cleanup() + def __getitem__(self, key: str): with h5py.File(self.save_path, "r") as f: return np.array(f[key]) From 93c8424022956fb064470dcba4e2d41e38e88a20 Mon Sep 17 00:00:00 2001 From: Luc Elliott Date: Wed, 4 Sep 2024 15:54:53 +0000 Subject: [PATCH 23/56] Refactor MapDataset and ArrayDataset classes to handle weight tensors --- src/caked/dataloader.py | 38 ++++++++++++++++++++++++++++++++++++-- 1 file changed, 36 insertions(+), 2 deletions(-) diff --git a/src/caked/dataloader.py b/src/caked/dataloader.py index 0d06d2d..81635c4 100644 --- a/src/caked/dataloader.py +++ b/src/caked/dataloader.py @@ -702,9 +702,27 @@ def __getitem__( label_tensor = torch.tensor(label_slice) if label_slice is not None else None weight_tensor = torch.tensor(weight_slice) if weight_slice is not None else None + if weight_tensor is None and label_tensor is not None: + weight_tensor = torch.where( + label_tensor != 0, + torch.ones_like(label_tensor), + torch.zeros_like(label_tensor), + ) + + # Ensure weight_tensor has the same shape as map_tensor + if weight_tensor is not None and weight_tensor.shape == map_tensor.shape: + # Add weight values to the first dimension of the map tensor + map_tensor = torch.cat( + (weight_tensor.unsqueeze(0), map_tensor.unsqueeze(0)), dim=0 + ) + + # if the weight tensor is None then I want to create weights tesnor using the label tensor + self.close_map_objects(self.mapobj, self.label_mapobj, self.weight_mapobj) - return map_tensor, label_tensor, weight_tensor + return tuple( + tensor for tensor in (map_tensor, label_tensor) if tensor is not None + ) def load_map_objects( self, @@ -905,6 +923,7 @@ def __getitem__(self, idx): self.get_data() data_slice = self.data_array[self.slices[idx]] + label_slice = ( self.label_array[self.slices[idx]] if self.label_array is not None else None ) @@ -917,9 +936,24 @@ def __getitem__(self, idx): label_tensor = torch.tensor(label_slice) if label_slice is not None else None weight_tensor = torch.tensor(weight_slice) if weight_slice is not None else None + if weight_tensor is None and label_tensor is not None: + weight_tensor = torch.where( + label_tensor != 0, + torch.ones_like(label_tensor), + torch.zeros_like(label_tensor), + ) + + if weight_tensor is not None and weight_tensor.shape == data_tensor.shape: + # Add weight values to the first dimension of the map tensor + data_tensor = torch.cat( + (weight_tensor.unsqueeze(0), data_tensor.unsqueeze(0)), dim=0 + ) + self.close_data() - return data_tensor, label_tensor, weight_tensor + return tuple( + tensor for tensor in (data_tensor, label_tensor) if tensor is not None + ) def 
get_data(self): self.data_array = self.map_hdf5_store.get(self.id + "_map") From 9128e9a749c6d0f8a12bdad4d8fda2d437426c02 Mon Sep 17 00:00:00 2001 From: Luc Elliott Date: Wed, 4 Sep 2024 15:55:18 +0000 Subject: [PATCH 24/56] Refactor --- src/caked/utils.py | 14 ++++++++------ 1 file changed, 8 insertions(+), 6 deletions(-) diff --git a/src/caked/utils.py b/src/caked/utils.py index 959419f..33cc5b7 100644 --- a/src/caked/utils.py +++ b/src/caked/utils.py @@ -99,6 +99,7 @@ def process_map_dataset( """ from caked.dataloader import MapDataset # Avoid circular import + map_dataset = MapDataset( path, label_path=label_path, @@ -119,7 +120,6 @@ def process_map_dataset( if weight_path is not None else None, } - map_dataset.close_map_objects() @@ -229,9 +229,10 @@ def duplicate_and_augment_from_hdf5( label_hdf5_store=label_hdf5_store, weight_hdf5_store=weight_hdf5_store, decompose=map_data_loader.dataset.datasets[0].decompose, + decompose_kwargs=map_data_loader.dataset.datasets[0].decompose_kwargs, ) - dataset.augment() # Augment, flagged off when prediction mode selected + dataset.augment() # Augment, flagged off when prediction mode selected dataset.save_to_store() datasets.append(dataset) @@ -239,13 +240,14 @@ def duplicate_and_augment_from_hdf5( map_data_loader.dataset = ConcatDataset(datasets) - - @none_return_none -def get_sorted_paths(path: Path, datatype: str, dataset_size: int| None =None, ): +def get_sorted_paths( + path: Path, + datatype: str, + dataset_size: int | None = None, +): """ Sort paths by the stem of the file name. """ paths = sorted(path.rglob(f"*.{datatype}"), key=lambda x: x.stem.split("_")[0]) return paths[:dataset_size] if dataset_size is not None else paths - \ No newline at end of file From b2a49c14e14320efce7b69bb4e63f440b8bcb439 Mon Sep 17 00:00:00 2001 From: Luc Elliott Date: Wed, 4 Sep 2024 15:55:44 +0000 Subject: [PATCH 25/56] Refactor DecomposeToSlices, MapObjectMaskCrop, and MapObjectPadding classes --- src/caked/Transforms/transforms.py | 20 ++++++++++++++++---- 1 file changed, 16 insertions(+), 4 deletions(-) diff --git a/src/caked/Transforms/transforms.py b/src/caked/Transforms/transforms.py index 966b928..aed440f 100644 --- a/src/caked/Transforms/transforms.py +++ b/src/caked/Transforms/transforms.py @@ -77,9 +77,16 @@ def __init__(self, map_shape: tuple, **kwargs): step = kwargs.get("step", 1) cshape = kwargs.get("cshape", 1) slices, tiles = [], [] + for i in range(0, map_shape[0], step): for j in range(0, map_shape[1], step): for k in range(0, map_shape[2], step): + if ( + i + cshape > map_shape[0] + or j + cshape > map_shape[1] + or k + cshape > map_shape[2] + ): + continue slices.append( ( slice(i, i + cshape), @@ -89,6 +96,13 @@ def __init__(self, map_shape: tuple, **kwargs): ) tiles.append((i, j, k)) + ishape = (i + cshape) - i + jshape = (j + cshape) - j + kshape = (k + cshape) - k + + if ishape != 32 or jshape != 32 or kshape != 32: + print(ishape, jshape, kshape) + self.slices = slices self.tiles = tiles @@ -186,8 +200,6 @@ def __call__( mask = mask_from_labelobj(mask) - kwargs["ext_dim"] = [divx(d, kwargs.get("step", 1)) - d for d in mapobj.shape] - return mapobj, kwargs @@ -204,9 +216,9 @@ def __call__( mapobj: MapObjHandle, **kwargs, ) -> tuple[MapObjHandle, dict]: - ext_dim = kwargs.get("ext_dim", None) + ext_dim = [divx(d, kwargs.get("step", 1)) - d for d in mapobj.shape] + left = kwargs.get("left", True) - pad_map_grid_split_distribution( mapobj, ext_dim=ext_dim, From ce470300f3dc5a47e149e311ae882b35261cba8e Mon Sep 17 00:00:00 2001 From: Luc 
Elliott Date: Wed, 4 Sep 2024 16:05:45 +0000 Subject: [PATCH 26/56] Refactor test_map_io.py to handle weight tensors and update dataset lengths --- tests/test_map_io.py | 24 ++++++++++++------------ 1 file changed, 12 insertions(+), 12 deletions(-) diff --git a/tests/test_map_io.py b/tests/test_map_io.py index 5dcbdc4..55562a9 100644 --- a/tests/test_map_io.py +++ b/tests/test_map_io.py @@ -42,11 +42,11 @@ def test_slices(test_data_single_mrc_dir): transforms=[], augments=[], ) - slice_, _, _ = test_map_dataset.__getitem__(0) + slice_ = test_map_dataset.__getitem__(0)[0] assert isinstance(slice_, torch.Tensor) - assert len(test_map_dataset) == 2 - assert slice_.shape == (49, 46, 48) + assert len(test_map_dataset) == 4 + assert slice_.shape == (32, 32, 32) def test_transforms(test_data_single_mrc_dir): @@ -57,10 +57,10 @@ def test_transforms(test_data_single_mrc_dir): ) test_map_dataset.load_map_objects() test_map_dataset.transform() - slice_, _, _ = test_map_dataset.__getitem__(0) + slice_ = test_map_dataset.__getitem__(0)[0] - assert len(test_map_dataset) == 8 - assert slice_.shape == (64, 64, 64) + assert len(test_map_dataset) == 64 + assert slice_.shape == (32, 32, 32) def test_dataloader_load_to_HDF5_file(test_data_single_mrc_temp_dir): @@ -73,7 +73,7 @@ def test_dataloader_load_to_HDF5_file(test_data_single_mrc_temp_dir): assert test_map_dataloader is not None assert isinstance(test_map_dataloader, MapDataLoader) assert test_map_dataloader.dataset is not None - assert test_data_single_mrc_temp_dir.joinpath("raw_map_data.h5").exists() + assert test_map_dataloader.dataset.datasets[0].map_hdf5_store.save_path.exists() def test_dataloader_load_to_HDF5_file_with_transforms(test_data_single_mrc_temp_dir): @@ -88,7 +88,7 @@ def test_dataloader_load_to_HDF5_file_with_transforms(test_data_single_mrc_temp_ assert test_map_dataloader is not None assert isinstance(test_map_dataloader, MapDataLoader) assert test_map_dataloader.dataset is not None - assert test_data_single_mrc_temp_dir.joinpath("raw_map_data.h5").exists() + assert test_map_dataloader.dataset.datasets[0].map_hdf5_store.save_path.exists() def test_add_duplicate_dataset_to_dataloader(test_data_single_mrc_temp_dir): @@ -135,10 +135,10 @@ def test_add_duplicate_dataset_to_dataloader_with_augments( assert "realmap_map" in hdf5_store.keys() # noqa: SIM118 assert "1--realmap_map" in hdf5_store.keys() # noqa: SIM118 - assert len(test_map_dataloader.dataset.datasets[0]) == 8 - assert len(test_map_dataloader.dataset.datasets[1]) == 8 + assert len(test_map_dataloader.dataset.datasets[0]) == 64 + assert len(test_map_dataloader.dataset.datasets[1]) == 64 - assert len(test_map_dataloader.dataset) == 16 + assert len(test_map_dataloader.dataset) == 128 def test_dataloader_load_multi_process(test_data_single_mrc_temp_dir): @@ -152,6 +152,6 @@ def test_dataloader_load_multi_process(test_data_single_mrc_temp_dir): assert test_map_dataloader is not None assert isinstance(test_map_dataloader, MapDataLoader) assert test_map_dataloader.dataset is not None - assert test_data_single_mrc_temp_dir.joinpath("raw_map_data.h5").exists() + assert test_map_dataloader.dataset.datasets[0].map_hdf5_store.save_path.exists() # test_map_dataloader. 
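The two conventions these updated tests encode — a weight volume stacked as an extra leading channel on the map tensor, and cube decomposition that drops any tile overrunning the volume — are easy to lose track of across the patches, so here is a minimal, self-contained sketch of both. This is illustrative only, not code from the series: pack_weights and cube_slices are hypothetical names, and the 80^3 volume is chosen purely so the tile count lands on 64 as asserted in test_map_io.py.

    from __future__ import annotations

    import torch


    def pack_weights(map_t: torch.Tensor, label_t: torch.Tensor | None) -> torch.Tensor:
        # Binary weights derived from the labels (1 where labelled, 0 elsewhere),
        # falling back to all-ones when there are no labels, stacked with the
        # density so a single (2, x, y, z) array travels through the HDF5 store.
        weight_t = (label_t != 0).float() if label_t is not None else torch.ones_like(map_t)
        return torch.stack((map_t, weight_t), dim=0)


    def cube_slices(shape: tuple[int, ...], cshape: int, step: int) -> list[tuple[slice, ...]]:
        # Walk a regular grid of corner positions and keep only cubes that fit
        # entirely inside the volume, mirroring the overrun check added to
        # DecomposeToSlices in patch 25.
        out = []
        for i in range(0, shape[0], step):
            for j in range(0, shape[1], step):
                for k in range(0, shape[2], step):
                    if i + cshape > shape[0] or j + cshape > shape[1] or k + cshape > shape[2]:
                        continue
                    out.append((slice(i, i + cshape), slice(j, j + cshape), slice(k, k + cshape)))
        return out


    volume = torch.rand(80, 80, 80)
    packed = pack_weights(volume, label_t=None)
    tiles = cube_slices(volume.shape, cshape=32, step=16)
    print(packed.shape)  # torch.Size([2, 80, 80, 80])
    print(len(tiles))    # 64 tiles, each of shape (32, 32, 32)

One caveat when reading on: the channel order of the packed tensor is only a convention, and the series is not yet consistent about it — patch 23 stacks the weight channel first, while add_dataset_to_HDF5 in patch 28 puts the map first even though duplicate_and_augment_from_hdf5 reads index 0 back as the weights — so the writer and the reader of the store must agree on one order.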
From 5d34da03558c6e521fa8512815f3ae6ac8c1d9ae Mon Sep 17 00:00:00 2001 From: Luc Elliott Date: Mon, 30 Sep 2024 14:04:40 +0000 Subject: [PATCH 27/56] Refactor HDF5DataStore class to handle weight tensors and implement caching mechanism --- src/caked/hdf5.py | 76 +++++++++++++++++++++++++++++++++++++++++++---- 1 file changed, 70 insertions(+), 6 deletions(-) diff --git a/src/caked/hdf5.py b/src/caked/hdf5.py index 036264f..d7df003 100644 --- a/src/caked/hdf5.py +++ b/src/caked/hdf5.py @@ -6,9 +6,20 @@ import h5py import numpy as np +from collections import OrderedDict + +import torch + class HDF5DataStore: - def __init__(self, save_path: str, use_temp_dir: bool = True, batch_size: int = 10): + def __init__( + self, + save_path: str | Path, + use_temp_dir: bool = True, + cache: LRUCache = None, + batch_size: int = 10, + cache_size: int = 5, + ): """ Object to store data in HDF5 format. If use_temp_dir is True, the file is saved in a temporary directory and deleted when the object is deleted. This is useful @@ -21,6 +32,7 @@ def __init__(self, save_path: str, use_temp_dir: bool = True, batch_size: int = """ + save_path = Path(save_path) if use_temp_dir: self.temp_dir_obj = tempfile.TemporaryDirectory() self.temp_dir = Path(self.temp_dir_obj.name) @@ -32,6 +44,10 @@ def __init__(self, save_path: str, use_temp_dir: bool = True, batch_size: int = self.batch_size = batch_size self.counter = 0 self.file = None + if cache is None: + self.cache = LRUCache(cache_size) + else: + self.cache = cache def open(self, mode: str = "a"): if self.file is None: @@ -51,10 +67,18 @@ def __getitem__(self, key: str): with h5py.File(self.save_path, "r") as f: return np.array(f[key]) - def get(self, key: str, default=None): + def get(self, key: str, default=None, to_torch: bool = False): try: + if key in self.cache: + return self.cache.get(key) with h5py.File(self.save_path, "r") as f: - return np.array(f[key]) + if to_torch: + arr = torch.from_numpy(np.array(f[key])).clone().detach() + + else: + arr = np.array(f[key]) + self.cache.put(key, arr) + return arr except KeyError: return default @@ -66,7 +90,7 @@ def add_array( ) -> str: if self.check_name_in_store(dataset_name): dataset_name = self._add_number_to_dataset_name(dataset_name) - with h5py.File(self.save_path, "a") as f: # Open in append mode + with h5py.File(self.save_path, "a") as f: f.create_dataset( dataset_name, data=array, compression=compression, chunks=True ) @@ -105,6 +129,46 @@ def keys(self): with h5py.File(self.save_path, "r") as f: return list(f.keys()) - def values(self): + def values(self, to_torch: bool = False): with h5py.File(self.save_path, "r") as f: - return [np.array(f[key]) for key in f.keys()] + for key in f.keys(): + if to_torch: + yield torch.from_numpy(np.array(f[key])) + else: + yield np.array(f[key]) + + +class LRUCache: + def __init__(self, max_memory_gb: float): + self.max_memory_bytes = max_memory_gb * 1024**3 + self.cache = OrderedDict() + self.current_memory_usage = 0 + + def get_memory_usage(self, obj) -> int: + if isinstance(obj, np.ndarray): + return obj.nbytes + if isinstance(obj, torch.Tensor): + return obj.element_size() * obj.nelement() + return 0 + + def get(self, key: str): + if key not in self.cache: + return None + self.cache.move_to_end(key) + return self.cache[key] + + def put(self, key: str, value): + if key in self.cache: + self.current_memory_usage -= self.get_memory_usage(self.cache[key]) + self.cache.move_to_end(key) + self.cache[key] = value + self.current_memory_usage += self.get_memory_usage(value) + 
self.evict_if_needed() + + def evict_if_needed(self): + while self.current_memory_usage > self.max_memory_bytes: + _, evicted_value = self.cache.popitem(last=False) + self.current_memory_usage -= self.get_memory_usage(evicted_value) + + def __contains__(self, key): + return key in self.cache From d049499550bddf6c66358dfa5350b798198c37f8 Mon Sep 17 00:00:00 2001 From: Luc Elliott Date: Mon, 30 Sep 2024 14:35:35 +0000 Subject: [PATCH 28/56] pre-commit fixes --- .gitignore | 2 +- setup.cfg | 11 ++-- setup.py | 2 + src/caked/Transforms/base.py | 3 +- src/caked/Transforms/transforms.py | 10 ++-- src/caked/Wrappers/__init__.py | 2 + src/caked/utils.py | 83 ++++++++++++++++++++---------- tests/pytest.ini | 2 +- 8 files changed, 72 insertions(+), 43 deletions(-) diff --git a/.gitignore b/.gitignore index d459a02..80265b8 100644 --- a/.gitignore +++ b/.gitignore @@ -159,4 +159,4 @@ Thumbs.db # IDE specific files -.vscode/ \ No newline at end of file +.vscode/ diff --git a/setup.cfg b/setup.cfg index a9f9cf7..a13127d 100644 --- a/setup.cfg +++ b/setup.cfg @@ -1,7 +1,7 @@ # Setup configuration for the package [metadata] name = caked - + # Options for the package @@ -9,10 +9,10 @@ name = caked packages = find: python_requires = >=3.8 - - - - + + + + [options.packages.find] where = src @@ -33,4 +33,3 @@ exclude = *.ipynb .mypy_cache .ruff_cache - \ No newline at end of file diff --git a/setup.py b/setup.py index 6068493..a03590f 100644 --- a/setup.py +++ b/setup.py @@ -1,3 +1,5 @@ +from __future__ import annotations + from setuptools import setup setup() diff --git a/src/caked/Transforms/base.py b/src/caked/Transforms/base.py index d2a400d..5e91863 100644 --- a/src/caked/Transforms/base.py +++ b/src/caked/Transforms/base.py @@ -3,7 +3,6 @@ from abc import ABC, abstractmethod import numpy as np -from ccpem_utils.map.parse_mrcmapobj import MapObjHandle class TransformBase(ABC): @@ -17,7 +16,7 @@ def __init__(self): pass @abstractmethod - def __call__(self, data): + def __call__(self, mapobj, **kwargs): msg = "The __call__ method must be implemented in the subclass" raise NotImplementedError(msg) diff --git a/src/caked/Transforms/transforms.py b/src/caked/Transforms/transforms.py index aed440f..c6df624 100644 --- a/src/caked/Transforms/transforms.py +++ b/src/caked/Transforms/transforms.py @@ -53,19 +53,19 @@ class ComposeTransform: :param transforms: (list) list of transformations to compose - :return: (MapObjHandle) transformed MapObjHandle + :return: (dict) transformed MapObjHandle kwargs """ def __init__(self, transforms: list[str]): self.transforms = transforms - def __call__(self, *args: list[MapObjHandle], **kwargs) -> MapObjHandle: + def __call__(self, *args: list[MapObjHandle | None], **kwargs) -> dict: for transform in self.transforms: for mapobj in args: if mapobj is None: - continue + continue # type: ignore[unreachable] - mapobj, kwargs = get_transform(transform)(mapobj, **kwargs) + _, kwargs = get_transform(transform)(mapobj, **kwargs) return kwargs @@ -217,7 +217,7 @@ def __call__( **kwargs, ) -> tuple[MapObjHandle, dict]: ext_dim = [divx(d, kwargs.get("step", 1)) - d for d in mapobj.shape] - + left = kwargs.get("left", True) pad_map_grid_split_distribution( mapobj, diff --git a/src/caked/Wrappers/__init__.py b/src/caked/Wrappers/__init__.py index b2837ab..7245127 100644 --- a/src/caked/Wrappers/__init__.py +++ b/src/caked/Wrappers/__init__.py @@ -1,3 +1,5 @@ +from __future__ import annotations + from functools import wraps diff --git a/src/caked/utils.py b/src/caked/utils.py index 
33cc5b7..be23bdc 100644 --- a/src/caked/utils.py +++ b/src/caked/utils.py @@ -4,6 +4,8 @@ from pathlib import Path import numpy as np +import psutil +import torch from torch.utils.data import ConcatDataset from caked.hdf5 import HDF5DataStore @@ -20,7 +22,6 @@ def process_datasets( decompose: bool, raw_map_HDF5: HDF5DataStore, label_HDF5: HDF5DataStore | None = None, - weight_HDF5: HDF5DataStore | None = None, ): """ Process multiple datasets in parallel. @@ -32,7 +33,6 @@ def process_datasets( weight_paths: List of paths to the weight files. raw_map_HDF5: Instance of HDF5DataStore to store map data. label_HDF5: Instance of HDF5DataStore to store label data. - weight_HDF5: Instance of HDF5DataStore to store weight data. Returns: None @@ -52,19 +52,20 @@ def process_datasets( decompose, raw_map_HDF5, label_HDF5, - weight_HDF5, ) for path, label_path, weight_path in zip(paths, label_paths, weight_paths) ] for future in as_completed(futures): result, dataset = future.result() + map_data, label_data, weight_data = result.values() add_dataset_to_HDF5( - *result.values(), + map_data, + label_data, + weight_data, dataset.id, raw_map_HDF5, label_HDF5=label_HDF5, - weight_HDF5=weight_HDF5, ) datasets.append(dataset) # Collect processed datasets @@ -80,7 +81,6 @@ def process_map_dataset( decompose: bool, map_hdf5: HDF5DataStore, label_hdf5: HDF5DataStore | None, - weight_hdf5: HDF5DataStore | None, ): """ Process a single map dataset, applying transformations and augmentations, closes the map objects. @@ -109,7 +109,6 @@ def process_map_dataset( decompose=decompose, map_hdf5_store=map_hdf5, label_hdf5_store=label_hdf5, - weight_hdf5_store=weight_hdf5, ) map_dataset.transform(close_map_objects=False) map_dataset.augment(close_map_objects=False) @@ -133,8 +132,7 @@ def add_dataset_to_HDF5( name: str, raw_map_HDF5: HDF5DataStore, label_HDF5: HDF5DataStore | None = None, - weight_HDF5: HDF5DataStore | None = None, -) -> tuple[str, str, str]: +) -> tuple[str, str]: """ Add a map data to HDF5 files. 
@@ -146,25 +144,39 @@
         label_data: (np.ndarray | None) label data
         weight_data: (np.ndarray | None) weight data
         label_HDF5: (HDF5DataStore | None) instance of HDF5DataStore to store label data
-        weight_HDF5: (HDF5DataStore | None) instance of HDF5DataStore to store weight data

     Returns:
         tuple[str, str, str]: map_id, label_id, weight_id
     """
     map_id = f"{name}_map"
     label_id = f"{name}_label"
-    weight_id = f"{name}_weight"
+
+    map_data = torch.tensor(map_data, dtype=torch.float32)
+    label_data = (
+        torch.tensor(label_data, dtype=torch.float32)
+        if label_data is not None
+        else None
+    )
+
+    if weight_data is None and label_data is not None:
+        weight_data = torch.where(
+            label_data != 0,
+            torch.ones_like(label_data),
+            torch.zeros_like(label_data),
+        )
+
+    else:
+        weight_data = torch.ones_like(map_data)
+
+    if weight_data is not None and weight_data.shape == map_data.shape:
+        # Add weight values to the first dimension of the map tensor
+        map_data = torch.cat((map_data.unsqueeze(0), weight_data.unsqueeze(0)), dim=0)

     map_id = raw_map_HDF5.add_array(map_data, map_id)
     if label_HDF5 is not None:
         label_id = label_HDF5.add_array(label_data, label_id)
-    if weight_HDF5 is not None:
-        weight_id = weight_HDF5.add_array(weight_data, weight_id)
-
-    return map_id, label_id, weight_id
-
-# Functions so I don't need to write out if xxx is None each time
+    return map_id, label_id


 @none_return_none
@@ -180,7 +192,7 @@ def duplicate_and_augment_from_hdf5(
     augmentations: list[str] | None = None,
 ):
     """
-    Add data from a list of paths to the HDF5 store.
+    Add data from a list of paths to the HDF5 store.

     Args:
         pathnames (list[str]): List of path names accessed from the HDF5 store, typically the stem of the original file.
@@ -200,24 +212,20 @@
         msg = "No datasets have been loaded yet."
         raise RuntimeError(msg)

-    map_hdf5_store, label_hdf5_store, weight_hdf5_store = (
+    map_hdf5_store, label_hdf5_store = (
         map_data_loader.dataset.datasets[0].map_hdf5_store,
         map_data_loader.dataset.datasets[0].label_hdf5_store,
-        map_data_loader.dataset.datasets[0].weight_hdf5_store,
     )

     for dataset_id in ids:
-        array = map_hdf5_store[dataset_id + "_map"]
+        array_weight = map_hdf5_store[dataset_id + "_map"]
         label_array = (
             label_hdf5_store.get(dataset_id + "_label")
             if label_hdf5_store is not None
             else None
         )
-        weight_array = (
-            weight_hdf5_store.get(dataset_id + "_weight")
-            if weight_hdf5_store is not None
-            else None
-        )
+        weight_array = array_weight[0]
+        array = array_weight[1]

         dataset = ArrayDataset(
             dataset_id=dataset_id,
@@ -227,13 +235,20 @@
             augments=augmentations,
             map_hdf5_store=map_hdf5_store,
             label_hdf5_store=label_hdf5_store,
-            weight_hdf5_store=weight_hdf5_store,
             decompose=map_data_loader.dataset.datasets[0].decompose,
             decompose_kwargs=map_data_loader.dataset.datasets[0].decompose_kwargs,
         )

         dataset.augment()  # Augment, flagged off when prediction mode selected
-        dataset.save_to_store()
+
+        add_dataset_to_HDF5(
+            dataset.data_array,
+            dataset.label_array,
+            dataset.weight_array,
+            dataset.id,
+            map_hdf5_store,
+            label_hdf5_store,
+        )

         datasets.append(dataset)

@@ -251,3 +266,15 @@
     """
     paths = sorted(path.rglob(f"*.{datatype}"), key=lambda x: x.stem.split("_")[0])
     return paths[:dataset_size] if dataset_size is not None else paths
+
+
+def get_max_memory() -> int:
+    """
+    Detect the maximum memory available on the machine.
+ + Returns: + int: The maximum memory available in GB, rounded down to the nearest integer. + """ + mem_info = psutil.virtual_memory() + max_memory_gb = mem_info.total / (1024**3) # Convert bytes to GB + return int(max_memory_gb // 1) diff --git a/tests/pytest.ini b/tests/pytest.ini index b0e5a94..c24fe5b 100644 --- a/tests/pytest.ini +++ b/tests/pytest.ini @@ -1,3 +1,3 @@ [pytest] filterwarnings = - ignore::DeprecationWarning \ No newline at end of file + ignore::DeprecationWarning From 1e65dd87e5c71902037bb41c0555e266757288d1 Mon Sep 17 00:00:00 2001 From: Luc Elliott Date: Mon, 30 Sep 2024 16:07:16 +0000 Subject: [PATCH 29/56] Refactor MapDataLoader to add caching mechanism for HDF5DataStore --- src/caked/dataloader.py | 240 +++++++++++++++++++++------------------- 1 file changed, 126 insertions(+), 114 deletions(-) diff --git a/src/caked/dataloader.py b/src/caked/dataloader.py index 81635c4..2682a5d 100644 --- a/src/caked/dataloader.py +++ b/src/caked/dataloader.py @@ -20,10 +20,11 @@ from torchvision import transforms from caked.base import AbstractDataLoader, AbstractDataset, DatasetConfig -from caked.hdf5 import HDF5DataStore +from caked.hdf5 import HDF5DataStore, LRUCache from caked.Transforms.augments import ComposeAugment from caked.Transforms.transforms import ComposeTransform, DecomposeToSlices, Transforms from caked.utils import ( + get_max_memory, get_sorted_paths, process_datasets, ) @@ -296,6 +297,7 @@ def load( self, datapath: str | Path, datatype: str, + cache_size: int | None = None, label_path: str | Path | None = None, weight_path: str | Path | None = None, use_gpu: bool = False, @@ -326,18 +328,20 @@ def load( datapath = Path(datapath) label_path = Path(label_path) if label_path is not None else None weight_path = Path(weight_path) if weight_path is not None else None - map_hdf5_store = HDF5DataStore(datapath.joinpath("raw_map_data.h5")) + + cache_size = get_max_memory() if cache_size is None else cache_size + cache = LRUCache(cache_size) + + map_hdf5_store = HDF5DataStore( + datapath.joinpath("raw_map_data.h5"), + cache=cache, + ) # TODO: cache size should be a parameter and 40 is for my own testing label_hdf5_store = ( - HDF5DataStore(label_path.joinpath("label_data.h5")) + HDF5DataStore(label_path.joinpath("label_data.h5"), cache=cache) if label_path is not None else None ) - weight_hdf5_store = ( - HDF5DataStore(weight_path.joinpath("weight_data.h5")) - if weight_path is not None - else None - ) paths = get_sorted_paths(datapath, datatype, self.dataset_size) label_paths = get_sorted_paths(label_path, datatype, self.dataset_size) @@ -375,7 +379,6 @@ def load( self.decompose, map_hdf5_store, label_hdf5_store, - weight_hdf5_store, ) self.dataset = ConcatDataset(datasets) @@ -393,14 +396,13 @@ def process(self): def get_hdf5_store( self, - ) -> tuple[HDF5DataStore, HDF5DataStore | None, HDF5DataStore | None]: + ) -> tuple[HDF5DataStore, HDF5DataStore | None]: if self.dataset is None: msg = "The dataset has not been loaded yet." raise RuntimeError(msg) return ( self.dataset.datasets[0].map_hdf5_store, self.dataset.datasets[0].label_hdf5_store, - self.dataset.datasets[0].weight_hdf5_store, ) def get_loader( @@ -408,6 +410,7 @@ def get_loader( batch_size: int, split_size: float | None = None, no_val_drop: bool = False, + split: bool = True, ): """ Retrieve the data loader. @@ -426,7 +429,7 @@ def get_loader( RuntimeError: If the train and validation sets are smaller than 2 samples. 
""" - if self.training: + if self.training and split: if split_size is None: msg = "Split size must be provided for training. " raise RuntimeError(msg) @@ -633,17 +636,20 @@ def __init__( Path(config.weight_path) if config.weight_path is not None else None ) - self.map_hdf5_store: HDF5DataStore | None = config.map_hdf5_store - self.label_hdf5_store: HDF5DataStore | None = config.label_hdf5_store - self.weight_hdf5_store: HDF5DataStore | None = config.weight_hdf5_store - self.slices: list[tuple] | None = None - self.tiles = None - self.tiles_count = config.tiles_count - self.transforms = config.transforms - self.augments = config.augments - self.decompose_kwargs = config.decompose_kwargs - self.transform_kwargs = config.transform_kwargs - self.decompose = config.decompose + self.map_hdf5_store: HDF5DataStore = kwargs.get( + "map_hdf5_store", config.map_hdf5_store + ) + self.label_hdf5_store: HDF5DataStore | None = kwargs.get( + "label_hdf5_store", config.label_hdf5_store + ) + self.slices: list = kwargs.get("slices", []) + self.tiles: list = kwargs.get("tiles", []) + self.tiles_count = kwargs.get("tiles_count", config.tiles_count) + self.transforms = kwargs.get("transforms", config.transforms) + self.augments = kwargs.get("augments", config.augments) + self.decompose_kwargs = kwargs.get("decompose_kwargs", config.decompose_kwargs) + self.transform_kwargs = kwargs.get("transform_kwargs", config.transform_kwargs) + self.decompose = kwargs.get("decompose", config.decompose) self.data_shape: tuple | None = None self.mapobj: MapObjHandle | None = None @@ -656,11 +662,9 @@ def __init__( if self.transform_kwargs is None: self.transform_kwargs = {} - if self.augments is None: - self.augments = [] + self.augments = [] if self.augments is None else self.augments - if self.transforms is None: - self.transforms = [] + self.transforms = [] if self.transforms is None else self.transforms if not self.decompose_kwargs.get("step", False): self.decompose_kwargs["step"] = self.decompose_kwargs.get("cshape", 1) - ( @@ -680,45 +684,36 @@ def __getitem__( ) -> tuple[torch.Tensor, torch.Tensor | None, torch.Tensor | None]: # This needs to be changhed to hold where the data is stored - if (self.slices is None) or (self.tiles is None): + if (not self.slices) or (not self.tiles): self.generate_tile_indicies() - if self.mapobj is None: - self.load_map_objects() + map_array = self.map_hdf5_store.get(f"{self.id}_map", to_torch=True) + + if map_array.ndim == 4: + x_slice, y_slice, z_slice = self.slices[idx] + map_slice = map_array[:, x_slice, y_slice, z_slice] + else: + map_slice = map_array[self.slices[idx]] - map_slice = self.mapobj.data[self.slices[idx]] label_slice = ( - self.label_mapobj.data[self.slices[idx]] - if self.label_mapobj is not None - else None - ) - weight_slice = ( - self.weight_mapobj.data[self.slices[idx]] - if self.weight_mapobj is not None + self.label_hdf5_store.get(f"{self.id}_label", to_torch=True)[ + self.slices[idx] + ] + if self.label_hdf5_store is not None else None ) - map_tensor = torch.tensor(map_slice) - label_tensor = torch.tensor(label_slice) if label_slice is not None else None - weight_tensor = torch.tensor(weight_slice) if weight_slice is not None else None - - if weight_tensor is None and label_tensor is not None: - weight_tensor = torch.where( - label_tensor != 0, - torch.ones_like(label_tensor), - torch.zeros_like(label_tensor), - ) + if not isinstance(map_slice, torch.Tensor): + map_tensor = torch.tensor(map_slice) + else: + map_tensor = map_slice - # Ensure weight_tensor has 
the same shape as map_tensor - if weight_tensor is not None and weight_tensor.shape == map_tensor.shape: - # Add weight values to the first dimension of the map tensor - map_tensor = torch.cat( - (weight_tensor.unsqueeze(0), map_tensor.unsqueeze(0)), dim=0 + if not isinstance(label_slice, torch.Tensor): + label_tensor = ( + torch.tensor(label_slice) if label_slice is not None else None ) - - # if the weight tensor is None then I want to create weights tesnor using the label tensor - - self.close_map_objects(self.mapobj, self.label_mapobj, self.weight_mapobj) + else: + label_tensor = label_slice return tuple( tensor for tensor in (map_tensor, label_tensor) if tensor is not None @@ -747,7 +742,7 @@ def close_map_objects(self, *args): if arg is not None: arg.close() - def augment(self, close_map_objects) -> None: + def augment(self, close_map_objects) -> dict: augment_kwargs = self._augment_keywords_builder() if len(self.augments) == 0: return {} @@ -802,7 +797,9 @@ def get_data_shape(self, close_map_objects: bool = True): if (self.mapobj is None) or (self.mapobj.data) is None: self.load_map_objects() - self.data_shape = self.mapobj.data.shape + if self.mapobj is not None and self.mapobj.data is not None: + # MyPy shenanigans + self.data_shape = self.mapobj.data.shape if self.label_mapobj is not None: assert ( self.label_mapobj.data.shape == self.data_shape @@ -879,16 +876,15 @@ def __init__( self.label_array = label_array self.weight_array = weight_array - self.slices = config.slices - self.tiles = config.tiles - self.tiles_count = config.tiles_count - self.augments = config.augments - self.decompose = config.decompose + self.slices = kwargs.get("slices", config.slices) + self.tiles = kwargs.get("tiles", config.tiles) + self.tiles_count = kwargs.get("tiles_count", config.tiles_count) + self.augments = kwargs.get("augments", config.augments) + self.decompose = kwargs.get("decompose", config.decompose) self.data_shape: tuple | None = None - self.decompose_kwargs = config.decompose_kwargs - self.map_hdf5_store = config.map_hdf5_store - self.label_hdf5_store = config.label_hdf5_store - self.weight_hdf5_store = config.weight_hdf5_store + self.decompose_kwargs = kwargs.get("decompose_kwargs", config.decompose_kwargs) + self.map_hdf5_store = kwargs.get("map_hdf5_store", config.map_hdf5_store) + self.label_hdf5_store = kwargs.get("label_hdf5_store", config.label_hdf5_store) if self.decompose_kwargs is None: self.decompose_kwargs = {"cshape": 64, "margin": 8} @@ -922,45 +918,47 @@ def __getitem__(self, idx): if self.data_array is None: self.get_data() - data_slice = self.data_array[self.slices[idx]] + # the map_slice could be the shape [2, x, y, x] however the slice is only [x, y, z] + + # MyPy shenanigans + if self.data_array is not None and self.data_array.ndim == 4: + x_slice, y_slice, z_slice = self.slices[idx] + map_slice = self.data_array[:, x_slice, y_slice, z_slice] + elif self.data_array is not None: + map_slice = self.data_array[self.slices[idx]] + else: + map_slice = None label_slice = ( self.label_array[self.slices[idx]] if self.label_array is not None else None ) - weight_slice = ( - self.weight_array[self.slices[idx]] - if self.weight_array is not None - else None - ) - data_tensor = torch.tensor(data_slice) - label_tensor = torch.tensor(label_slice) if label_slice is not None else None - weight_tensor = torch.tensor(weight_slice) if weight_slice is not None else None - - if weight_tensor is None and label_tensor is not None: - weight_tensor = torch.where( - label_tensor != 0, - 
torch.ones_like(label_tensor), - torch.zeros_like(label_tensor), - ) - if weight_tensor is not None and weight_tensor.shape == data_tensor.shape: - # Add weight values to the first dimension of the map tensor - data_tensor = torch.cat( - (weight_tensor.unsqueeze(0), data_tensor.unsqueeze(0)), dim=0 + if not isinstance(map_slice, torch.Tensor): + map_tensor = torch.tensor(map_slice) + else: + map_tensor = map_slice + + if not isinstance(label_slice, torch.Tensor): + label_tensor = ( + torch.tensor(label_slice) if label_slice is not None else None ) + else: + label_tensor = label_slice self.close_data() + # self.close_map_objects(self.mapobj, self.label_mapobj, self.weight_mapobj) + return tuple( - tensor for tensor in (data_tensor, label_tensor) if tensor is not None + tensor for tensor in (map_tensor, label_tensor) if tensor is not None ) def get_data(self): - self.data_array = self.map_hdf5_store.get(self.id + "_map") + self.data_array = self.map_hdf5_store.get(self.id + "_map", to_torch=True) if self.label_hdf5_store is not None: - self.label_array = self.label_hdf5_store.get(self.id + "_label") - if self.weight_hdf5_store is not None: - self.weight_array = self.weight_hdf5_store.get(self.id + "_weight") + self.label_array = self.label_hdf5_store.get( + self.id + "_label", to_torch=True + ) def close_data(self): self.data_array = None @@ -978,7 +976,7 @@ def transform(self) -> None: msg = "Transforms are not supported for ArrayDataset." raise NotImplementedError(msg) - def augment(self) -> None: + def augment(self) -> dict: augment_kwargs = self._augment_keywords_builder() if len(self.augments) == 0: return {} @@ -1007,7 +1005,9 @@ def get_data_shape(self, close_data: bool = True): if self.data_array is None: self.get_data() - self.data_shape = self.data_array.shape + + # MyPy shenanigans + self.data_shape = self.data_array.shape if self.data_array is not None else None if self.label_array is not None: assert ( self.label_array.shape == self.data_shape @@ -1035,22 +1035,34 @@ def generate_tile_indicies(self): self.tiles = decompose.tiles self.tiles_count = len(self.tiles) - def save_to_store(self, close_data: bool = True): - self.id = self.map_hdf5_store.add_array( - self.data_array, - self.id + "_map", - ) - self.id = self.id.replace("_map", "") - - if self.label_array is not None: - self.label_hdf5_store.add_array( - self.label_array, - self.id + "_label", - ) - if self.weight_array is not None: - self.weight_hdf5_store.add_array( - self.weight_array, - self.id + "_weight", - ) - if close_data: - self.close_data() + # def save_to_store(self, close_data: bool = True): + # if self.weight_array is None and self.label_array is not None: + # self.weight_array = torch.where( + # self.label_array != 0, + # torch.ones_like(self.label_array), + # torch.zeros_like(self.label_array), + # ) + + # if ( + # self.weight_array is not None + # and self.weight_array.shape == self.data_array.shape + # ): + # # Add weight values to the first dimension of the map tensor + # self.data_array = torch.cat( + # (self.weight_array.unsqueeze(0), self.data_array.unsqueeze(0)), dim=0 + # ) + + # self.id = self.map_hdf5_store.add_array( + # self.data_array, + # self.id + "_map", + # ) + # self.id = self.id.replace("_map", "") + + # if self.label_array is not None: + # self.label_hdf5_store.add_array( + # self.label_array, + # self.id + "_label", + # ) + + # if close_data: + # self.close_data() From f31870b2e925b8fa33b5b2122b29a1c1da23df18 Mon Sep 17 00:00:00 2001 From: Luc Elliott Date: Mon, 30 Sep 2024 16:13:53 
+0000 Subject: [PATCH 30/56] Refactor HDF5DataStore class to handle weight tensors and update caching mechanism --- src/caked/hdf5.py | 18 +++++++++--------- 1 file changed, 9 insertions(+), 9 deletions(-) diff --git a/src/caked/hdf5.py b/src/caked/hdf5.py index d7df003..6718f87 100644 --- a/src/caked/hdf5.py +++ b/src/caked/hdf5.py @@ -1,13 +1,11 @@ from __future__ import annotations import tempfile +from collections import OrderedDict from pathlib import Path import h5py import numpy as np - -from collections import OrderedDict - import torch @@ -16,7 +14,7 @@ def __init__( self, save_path: str | Path, use_temp_dir: bool = True, - cache: LRUCache = None, + cache: LRUCache | None = None, batch_size: int = 10, cache_size: int = 5, ): @@ -27,15 +25,17 @@ def __init__( saved in the save_path provided. The file is not deleted when the object is deleted. :param save_path: (str) path to save the file + :param cache: (LRUCache) cache object to store data :param use_temp_dir: (bool) whether to use a temporary directory :param batch_size: (int) number of items to write to the file before closing + :param cache_size: (int) size of the cache in GB """ save_path = Path(save_path) if use_temp_dir: self.temp_dir_obj = tempfile.TemporaryDirectory() - self.temp_dir = Path(self.temp_dir_obj.name) + self.temp_dir: Path | None = Path(self.temp_dir_obj.name) self.save_path = self.temp_dir.joinpath(save_path.name) else: self.save_path = Path(save_path) @@ -55,7 +55,7 @@ def open(self, mode: str = "a"): def close(self): if self.file is not None: - self.file.close() + self.file.close() # type: ignore[unreachable] self.file = None def __del__(self): @@ -131,7 +131,7 @@ def keys(self): def values(self, to_torch: bool = False): with h5py.File(self.save_path, "r") as f: - for key in f.keys(): + for key in f: if to_torch: yield torch.from_numpy(np.array(f[key])) else: @@ -139,9 +139,9 @@ def values(self, to_torch: bool = False): class LRUCache: - def __init__(self, max_memory_gb: float): + def __init__(self, max_memory_gb: int): self.max_memory_bytes = max_memory_gb * 1024**3 - self.cache = OrderedDict() + self.cache: OrderedDict = OrderedDict() self.current_memory_usage = 0 def get_memory_usage(self, obj) -> int: From cb501a0b2248a5384eec2e06385612645c0fc4c7 Mon Sep 17 00:00:00 2001 From: Luc Elliott Date: Mon, 30 Sep 2024 16:14:26 +0000 Subject: [PATCH 31/56] Fixed tests --- tests/test_map_io.py | 28 +++++++++++++++++++++++++--- 1 file changed, 25 insertions(+), 3 deletions(-) diff --git a/tests/test_map_io.py b/tests/test_map_io.py index 55562a9..d9f4469 100644 --- a/tests/test_map_io.py +++ b/tests/test_map_io.py @@ -5,7 +5,8 @@ import torch from caked.dataloader import MapDataLoader, MapDataset -from caked.utils import duplicate_and_augment_from_hdf5 +from caked.hdf5 import HDF5DataStore +from caked.utils import add_dataset_to_HDF5, duplicate_and_augment_from_hdf5 ORIG_DIR = Path.cwd() @@ -37,30 +38,51 @@ def test_map_dataset(test_data_single_mrc_dir): def test_slices(test_data_single_mrc_dir): + hdf5_store = HDF5DataStore("test.hdf5", cache_size=1) + test_map_dataset = MapDataset( path=next(test_data_single_mrc_dir.glob(f"*{DATATYPE_MRC}")), transforms=[], augments=[], + map_hdf5_store=hdf5_store, + ) + test_map_dataset.load_map_objects() + + add_dataset_to_HDF5( + test_map_dataset.mapobj.data, + None, + None, + "realmap", + hdf5_store, ) slice_ = test_map_dataset.__getitem__(0)[0] assert isinstance(slice_, torch.Tensor) assert len(test_map_dataset) == 4 - assert slice_.shape == (32, 32, 32) + assert 
slice_.shape == (2, 32, 32, 32) def test_transforms(test_data_single_mrc_dir): + hdf5_store = HDF5DataStore("test.hdf5", cache_size=1) test_map_dataset = MapDataset( path=next(test_data_single_mrc_dir.glob(f"*{DATATYPE_MRC}")), + map_hdf5_store=hdf5_store, transforms=TRANSFORM_ALL, augments=[], ) test_map_dataset.load_map_objects() test_map_dataset.transform() + add_dataset_to_HDF5( + test_map_dataset.mapobj.data, + None, + None, + "realmap", + hdf5_store, + ) slice_ = test_map_dataset.__getitem__(0)[0] assert len(test_map_dataset) == 64 - assert slice_.shape == (32, 32, 32) + assert slice_.shape == (2, 32, 32, 32) def test_dataloader_load_to_HDF5_file(test_data_single_mrc_temp_dir): From 6cd4f6a5c839093b5daf0930e74a2fa839315f15 Mon Sep 17 00:00:00 2001 From: Luc Elliott Date: Tue, 1 Oct 2024 11:18:10 +0000 Subject: [PATCH 32/56] Refactor MapDataset and ArrayDataset to handle tile generation and slicing --- src/caked/dataloader.py | 12 ++++++------ 1 file changed, 6 insertions(+), 6 deletions(-) diff --git a/src/caked/dataloader.py b/src/caked/dataloader.py index 2682a5d..7ffe349 100644 --- a/src/caked/dataloader.py +++ b/src/caked/dataloader.py @@ -682,10 +682,10 @@ def __len__(self): def __getitem__( self, idx ) -> tuple[torch.Tensor, torch.Tensor | None, torch.Tensor | None]: - # This needs to be changhed to hold where the data is stored - - if (not self.slices) or (not self.tiles): + if ((not self.slices) or (not self.tiles)) and self.decompose: self.generate_tile_indicies() + else: + self.slices = [(slice(None), slice(None), slice(None))] map_array = self.map_hdf5_store.get(f"{self.id}_map", to_torch=True) @@ -912,14 +912,14 @@ def __len__(self): return self.tiles_count def __getitem__(self, idx): - if (self.slices is None) or (self.tiles is None): + if ((not self.slices) or (not self.tiles)) and self.decompose: self.generate_tile_indicies() + else: + self.slices = [(slice(None), slice(None), slice(None))] if self.data_array is None: self.get_data() - # the map_slice could be the shape [2, x, y, x] however the slice is only [x, y, z] - # MyPy shenanigans if self.data_array is not None and self.data_array.ndim == 4: x_slice, y_slice, z_slice = self.slices[idx] From 34770e815ec8e17d0317d86b67bfb328db60c40e Mon Sep 17 00:00:00 2001 From: Luc Elliott Date: Tue, 1 Oct 2024 11:18:24 +0000 Subject: [PATCH 33/56] Added LRUCache test --- tests/test_map_io.py | 29 ++++++++++++++++++++++++++++- 1 file changed, 28 insertions(+), 1 deletion(-) diff --git a/tests/test_map_io.py b/tests/test_map_io.py index d9f4469..acdee81 100644 --- a/tests/test_map_io.py +++ b/tests/test_map_io.py @@ -5,7 +5,7 @@ import torch from caked.dataloader import MapDataLoader, MapDataset -from caked.hdf5 import HDF5DataStore +from caked.hdf5 import HDF5DataStore, LRUCache from caked.utils import add_dataset_to_HDF5, duplicate_and_augment_from_hdf5 ORIG_DIR = Path.cwd() @@ -177,3 +177,30 @@ def test_dataloader_load_multi_process(test_data_single_mrc_temp_dir): assert test_map_dataloader.dataset.datasets[0].map_hdf5_store.save_path.exists() # test_map_dataloader. 
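+
+# A minimal sketch (not exercised by the tests below) of the eviction
+# discipline LRUCache implements, using an item-count budget in place of the
+# real byte budget computed via get_memory_usage:
+from collections import OrderedDict
+
+
+class CountLRU:
+    def __init__(self, max_items: int):
+        self.max_items = max_items
+        self.cache = OrderedDict()
+
+    def get(self, key):
+        if key not in self.cache:
+            return None
+        self.cache.move_to_end(key)  # mark as most recently used
+        return self.cache[key]
+
+    def put(self, key, value):
+        self.cache[key] = value
+        self.cache.move_to_end(key)
+        while len(self.cache) > self.max_items:
+            self.cache.popitem(last=False)  # evict least recently used first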
+ + +def test_lru_cache(test_data_single_mrc_dir): + cache = LRUCache(1) + hdf5_store = HDF5DataStore("test.hdf5", cache=cache) + + test_map_dataset = MapDataset( + path=next(test_data_single_mrc_dir.glob(f"*{DATATYPE_MRC}")), + transforms=[], + augments=[], + map_hdf5_store=hdf5_store, + ) + test_map_dataset.load_map_objects() + + add_dataset_to_HDF5( + test_map_dataset.mapobj.data, + None, + None, + "realmap", + hdf5_store, + ) + + assert "realmap_map" not in hdf5_store.cache + + _ = test_map_dataset.__getitem__(0)[0] + + assert "realmap_map" in hdf5_store.cache From ddbd2d282b7672be354763c74f57d16ee4da7ec9 Mon Sep 17 00:00:00 2001 From: Luc Elliott Date: Tue, 1 Oct 2024 12:49:30 +0000 Subject: [PATCH 34/56] Refactor MapDataset and ArrayDataset to improve code organization and readability --- src/caked/dataloader.py | 94 +++++++++++++++++++++-------------------- 1 file changed, 49 insertions(+), 45 deletions(-) diff --git a/src/caked/dataloader.py b/src/caked/dataloader.py index 7ffe349..a4e1dfe 100644 --- a/src/caked/dataloader.py +++ b/src/caked/dataloader.py @@ -722,6 +722,9 @@ def __getitem__( def load_map_objects( self, ) -> None: + """ + Load the map objects from the specified paths. + """ self.mapobj = get_mapobjhandle(self.path) self.mapobj.all_transforms = True if self.label_path is not None: @@ -738,11 +741,28 @@ def load_map_objects( self.weight_mapobj.all_transforms = False def close_map_objects(self, *args): + """ + Close the map objects. + + Args: + *args: The map objects to close. + + + """ for arg in args: if arg is not None: arg.close() def augment(self, close_map_objects) -> dict: + """ + Apply augmentations to the map data. + + Args: + close_map_objects (bool): Whether to close the map objects after transformation. + + Returns: + dict: The augmentation keywords + """ augment_kwargs = self._augment_keywords_builder() if len(self.augments) == 0: return {} @@ -750,9 +770,7 @@ def augment(self, close_map_objects) -> dict: self.mapobj, extra_kwargs = ComposeAugment(self.augments)( self.mapobj, **augment_kwargs ) - augment_kwargs.update( - extra_kwargs - ) # update the kwargs with the returned values + augment_kwargs.update(extra_kwargs) self.label_mapobj = ComposeAugment(self.augments)( self.label_mapobj, **augment_kwargs @@ -776,7 +794,6 @@ def transform(self, close_map_objects: bool = True): close_map_objects (bool, optional): Whether to close the map objects after transformation. Defaults to True. """ - # TODO: Need to see if same transforms are applied to all map objects, maybe just voxel space normalisation if self.mapobj is None: self.load_map_objects() transform_kwargs = self._transform_keywords_builder() @@ -792,6 +809,14 @@ def transform(self, close_map_objects: bool = True): self.close_map_objects(self.mapobj, self.label_mapobj, self.weight_mapobj) def get_data_shape(self, close_map_objects: bool = True): + """ + Get the shape of the map data, label data, and weight data. + + + Args: + close_map_objects (bool, optional): Whether to close the map objects after transformation. Defaults to True. + + """ if self.data_shape is not None: return @@ -813,6 +838,10 @@ def get_data_shape(self, close_map_objects: bool = True): self.close_map_objects(self.mapobj, self.label_mapobj, self.weight_mapobj) def generate_tile_indicies(self): + """ + Generate the tile indices for the map data using the decomposition parameters. 
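+
+        For example, with the default cshape of 32 and margin of 8 the step is
+        32 - 2 * 8 = 16, so a 64 x 64 x 64 map decomposes into 4 * 4 * 4 = 64
+        overlapping tiles.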
+ + """ if self.data_shape is None: self.get_data_shape() @@ -858,7 +887,6 @@ def _augment_keywords_builder(self): class ArrayDataset(AbstractDataset): """Class to handle loading of data from hdf5 files, to be handled by a DataLoader""" - # need to add their own and update the dataset id def __init__( self, dataset_id: str, @@ -896,10 +924,8 @@ def __init__( if self.augments is None: self.augments = [] - # create an instance of the map dataset so I can use it's functions using composition self.__mapdataset = MapDataset( path=self.id, - # use the attributes from the config object **config.__dict__, ) @@ -947,13 +973,14 @@ def __getitem__(self, idx): self.close_data() - # self.close_map_objects(self.mapobj, self.label_mapobj, self.weight_mapobj) - return tuple( tensor for tensor in (map_tensor, label_tensor) if tensor is not None ) def get_data(self): + """ + Retrieve the array data from the HDF5 store. + """ self.data_array = self.map_hdf5_store.get(self.id + "_map", to_torch=True) if self.label_hdf5_store is not None: self.label_array = self.label_hdf5_store.get( @@ -961,6 +988,9 @@ def get_data(self): ) def close_data(self): + """ + Close the data arrays. + """ self.data_array = None self.label_array = None self.weight_array = None @@ -971,12 +1001,14 @@ def _augment_keywords_builder(self): def _transform_keywords_builder(self): return self.__mapdataset._transform_keywords_builder() - # need to do augment def transform(self) -> None: msg = "Transforms are not supported for ArrayDataset." raise NotImplementedError(msg) def augment(self) -> dict: + """ + Apply augmentations to the array data. + """ augment_kwargs = self._augment_keywords_builder() if len(self.augments) == 0: return {} @@ -985,9 +1017,7 @@ def augment(self) -> dict: self.data_array, **augment_kwargs ) - augment_kwargs.update( - extra_kwargs - ) # update the kwargs with the returned values + augment_kwargs.update(extra_kwargs) if self.label_array is not None: self.label_array, _ = ComposeAugment(self.augments)( self.label_array, **augment_kwargs @@ -1000,6 +1030,9 @@ def augment(self) -> dict: return augment_kwargs def get_data_shape(self, close_data: bool = True): + """ + Get the shape of the array data. + """ if self.data_shape is not None: return @@ -1021,6 +1054,9 @@ def get_data_shape(self, close_data: bool = True): self.close_data() def generate_tile_indicies(self): + """ + Generate the tile indices for the array data using the decomposition parameters. 
+ """ if self.data_shape is None: self.get_data_shape() @@ -1034,35 +1070,3 @@ def generate_tile_indicies(self): self.slices = decompose.slices self.tiles = decompose.tiles self.tiles_count = len(self.tiles) - - # def save_to_store(self, close_data: bool = True): - # if self.weight_array is None and self.label_array is not None: - # self.weight_array = torch.where( - # self.label_array != 0, - # torch.ones_like(self.label_array), - # torch.zeros_like(self.label_array), - # ) - - # if ( - # self.weight_array is not None - # and self.weight_array.shape == self.data_array.shape - # ): - # # Add weight values to the first dimension of the map tensor - # self.data_array = torch.cat( - # (self.weight_array.unsqueeze(0), self.data_array.unsqueeze(0)), dim=0 - # ) - - # self.id = self.map_hdf5_store.add_array( - # self.data_array, - # self.id + "_map", - # ) - # self.id = self.id.replace("_map", "") - - # if self.label_array is not None: - # self.label_hdf5_store.add_array( - # self.label_array, - # self.id + "_label", - # ) - - # if close_data: - # self.close_data() From f10117d4111671a75d19e5d9b48a3091f4e0b8b6 Mon Sep 17 00:00:00 2001 From: Luc Elliott Date: Tue, 1 Oct 2024 13:13:30 +0000 Subject: [PATCH 35/56] Refactor unused variables in Rotation90Augment class --- src/caked/Transforms/augments.py | 2 ++ tests/conftest.py | 6 +++--- 2 files changed, 5 insertions(+), 3 deletions(-) diff --git a/src/caked/Transforms/augments.py b/src/caked/Transforms/augments.py index f701a1a..4dace35 100644 --- a/src/caked/Transforms/augments.py +++ b/src/caked/Transforms/augments.py @@ -118,5 +118,7 @@ def __call__( data: np.ndarray, **kwargs, ) -> np.ndarray: + _ = data + _ = kwargs msg = "Rotation90Augment not implemented yet." raise NotImplementedError(msg) diff --git a/tests/conftest.py b/tests/conftest.py index 3bc6d4f..a3d0a33 100644 --- a/tests/conftest.py +++ b/tests/conftest.py @@ -34,10 +34,10 @@ def test_data_single_mrc_dir(): @pytest.fixture() def test_data_single_mrc_temp_dir(): with TemporaryDirectory() as temp_dir: - temp_dir = Path(temp_dir) + temp_dir_path = Path(temp_dir) test_data_single_mrc_dir = Path( Path(__file__).parent.joinpath("testdata_mrc", "mrc") ) for file in test_data_single_mrc_dir.glob("*"): - shutil.copy(file, temp_dir) - yield temp_dir + shutil.copy(file, temp_dir_path) + yield temp_dir_path From 93313561533764da2f67e73e068eb69d87073e51 Mon Sep 17 00:00:00 2001 From: Luc Elliott Date: Tue, 1 Oct 2024 13:28:30 +0000 Subject: [PATCH 36/56] Add ccpem-utils to project dependencies --- pyproject.toml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/pyproject.toml b/pyproject.toml index 56eb38d..e00dc4a 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -28,7 +28,7 @@ classifiers = [ "Typing :: Typed", ] -dependencies = ["torch", "numpy", "pandas", "mrcfile", "torchvision", "scipy", "pyarrow"] +dependencies = ["torch", "numpy", "pandas", "mrcfile", "torchvision", "scipy", "pyarrow", "ccpem-utils"] [project.optional-dependencies] test = [ From be1a80b6644aca980dbfa4389f0322b36f33a2e4 Mon Sep 17 00:00:00 2001 From: Luc Elliott Date: Tue, 1 Oct 2024 13:52:02 +0000 Subject: [PATCH 37/56] Add h5py and psutil to project dependencies --- pyproject.toml | 2 +- setup.cfg | 13 +++++++++++++ 2 files changed, 14 insertions(+), 1 deletion(-) diff --git a/pyproject.toml b/pyproject.toml index e00dc4a..546f3c3 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -28,7 +28,7 @@ classifiers = [ "Typing :: Typed", ] -dependencies = ["torch", "numpy", "pandas", "mrcfile", 
"torchvision", "scipy", "pyarrow", "ccpem-utils"] +dependencies = ["torch", "numpy", "pandas", "mrcfile", "torchvision", "scipy", "pyarrow", "ccpem-utils", "h5py", "psutil"] [project.optional-dependencies] test = [ diff --git a/setup.cfg b/setup.cfg index a13127d..d7ebb46 100644 --- a/setup.cfg +++ b/setup.cfg @@ -11,6 +11,19 @@ packages = find: python_requires = >=3.8 +# where to add pip dependencies + +install_requires = + torch + numpy + pandas + mrcfile + torchvision + scipy + pyarrow + ccpem-utils + h5py + psutil From 6b7f0c7d3f4b12df38dd1416fef8c9afe1690b63 Mon Sep 17 00:00:00 2001 From: Luc Elliott Date: Tue, 1 Oct 2024 13:54:18 +0000 Subject: [PATCH 38/56] Fix deprecation warnings in pytest.ini --- tests/pytest.ini | 1 + 1 file changed, 1 insertion(+) diff --git a/tests/pytest.ini b/tests/pytest.ini index c24fe5b..bb8885b 100644 --- a/tests/pytest.ini +++ b/tests/pytest.ini @@ -1,3 +1,4 @@ [pytest] filterwarnings = ignore::DeprecationWarning + error::DeprecationWarning From 3b4196f82cd0fe757d04a24f821331747236fb1e Mon Sep 17 00:00:00 2001 From: Luc Elliott Date: Tue, 1 Oct 2024 14:00:14 +0000 Subject: [PATCH 39/56] Fix deprecation warnings in pytest.ini --- .github/workflows/ci.yml | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/.github/workflows/ci.yml b/.github/workflows/ci.yml index 7e8a554..faf4616 100644 --- a/.github/workflows/ci.yml +++ b/.github/workflows/ci.yml @@ -48,8 +48,8 @@ jobs: - name: Test package run: >- - python -m pytest -ra --cov --cov-report=xml --cov-report=term - --durations=20 + PYTHONWARNINGS=ignore::DeprecationWarning python -m pytest -ra --cov + --cov-report=xml --cov-report=term --durations=20 - name: Upload coverage report uses: codecov/codecov-action@v3.1.4 From 28dc529d4349665105d448dfdb0c5e570c897498 Mon Sep 17 00:00:00 2001 From: Luc Elliott Date: Tue, 1 Oct 2024 14:04:46 +0000 Subject: [PATCH 40/56] Fix deprecation warnings in pytest.ini --- tests/pytest.ini | 1 - 1 file changed, 1 deletion(-) diff --git a/tests/pytest.ini b/tests/pytest.ini index bb8885b..c24fe5b 100644 --- a/tests/pytest.ini +++ b/tests/pytest.ini @@ -1,4 +1,3 @@ [pytest] filterwarnings = ignore::DeprecationWarning - error::DeprecationWarning From 2358a763b322f972af0ac58cadabbd910f701170 Mon Sep 17 00:00:00 2001 From: Luc Elliott Date: Tue, 1 Oct 2024 14:17:36 +0000 Subject: [PATCH 41/56] Fix deprecation warnings in pytest.ini and update pytest configuration --- .github/workflows/ci.yml | 4 ++-- pyproject.toml | 1 + tests/pytest.ini | 3 --- 3 files changed, 3 insertions(+), 5 deletions(-) delete mode 100644 tests/pytest.ini diff --git a/.github/workflows/ci.yml b/.github/workflows/ci.yml index faf4616..6a19267 100644 --- a/.github/workflows/ci.yml +++ b/.github/workflows/ci.yml @@ -48,8 +48,8 @@ jobs: - name: Test package run: >- - PYTHONWARNINGS=ignore::DeprecationWarning python -m pytest -ra --cov - --cov-report=xml --cov-report=term --durations=20 + python -m pytest -ra --cov --cov-report=xml --cov-report=term + --durations=20 --disable-pytest-warnings - name: Upload coverage report uses: codecov/codecov-action@v3.1.4 diff --git a/pyproject.toml b/pyproject.toml index 546f3c3..070ac5f 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -61,6 +61,7 @@ addopts = ["-ra", "--showlocals", "--strict-markers", "--strict-config"] xfail_strict = true filterwarnings = [ "error", + "ignore::DeprecationWarning", ] log_cli_level = "INFO" testpaths = [ diff --git a/tests/pytest.ini b/tests/pytest.ini deleted file mode 100644 index c24fe5b..0000000 --- 
a/tests/pytest.ini +++ /dev/null @@ -1,3 +0,0 @@ -[pytest] -filterwarnings = - ignore::DeprecationWarning From a9af9c3e21d9d5b7b51055ae673bfb5a15003701 Mon Sep 17 00:00:00 2001 From: Luc Elliott Date: Tue, 1 Oct 2024 14:25:09 +0000 Subject: [PATCH 42/56] Fix deprecation warnings --- pyproject.toml | 1 - 1 file changed, 1 deletion(-) diff --git a/pyproject.toml b/pyproject.toml index 070ac5f..3d363e4 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -60,7 +60,6 @@ minversion = "6.0" addopts = ["-ra", "--showlocals", "--strict-markers", "--strict-config"] xfail_strict = true filterwarnings = [ - "error", "ignore::DeprecationWarning", ] log_cli_level = "INFO" From 7cd0e06d2dd4f6c6a0218b5d264c41043020802f Mon Sep 17 00:00:00 2001 From: Luc Elliott Date: Tue, 1 Oct 2024 14:39:36 +0000 Subject: [PATCH 43/56] Update pytest configuration and package installation --- .github/workflows/ci.yml | 2 +- setup.cfg | 5 ++++- 2 files changed, 5 insertions(+), 2 deletions(-) diff --git a/.github/workflows/ci.yml b/.github/workflows/ci.yml index 6a19267..b7e9d46 100644 --- a/.github/workflows/ci.yml +++ b/.github/workflows/ci.yml @@ -44,7 +44,7 @@ jobs: allow-prereleases: true - name: Install package - run: python -m pip install .[test] + run: python -m pip install -e .[test] - name: Test package run: >- diff --git a/setup.cfg b/setup.cfg index d7ebb46..da8bec8 100644 --- a/setup.cfg +++ b/setup.cfg @@ -28,7 +28,10 @@ install_requires = [options.packages.find] -where = src +where = + src + src/Transforms + src/Wrappers exclude = tests .github From cb6a8c97a9c46b035ce9910e5fce9e61321437bc Mon Sep 17 00:00:00 2001 From: Luc Elliott Date: Tue, 1 Oct 2024 14:48:35 +0000 Subject: [PATCH 44/56] Update pytest configuration and package installation --- .github/workflows/ci.yml | 2 +- setup.cfg | 9 ++++----- 2 files changed, 5 insertions(+), 6 deletions(-) diff --git a/.github/workflows/ci.yml b/.github/workflows/ci.yml index b7e9d46..92e245d 100644 --- a/.github/workflows/ci.yml +++ b/.github/workflows/ci.yml @@ -44,7 +44,7 @@ jobs: allow-prereleases: true - name: Install package - run: python -m pip install -e .[test] + run: python -m pip install -e . - name: Test package run: >- diff --git a/setup.cfg b/setup.cfg index da8bec8..f6fca38 100644 --- a/setup.cfg +++ b/setup.cfg @@ -9,6 +9,8 @@ name = caked packages = find: python_requires = >=3.8 +package_dir = + = src # where to add pip dependencies @@ -26,12 +28,9 @@ install_requires = psutil - [options.packages.find] -where = - src - src/Transforms - src/Wrappers +where = src + exclude = tests .github From b95eaa675f05ea0aebba5bf4f3f5fa55a0aa9e20 Mon Sep 17 00:00:00 2001 From: Luc Elliott Date: Tue, 1 Oct 2024 14:54:09 +0000 Subject: [PATCH 45/56] Update pytest configuration and package installation --- .github/workflows/ci.yml | 4 ++-- setup.cfg | 5 ++++- 2 files changed, 6 insertions(+), 3 deletions(-) diff --git a/.github/workflows/ci.yml b/.github/workflows/ci.yml index 92e245d..51bad8b 100644 --- a/.github/workflows/ci.yml +++ b/.github/workflows/ci.yml @@ -44,12 +44,12 @@ jobs: allow-prereleases: true - name: Install package - run: python -m pip install -e . 
+ run: python -m pip install -e .[test] - name: Test package run: >- python -m pytest -ra --cov --cov-report=xml --cov-report=term - --durations=20 --disable-pytest-warnings + --durations=20 - name: Upload coverage report uses: codecov/codecov-action@v3.1.4 diff --git a/setup.cfg b/setup.cfg index f6fca38..969267e 100644 --- a/setup.cfg +++ b/setup.cfg @@ -29,7 +29,10 @@ install_requires = [options.packages.find] -where = src +where = + src + src/Transforms + src/Wrappers exclude = tests From a8b367c5491b9bf918518c0b787b94c261678a44 Mon Sep 17 00:00:00 2001 From: Luc Elliott Date: Tue, 1 Oct 2024 16:00:40 +0000 Subject: [PATCH 46/56] Fix: __getitem__ logic --- src/caked/dataloader.py | 14 ++++++++++---- 1 file changed, 10 insertions(+), 4 deletions(-) diff --git a/src/caked/dataloader.py b/src/caked/dataloader.py index d876224..5dd0cce 100644 --- a/src/caked/dataloader.py +++ b/src/caked/dataloader.py @@ -682,10 +682,13 @@ def __len__(self): def __getitem__( self, idx ) -> tuple[torch.Tensor, torch.Tensor | None, torch.Tensor | None]: - if ((not self.slices) or (not self.tiles)) and self.decompose: + if (not self.slices or not self.tiles) and self.decompose: self.generate_tile_indicies() - else: + elif (not self.slices or not self.tiles) and not self.decompose: self.slices = [(slice(None), slice(None), slice(None))] + else: + self.slices = self.slices + self.tiles = self.tiles map_array = self.map_hdf5_store.get(f"{self.id}_map", to_torch=True) @@ -938,10 +941,13 @@ def __len__(self): return self.tiles_count def __getitem__(self, idx): - if ((not self.slices) or (not self.tiles)) and self.decompose: + if (not self.slices or not self.tiles) and self.decompose: self.generate_tile_indicies() - else: + elif (not self.slices or not self.tiles) and not self.decompose: self.slices = [(slice(None), slice(None), slice(None))] + else: + self.slices = self.slices + self.tiles = self.tiles if self.data_array is None: self.get_data() From 06968068fb845e073844fa3ef8d60c2bb92f0c94 Mon Sep 17 00:00:00 2001 From: hllelli2 Date: Wed, 9 Oct 2024 13:53:30 +0000 Subject: [PATCH 47/56] Add iterator method to HDF5DataStore and simplify key checks in tests --- src/caked/hdf5.py | 5 +++++ tests/test_map_io.py | 10 +++++----- 2 files changed, 10 insertions(+), 5 deletions(-) diff --git a/src/caked/hdf5.py b/src/caked/hdf5.py index 6718f87..22e5d78 100644 --- a/src/caked/hdf5.py +++ b/src/caked/hdf5.py @@ -67,6 +67,11 @@ def __getitem__(self, key: str): with h5py.File(self.save_path, "r") as f: return np.array(f[key]) + def __iter__(self): + with h5py.File(self.save_path, "r") as f: + for key in f: + yield key + def get(self, key: str, default=None, to_torch: bool = False): try: if key in self.cache: diff --git a/tests/test_map_io.py b/tests/test_map_io.py index acdee81..e5af235 100644 --- a/tests/test_map_io.py +++ b/tests/test_map_io.py @@ -132,9 +132,9 @@ def test_add_duplicate_dataset_to_dataloader(test_data_single_mrc_temp_dir): hdf5_store = test_map_dataloader.dataset.datasets[0].map_hdf5_store assert len(hdf5_store.keys()) == 3 - assert "realmap_map" in hdf5_store.keys() # noqa: SIM118 - assert "1--realmap_map" in hdf5_store.keys() # noqa: SIM118 - assert "2--realmap_map" in hdf5_store.keys() # noqa: SIM118 + assert "realmap_map" in hdf5_store + assert "1--realmap_map" in hdf5_store + assert "2--realmap_map" in hdf5_store def test_add_duplicate_dataset_to_dataloader_with_augments( @@ -154,8 +154,8 @@ def test_add_duplicate_dataset_to_dataloader_with_augments( ) hdf5_store = 
test_map_dataloader.dataset.datasets[0].map_hdf5_store assert len(hdf5_store.keys()) == 2 - assert "realmap_map" in hdf5_store.keys() # noqa: SIM118 - assert "1--realmap_map" in hdf5_store.keys() # noqa: SIM118 + assert "realmap_map" in hdf5_store + assert "1--realmap_map" in hdf5_store assert len(test_map_dataloader.dataset.datasets[0]) == 64 assert len(test_map_dataloader.dataset.datasets[1]) == 64 From 7fbefdfe93544f706350e00da509b8885264aba2 Mon Sep 17 00:00:00 2001 From: hllelli2 Date: Wed, 9 Oct 2024 13:54:43 +0000 Subject: [PATCH 48/56] Refactor test fixtures to use default scope and improve readability --- tests/conftest.py | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/tests/conftest.py b/tests/conftest.py index a3d0a33..ca86b2d 100644 --- a/tests/conftest.py +++ b/tests/conftest.py @@ -7,25 +7,25 @@ import pytest -@pytest.fixture(scope="session") +@pytest.fixture() def test_data_mrc_dir(): """Fixture to provide the MRC test data directory.""" return Path(Path(__file__).parent.joinpath("testdata_mrc")) -@pytest.fixture(scope="session") +@pytest.fixture() def test_data_npy_dir(): """Fixture to provide the NPY test data directory.""" return Path(Path(__file__).parent.joinpath("testdata_npy")) -@pytest.fixture(scope="session") +@pytest.fixture() def test_corrupt_file(): """Fixture to provide the path to a corrupt file for testing.""" return Path(__file__).parent / "corrupt.mrc" -@pytest.fixture(scope="session") +@pytest.fixture() def test_data_single_mrc_dir(): """Fixture to provide a single MRC file for testing.""" return Path(Path(__file__).parent.joinpath("testdata_mrc", "mrc")) From 36a5883a0c692659142a65199458778d56b0a4ff Mon Sep 17 00:00:00 2001 From: hllelli2 Date: Wed, 9 Oct 2024 14:44:07 +0000 Subject: [PATCH 49/56] Refactor: Rename tiles to slice_indicies and update related logic --- src/caked/Transforms/transforms.py | 6 +-- src/caked/Transforms/utils.py | 85 ------------------------------ src/caked/base.py | 4 +- src/caked/dataloader.py | 63 ++++++++++++---------- 4 files changed, 41 insertions(+), 117 deletions(-) diff --git a/src/caked/Transforms/transforms.py b/src/caked/Transforms/transforms.py index c6df624..4afdfe2 100644 --- a/src/caked/Transforms/transforms.py +++ b/src/caked/Transforms/transforms.py @@ -76,7 +76,7 @@ class DecomposeToSlices: def __init__(self, map_shape: tuple, **kwargs): step = kwargs.get("step", 1) cshape = kwargs.get("cshape", 1) - slices, tiles = [], [] + slices, slice_indicies = [], [] for i in range(0, map_shape[0], step): for j in range(0, map_shape[1], step): @@ -94,7 +94,7 @@ def __init__(self, map_shape: tuple, **kwargs): slice(k, k + cshape), ) ) - tiles.append((i, j, k)) + slice_indicies.append((i, j, k)) ishape = (i + cshape) - i jshape = (j + cshape) - j @@ -104,7 +104,7 @@ def __init__(self, map_shape: tuple, **kwargs): print(ishape, jshape, kshape) self.slices = slices - self.tiles = tiles + self.slice_indicies = slice_indicies class MapObjectVoxelNormalisation(TransformBase): diff --git a/src/caked/Transforms/utils.py b/src/caked/Transforms/utils.py index c579f71..78aae11 100644 --- a/src/caked/Transforms/utils.py +++ b/src/caked/Transforms/utils.py @@ -2,94 +2,9 @@ import math -import numpy as np from ccpem_utils.map.parse_mrcmapobj import MapObjHandle -def pad_map_grid_sample( - mapobj: MapObjHandle, - ext_dim: tuple, - inplace: bool = False, - fill_padding: float | None = None, - left: bool = True, -) -> MapObjHandle | None: - """Takes an input map object and pads it with zeros to the specified 
extent. - - :param mapobj: (MapObjHandle) map object to be padded - :param ext_dim: (tuple) the extent of the padding in each dimension (X, Y, Z) - :param inplace: (bool) whether to modify the input map object or return a new one - :param fill_padding: (float) value to fill the padding with - :param left: (bool) if there is an odd number of slices to pad, whether to pad more on the left or right - - :return: (MapObjHandle) the padded map object - """ - - def even_odd_split(n): - if n % 2 == 0: - return n // 2, n // 2 - - return n // 2, n - n // 2 - - nx, ny, nz = ext_dim[::-1] - nx1, nx2 = even_odd_split(nx) - ny1, ny2 = even_odd_split(ny) - nz1, nz2 = even_odd_split(nz) - - padded_array = pad_array_numpy( - mapobj.data, nx1, nx2, ny1, ny2, nz1, nz2, fill_padding=fill_padding, left=left - ) - # the start is at the base of the xyz grid - # I want to move the origin to the base of the padded grid - start = (nx1, ny1, nz1) - - ox = mapobj.origin[0] - start[0] * mapobj.apix[0] - oy = mapobj.origin[1] - start[1] * mapobj.apix[1] - oz = mapobj.origin[2] - start[2] * mapobj.apix[2] - nstx = mapobj.nstart[0] - start[0] - nsty = mapobj.nstart[1] - start[1] - nstz = mapobj.nstart[2] - start[2] - if not inplace: - newmap = mapobj.copy() - newmap.origin = (ox, oy, oz) - newmap.data = padded_array - newmap.nstart = (nstx, nsty, nstz) - newmap.update_header_by_data() - return newmap - - mapobj.origin = (ox, oy, oz) - mapobj.data = padded_array - mapobj.nstart = (nstx, nsty, nstz) - mapobj.update_header_by_data() - - return None - - -def pad_array_numpy(arr, nx1, nx2, ny1, ny2, nz1, nz2, fill_padding=None, left=True): - """ - - Pad an array with specified increments along each dimension. - Arguments: - *nx,ny,nz* - Number of slices to add to either sides of each dimension. - Return: - array - """ - - # the nx, ny, nz values should be the total number of slices to add, split as evenly as possible - - if not left: - nx1, nx2 = nx2, nx1 - ny1, ny2 = ny2, ny1 - nz1, nz2 = nz2, nz1 - - return np.pad( - arr, - ((nz1, nz2), (ny1, ny2), (nx1, nx2)), - mode="constant", - constant_values=fill_padding, - ) - - def mask_from_labelobj(label_mapobj: MapObjHandle): """ Create a mask from a label object, where the mask is a boolean array diff --git a/src/caked/base.py b/src/caked/base.py index 17a768c..3921431 100644 --- a/src/caked/base.py +++ b/src/caked/base.py @@ -112,5 +112,5 @@ class DatasetConfig: label_hdf5_store: HDF5DataStore | None = None weight_hdf5_store: HDF5DataStore | None = None slices: list[tuple[int, int, int]] | None = None - tiles = None - tiles_count: int = 0 + slice_indicies = None + slices_count: int = 0 diff --git a/src/caked/dataloader.py b/src/caked/dataloader.py index 5dd0cce..56e6008 100644 --- a/src/caked/dataloader.py +++ b/src/caked/dataloader.py @@ -595,7 +595,7 @@ def __init__( ) -> None: """ A dataset class for loading map data, alongside the corresponding class labels and weights. - The map data is loaded from the disk and is decomposed into a set of tiles. These tiles are + The map data is loaded from the disk and is decomposed into a set of slice_indicies. These slice_indicies are then returned when indexing the dataset. Args: @@ -606,7 +606,7 @@ def __init__( label_hdf5_store (Optional[HDF5DataStore]): The HDF5 store for the label data. Defaults to None. transforms (Optional[List[str]]): The transformations to apply to the data. augments (Optional[List[str]]): The augmentations to apply to the data. - decompose (bool): Whether to decompose the data into tiles. Defaults to True. 
+ decompose (bool): Whether to decompose the data into slices and slice_indicies. Defaults to True. decompose_kwargs (Optional[Dict[str, int]]): The decomposition parameters. Defaults to None. transform_kwargs (Optional[Dict]): The transformation parameters. Defaults to None. @@ -617,8 +617,8 @@ def __init__( label_mapobj (Optional[MapObjHandle]): The map object handle for the label data. Defaults to None. weight_mapobj (Optional[MapObjHandle]): The map object handle for the weight data. Defaults to None. slices (Optional[List[Tuple]]): The slices of the data. Defaults to None. - tiles (Optional): The tiles of the data. Defaults to None. - tiles_count (int): The number of tiles. Defaults to 0. + slice_indicies (Optional): The slice_indicies of the data. Defaults to None. + slices_count (int): The number of slice_indicies. Defaults to 0. """ config = DatasetConfig() @@ -643,8 +643,8 @@ def __init__( "label_hdf5_store", config.label_hdf5_store ) self.slices: list = kwargs.get("slices", []) - self.tiles: list = kwargs.get("tiles", []) - self.tiles_count = kwargs.get("tiles_count", config.tiles_count) + self.slice_indicies: list = kwargs.get("slice_indicies", []) + self.slices_count = kwargs.get("slices_count", config.slices_count) self.transforms = kwargs.get("transforms", config.transforms) self.augments = kwargs.get("augments", config.augments) self.decompose_kwargs = kwargs.get("decompose_kwargs", config.decompose_kwargs) @@ -672,23 +672,23 @@ def __init__( ) def __len__(self): - if self.tiles_count == 0 and self.decompose: + if self.slices_count == 0 and self.decompose: self.generate_tile_indicies() - elif self.tiles_count == 0: - self.tiles_count = 1 + elif self.slices_count == 0: + self.slices_count = 1 - return self.tiles_count + return self.slices_count def __getitem__( self, idx ) -> tuple[torch.Tensor, torch.Tensor | None, torch.Tensor | None]: - if (not self.slices or not self.tiles) and self.decompose: + if (not self.slices or not self.slice_indicies) and self.decompose: self.generate_tile_indicies() - elif (not self.slices or not self.tiles) and not self.decompose: + elif (not self.slices or not self.slice_indicies) and not self.decompose: self.slices = [(slice(None), slice(None), slice(None))] else: self.slices = self.slices - self.tiles = self.tiles + self.slice_indicies = self.slice_indicies map_array = self.map_hdf5_store.get(f"{self.id}_map", to_torch=True) @@ -856,8 +856,8 @@ def generate_tile_indicies(self): ) self.slices = decompose.slices - self.tiles = decompose.tiles - self.tiles_count = len(self.tiles) + self.slice_indicies = decompose.slice_indicies + self.slices_count = len(self.slice_indicies) def _transform_keywords_builder(self): keywords = {} @@ -888,7 +888,16 @@ def _augment_keywords_builder(self): class ArrayDataset(AbstractDataset): - """Class to handle loading of data from hdf5 files, to be handled by a DataLoader""" + """Class to handle loading of data from hdf5 files, to be handled by a DataLoader + + Args: + dataset_id (str): The dataset ID. + data_array (np.ndarray): The data array. + label_array (np.ndarray, optional): The label array. Defaults to None. + weight_array (np.ndarray, optional): The weight array. Defaults to None. 
+ + + """ def __init__( self, @@ -908,8 +917,8 @@ def __init__( self.weight_array = weight_array self.slices = kwargs.get("slices", config.slices) - self.tiles = kwargs.get("tiles", config.tiles) - self.tiles_count = kwargs.get("tiles_count", config.tiles_count) + self.slice_indicies = kwargs.get("slice_indicies", config.slice_indicies) + self.slices_count = kwargs.get("slices_count", config.slices_count) self.augments = kwargs.get("augments", config.augments) self.decompose = kwargs.get("decompose", config.decompose) self.data_shape: tuple | None = None @@ -933,21 +942,21 @@ def __init__( ) def __len__(self): - if self.tiles_count == 0 and self.decompose: + if self.slices_count == 0 and self.decompose: self.generate_tile_indicies() - elif self.tiles_count == 0: - self.tiles_count = 1 + elif self.slices_count == 0: + self.slices_count = 1 - return self.tiles_count + return self.slices_count def __getitem__(self, idx): - if (not self.slices or not self.tiles) and self.decompose: + if (not self.slices or not self.slice_indicies) and self.decompose: self.generate_tile_indicies() - elif (not self.slices or not self.tiles) and not self.decompose: + elif (not self.slices or not self.slice_indicies) and not self.decompose: self.slices = [(slice(None), slice(None), slice(None))] else: self.slices = self.slices - self.tiles = self.tiles + self.slice_indicies = self.slice_indicies if self.data_array is None: self.get_data() @@ -1074,5 +1083,5 @@ def generate_tile_indicies(self): ) self.slices = decompose.slices - self.tiles = decompose.tiles - self.tiles_count = len(self.tiles) + self.slice_indicies = decompose.slice_indicies + self.slices_count = len(self.slice_indicies) From b24d299607c46bcd0612123f402852b69fcfc296 Mon Sep 17 00:00:00 2001 From: hllelli2 Date: Wed, 9 Oct 2024 14:46:16 +0000 Subject: [PATCH 50/56] Refactor: Simplify iterator method in HDF5DataStore --- src/caked/hdf5.py | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/src/caked/hdf5.py b/src/caked/hdf5.py index 22e5d78..f4c395e 100644 --- a/src/caked/hdf5.py +++ b/src/caked/hdf5.py @@ -69,8 +69,7 @@ def __getitem__(self, key: str): def __iter__(self): with h5py.File(self.save_path, "r") as f: - for key in f: - yield key + yield from f def get(self, key: str, default=None, to_torch: bool = False): try: From 7d12eced31bc9722648072ec733603c2117223e3 Mon Sep 17 00:00:00 2001 From: Luc Elliott Date: Wed, 9 Oct 2024 16:42:03 +0000 Subject: [PATCH 51/56] Refactor: update default voxel parameter type and improve kwargs handling --- src/caked/Transforms/transforms.py | 5 ++--- src/caked/dataloader.py | 4 +++- 2 files changed, 5 insertions(+), 4 deletions(-) diff --git a/src/caked/Transforms/transforms.py b/src/caked/Transforms/transforms.py index c6df624..08b1c5e 100644 --- a/src/caked/Transforms/transforms.py +++ b/src/caked/Transforms/transforms.py @@ -124,7 +124,7 @@ def __call__( ) -> tuple[MapObjHandle, dict]: # This is needed to do the normalisation but I need to check if label obj is affected by this - vox = kwargs.get("vox", 1) + vox = kwargs.get("vox", 1.0) vox_min = kwargs.get("vox_min", 0.95) vox_max = kwargs.get("vox_max", 1.05) @@ -141,11 +141,10 @@ def __call__( if voxz > vox_max or voxz < vox_min: sample[0] = int(mapobj.dim[2] / vox) sample = tuple(sample) - interpolate_to_grid( mapobj, sample, - vox, + (vox, vox, vox), mapobj.origin, inplace=True, prefilter_input=mapobj.all_transforms, diff --git a/src/caked/dataloader.py b/src/caked/dataloader.py index 5dd0cce..f82d675 100644 --- a/src/caked/dataloader.py 
+++ b/src/caked/dataloader.py @@ -656,8 +656,10 @@ def __init__( self.label_mapobj: MapObjHandle | None = None self.weight_mapobj: MapObjHandle | None = None + cshape = kwargs.get("cshape", 32) + margin = kwargs.get("margin", 8) if self.decompose_kwargs is None: - self.decompose_kwargs = {"cshape": 32, "margin": 8} + self.decompose_kwargs = {"cshape": cshape, "margin": margin} if self.transform_kwargs is None: self.transform_kwargs = {} From 2f3253611c4d27fb1e384c430790d125d4ffdec2 Mon Sep 17 00:00:00 2001 From: Luc Elliott Date: Wed, 23 Oct 2024 12:54:57 +0000 Subject: [PATCH 52/56] Updated MapDataLoader to accepted the Mapdataset Kwargs in the load function. This allows any keywords to be applied to all MapDatasets. --- src/caked/Transforms/transforms.py | 10 +++------- src/caked/dataloader.py | 9 ++++++++- src/caked/utils.py | 10 +++++++--- tests/test_map_io.py | 16 ++++++++++++++++ 4 files changed, 34 insertions(+), 11 deletions(-) diff --git a/src/caked/Transforms/transforms.py b/src/caked/Transforms/transforms.py index fd5fdab..f91293e 100644 --- a/src/caked/Transforms/transforms.py +++ b/src/caked/Transforms/transforms.py @@ -96,13 +96,9 @@ def __init__(self, map_shape: tuple, **kwargs): ) slice_indicies.append((i, j, k)) - ishape = (i + cshape) - i - jshape = (j + cshape) - j - kshape = (k + cshape) - k - - if ishape != 32 or jshape != 32 or kshape != 32: - print(ishape, jshape, kshape) - + if len(slice_indicies) == 0: + msg = "No slices were generated, please check the step and cshape values." + raise ValueError(msg) self.slices = slices self.slice_indicies = slice_indicies diff --git a/src/caked/dataloader.py b/src/caked/dataloader.py index ae944e7..c00ac03 100644 --- a/src/caked/dataloader.py +++ b/src/caked/dataloader.py @@ -302,6 +302,7 @@ def load( weight_path: str | Path | None = None, use_gpu: bool = False, num_workers: int = 1, + **kwargs, ) -> None: """ Load the data from the specified path and data type. @@ -313,6 +314,7 @@ def load( weight_path (str | Path, optional): The path to the directory containing the weights. Defaults to None. multi_process (bool, optional): Whether to use multi-processing. Defaults to False. use_gpu (bool, optional): Whether to use the GPU. Defaults to False. 
+ kwargs: Additional keyword arguments used for MapDataSet Returns: None @@ -379,6 +381,7 @@ def load( self.decompose, map_hdf5_store, label_hdf5_store, + **kwargs, ) self.dataset = ConcatDataset(datasets) @@ -669,9 +672,10 @@ def __init__( self.transforms = [] if self.transforms is None else self.transforms if not self.decompose_kwargs.get("step", False): - self.decompose_kwargs["step"] = self.decompose_kwargs.get("cshape", 1) - ( + step = self.decompose_kwargs.get("cshape", 1) - ( 2 * self.decompose_kwargs.get("margin") ) + self.decompose_kwargs["step"] = step if step != 0 else 1 def __len__(self): if self.slices_count == 0 and self.decompose: @@ -695,7 +699,10 @@ def __getitem__( map_array = self.map_hdf5_store.get(f"{self.id}_map", to_torch=True) if map_array.ndim == 4: + print(len(self.slices)) + print(idx) x_slice, y_slice, z_slice = self.slices[idx] + print(x_slice, y_slice, z_slice) map_slice = map_array[:, x_slice, y_slice, z_slice] else: map_slice = map_array[self.slices[idx]] diff --git a/src/caked/utils.py b/src/caked/utils.py index be23bdc..255852a 100644 --- a/src/caked/utils.py +++ b/src/caked/utils.py @@ -22,6 +22,7 @@ def process_datasets( decompose: bool, raw_map_HDF5: HDF5DataStore, label_HDF5: HDF5DataStore | None = None, + **kwargs, ): """ Process multiple datasets in parallel. @@ -52,6 +53,7 @@ def process_datasets( decompose, raw_map_HDF5, label_HDF5, + **kwargs, ) for path, label_path, weight_path in zip(paths, label_paths, weight_paths) ] @@ -81,6 +83,7 @@ def process_map_dataset( decompose: bool, map_hdf5: HDF5DataStore, label_hdf5: HDF5DataStore | None, + **kwargs, ): """ Process a single map dataset, applying transformations and augmentations, closes the map objects. @@ -109,15 +112,16 @@ def process_map_dataset( decompose=decompose, map_hdf5_store=map_hdf5, label_hdf5_store=label_hdf5, + **kwargs, ) map_dataset.transform(close_map_objects=False) map_dataset.augment(close_map_objects=False) result = { "map_data": map_dataset.mapobj.data, "label_data": map_dataset.label_mapobj.data if label_path is not None else None, - "weight_data": map_dataset.weight_mapobj.data - if weight_path is not None - else None, + "weight_data": ( + map_dataset.weight_mapobj.data if weight_path is not None else None + ), } map_dataset.close_map_objects() diff --git a/tests/test_map_io.py b/tests/test_map_io.py index e5af235..37230fa 100644 --- a/tests/test_map_io.py +++ b/tests/test_map_io.py @@ -98,6 +98,22 @@ def test_dataloader_load_to_HDF5_file(test_data_single_mrc_temp_dir): assert test_map_dataloader.dataset.datasets[0].map_hdf5_store.save_path.exists() +def test_dataloader_load_and_decompose(test_data_single_mrc_temp_dir): + test_map_dataloader = MapDataLoader() + test_map_dataloader.load( + datapath=test_data_single_mrc_temp_dir, + datatype=DATATYPE_MRC, + cshape=16, + ) + + assert test_map_dataloader is not None + assert isinstance(test_map_dataloader, MapDataLoader) + test_map_dataset = test_map_dataloader.dataset.datasets[0] + slice_ = test_map_dataset.__getitem__(0)[0] + + assert slice_.shape == (2, 16, 16, 16) + + def test_dataloader_load_to_HDF5_file_with_transforms(test_data_single_mrc_temp_dir): test_map_dataloader = MapDataLoader( transformations=TRANSFORM_ALL, From 1844ec7fd5a4c18db4f4df14cc04ede0c6e0d2ff Mon Sep 17 00:00:00 2001 From: hllelli2 Date: Wed, 23 Oct 2024 13:09:59 +0000 Subject: [PATCH 53/56] Fix: Change default value of 'vox' to a float and update related logic in MapObjectVoxelNormalisation --- src/caked/Transforms/transforms.py | 5 ++--- 1 file changed, 
2 insertions(+), 3 deletions(-) diff --git a/src/caked/Transforms/transforms.py b/src/caked/Transforms/transforms.py index 4afdfe2..fd5fdab 100644 --- a/src/caked/Transforms/transforms.py +++ b/src/caked/Transforms/transforms.py @@ -124,7 +124,7 @@ def __call__( ) -> tuple[MapObjHandle, dict]: # This is needed to do the normalisation but I need to check if label obj is affected by this - vox = kwargs.get("vox", 1) + vox = kwargs.get("vox", 1.0) vox_min = kwargs.get("vox_min", 0.95) vox_max = kwargs.get("vox_max", 1.05) @@ -141,11 +141,10 @@ def __call__( if voxz > vox_max or voxz < vox_min: sample[0] = int(mapobj.dim[2] / vox) sample = tuple(sample) - interpolate_to_grid( mapobj, sample, - vox, + (vox, vox, vox), mapobj.origin, inplace=True, prefilter_input=mapobj.all_transforms, From b8d9c65a61f31d979bb06006d15331ddced119e8 Mon Sep 17 00:00:00 2001 From: hllelli2 Date: Thu, 24 Oct 2024 10:08:47 +0000 Subject: [PATCH 54/56] Cleanup: remove debug print statements from MapDataset class --- src/caked/dataloader.py | 3 --- 1 file changed, 3 deletions(-) diff --git a/src/caked/dataloader.py b/src/caked/dataloader.py index c00ac03..e4f1487 100644 --- a/src/caked/dataloader.py +++ b/src/caked/dataloader.py @@ -699,10 +699,7 @@ def __getitem__( map_array = self.map_hdf5_store.get(f"{self.id}_map", to_torch=True) if map_array.ndim == 4: - print(len(self.slices)) - print(idx) x_slice, y_slice, z_slice = self.slices[idx] - print(x_slice, y_slice, z_slice) map_slice = map_array[:, x_slice, y_slice, z_slice] else: map_slice = map_array[self.slices[idx]] From d975d445c8e7a3369bb96a50ee0dd30eed8c2496 Mon Sep 17 00:00:00 2001 From: aj26git Date: Tue, 10 Dec 2024 13:49:22 +0000 Subject: [PATCH 55/56] update pyproject.toml with dependency version --- pyproject.toml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/pyproject.toml b/pyproject.toml index 3d363e4..9e45763 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -28,7 +28,7 @@ classifiers = [ "Typing :: Typed", ] -dependencies = ["torch", "numpy", "pandas", "mrcfile", "torchvision", "scipy", "pyarrow", "ccpem-utils", "h5py", "psutil"] +dependencies = ["torch", "numpy", "pandas", "mrcfile", "torchvision", "scipy ~= 1.9.3", "pyarrow", "ccpem-utils", "h5py", "psutil", "pillow ~= 9.3"] [project.optional-dependencies] test = [ From dc23c5c8136e4aaed0577f88a6adbd7297a142e0 Mon Sep 17 00:00:00 2001 From: Luc Date: Thu, 13 Mar 2025 13:17:40 +0000 Subject: [PATCH 56/56] Enhance MapDataLoader: Add background filtering to skip slices with excessive background. Added check to see if train is set to false, if so, no augmentation is applied. 
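A minimal usage sketch of the new option (illustrative; the label directory
is assumed, and labels are required because background is judged from the
label tiles):

    from caked.dataloader import MapDataLoader

    loader = MapDataLoader()
    loader.load(
        datapath="tests/testdata_mrc/mrc",
        datatype="mrc",
        label_path="tests/testdata_mrc/labels",  # hypothetical path
        background_filter=0.3,  # drop tiles with more than 30% background
    )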
---
 src/caked/base.py       |  1 +
 src/caked/dataloader.py | 46 ++++++++++++++++++++++++++++++++++++++++-
 src/caked/utils.py      | 36 ++++++++++++++++++++++++++++++++
 3 files changed, 82 insertions(+), 1 deletion(-)

diff --git a/src/caked/base.py b/src/caked/base.py
index 3921431..3c2cb8a 100644
--- a/src/caked/base.py
+++ b/src/caked/base.py
@@ -114,3 +114,4 @@ class DatasetConfig:
     slices: list[tuple[int, int, int]] | None = None
     slice_indicies = None
     slices_count: int = 0
+    train: bool = True
diff --git a/src/caked/dataloader.py b/src/caked/dataloader.py
index e4f1487..1392acb 100644
--- a/src/caked/dataloader.py
+++ b/src/caked/dataloader.py
@@ -27,6 +27,7 @@
     get_max_memory,
     get_sorted_paths,
     process_datasets,
+    find_background_slices_to_skip,
 )

 try:
@@ -302,6 +303,7 @@ def load(
         weight_path: str | Path | None = None,
         use_gpu: bool = False,
         num_workers: int = 1,
+        background_filter: float | None = None,
         **kwargs,
     ) -> None:
         """
@@ -337,7 +339,7 @@ def load(
         map_hdf5_store = HDF5DataStore(
             datapath.joinpath("raw_map_data.h5"),
             cache=cache,
-        )  # TODO: cache size should be a parameter and 40 is for my own testing
+        )

         label_hdf5_store = (
             HDF5DataStore(label_path.joinpath("label_data.h5"), cache=cache)
@@ -393,6 +395,11 @@ def load(
             ]
             self.classes = np.unique(np.concatenate(unique_labels).flatten()).tolist()

+        if background_filter is not None:
+            self.filter_slices_under_background_limit(
+                self.classes, background_limit=background_filter
+            )
+
     def process(self):
         """ """
         raise NotImplementedError()
@@ -472,6 +479,41 @@ def get_loader(
             shuffle=True,
         )

+    def filter_slices_under_background_limit(
+        self,
+        class_labels,
+        background_limit: float = 0.3,
+    ):
+        """
+        Find the slices in the dataloader that are mostly background and
+        remove them in place.
+
+        :param class_labels: list of class labels present in the datasets
+        :param background_limit: maximum allowed fraction of background voxels
+
+        :return: None; the per-dataset slices and counts are updated in place
+        """
+        to_skip = find_background_slices_to_skip(
+            self,
+            class_labels,
+            background_limit=background_limit,
+        )
+
+        for dataset in self.dataset.datasets:
+            if dataset.id in to_skip:
+                dataset.slice_indicies = [
+                    tile
+                    for i, tile in enumerate(dataset.slice_indicies)
+                    if i not in to_skip[dataset.id]
+                ]
+
+                dataset.slices = [
+                    slice_
+                    for i, slice_ in enumerate(dataset.slices)
+                    if i not in to_skip[dataset.id]
+                ]
+                dataset.slices_count = len(dataset.slice_indicies)
+
+        self.dataset.cumulative_sizes = self.dataset.cumsum(self.dataset.datasets)
+

 class DiskDataset(AbstractDataset):
     """
@@ -649,6 +691,8 @@ def __init__(
         self.slice_indicies: list = kwargs.get("slice_indicies", [])
         self.slices_count = kwargs.get("slices_count", config.slices_count)
         self.transforms = kwargs.get("transforms", config.transforms)
         self.augments = kwargs.get("augments", config.augments)
+        if not config.train:
+            self.augments = None
         self.decompose_kwargs = kwargs.get("decompose_kwargs", config.decompose_kwargs)
         self.transform_kwargs = kwargs.get("transform_kwargs", config.transform_kwargs)
diff --git a/src/caked/utils.py b/src/caked/utils.py
index 255852a..17adb1a 100644
--- a/src/caked/utils.py
+++ b/src/caked/utils.py
@@ -12,6 +12,9 @@
 from caked.Wrappers import none_return_none

+DEVICE = torch.device("cuda" if torch.cuda.is_available() else "cpu")
+
+
 def process_datasets(
     num_workers: int,
     paths: list[str],
@@ -282,3 +285,36 @@ def get_max_memory() -> int:
     mem_info = psutil.virtual_memory()
     max_memory_gb = mem_info.total / (1024**3)  # Convert bytes to GB
     return int(max_memory_gb // 1)
+
+
+def find_background_slices_to_skip(
+    dataloader,
+    class_labels,
+    background_limit: float = 0.3,
+) -> dict:
+    to_skip = {}
+
+    for dataset in dataloader.dataset.datasets:
+        counts_tensor = torch.zeros(
+            len(class_labels), dtype=torch.int32, device=DEVICE
+        )
+        for index in range(len(dataset)):
+            _, label_tensor = dataset[index]
+            label_tensor = label_tensor.to(DEVICE)
+
+            label_tensor = label_tensor.flatten().type(torch.int64)
+
+            if label_tensor.numel() == 0:
+                continue
+            counts_tensor.zero_()
+            counts_tensor.scatter_add_(
+                0, label_tensor, torch.ones_like(label_tensor, dtype=torch.int32)
+            )
+
+            total = label_tensor.size(0)
+            # fraction of voxels in this tile labelled as background (class 0)
+            background_fraction = (counts_tensor[0] / total).item()
+
+            if background_fraction > background_limit:
+                if dataset.id not in to_skip:
+                    to_skip[dataset.id] = []
+                to_skip[dataset.id].append(index)
+
+    return to_skip
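For reference, a standalone sketch of the per-tile counting used in
find_background_slices_to_skip above (scatter_add_ accumulates one count per
voxel label; index 0 is treated as background):

    import torch

    labels = torch.tensor([0, 0, 2, 1, 0])  # a flattened label tile
    counts = torch.zeros(3, dtype=torch.int32)
    counts.scatter_add_(0, labels, torch.ones_like(labels, dtype=torch.int32))
    background_fraction = (counts[0] / labels.numel()).item()  # 3 / 5 = 0.6

With background_limit=0.3, a tile like this one would be skipped.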