# czi support added to src/data_utils.py (requires: pip install pylibCZIrw)
from pylibCZIrw import czi as pyczi


class czi_wrapper:
    """Openslide-like wrapper around pylibCZIrw for reading .czi whole-slide images.

    Exposes the subset of the openslide.OpenSlide interface used downstream:
    ``level_dimensions``, ``level_downsamples``, ``properties``,
    ``associated_images`` and ``read_region``. czi files carry no pyramid,
    so artificial levels are derived by powers-of-two downsampling.

    Parameters
    ----------
    path : str
        Path to the whole-slide image (.czi).
    levels : int, optional
        Number of artificial pyramid levels to expose (level i is a 2**i
        downsample of level 0).
    sharpen_img : bool, optional
        Whether to sharpen each region after reading (cohort dependent).

    Examples
    --------
    Use as a replacement for openslide.open_slide:
    >>> sl = czi_wrapper(path)
    >>> sl.read_region(...)
    """

    def __init__(self, path, levels=11, sharpen_img=True):
        self.path = path
        self.levels = levels
        self.sharpen_img = sharpen_img
        self.level_dimensions = None
        self.level_downsamples = None
        self.properties = {}
        self.associated_images = {}
        try:
            self._generate_dictionaries()
        except Exception as exc:
            # Chain the underlying error instead of discarding it; a bare
            # `except:` would also swallow KeyboardInterrupt/SystemExit.
            raise RuntimeError(f"issue with {self.path}") from exc

    @staticmethod
    def _convert_rect_to_tuple(rect):
        # pylibCZIrw rectangles expose x/y/w/h attributes.
        return rect.x, rect.y, rect.w, rect.h

    @staticmethod
    def _sharpen(img_o):
        # Unsharp masking: 3*img - 2*blur(img).
        img_b = cv2.GaussianBlur(img_o, ksize=[3, 3], sigmaX=1, sigmaY=1)
        return cv2.addWeighted(img_o, 3.0, img_b, -2.0, 0)

    def _generate_dictionaries(self):
        """Populate level, property, offset and thumbnail metadata from the czi file."""
        with pyczi.open_czi(self.path) as sl:
            total_bounding_rectangle = sl.total_bounding_rectangle
            meta = sl.metadata["ImageDocument"]["Metadata"]

            # pylibCZIrw reads BGR; convert for a viewable RGB thumbnail.
            self.associated_images["thumbnail"] = PIL.Image.fromarray(
                cv2.cvtColor(sl.read(zoom=0.005), cv2.COLOR_BGR2RGB)
            )

            x, y, w, h = self._convert_rect_to_tuple(total_bounding_rectangle)
            self.level_dimensions = tuple(
                (int(w / (2**i)), int(h / (2**i))) for i in range(self.levels)
            )
            self.level_downsamples = tuple(2.0**i for i in range(self.levels))
            # Scaling metadata is in meters per pixel; convert to microns.
            mpp = {
                m["@Id"]: float(m["Value"]) * 1e6
                for m in meta["Scaling"]["Items"]["Distance"]
            }
            self.properties["openslide.mpp-x"] = mpp["X"]
            self.properties["openslide.mpp-y"] = mpp["Y"]
            # Origin of the bounding rectangle; added to read_region coords.
            self.tx = x
            self.ty = y

    def read_region(self, crds, level, size):
        """Read a region, mirroring openslide's read_region signature.

        Parameters
        ----------
        crds : tuple(int, int)
            (x, y) top-left corner in level-0 coordinates, relative to the
            bounding rectangle origin.
        level : int
            Artificial pyramid level; data is read at zoom 1 / 2**level.
        size : tuple(int, int)
            (width, height) of the returned region at the requested level.

        Returns
        -------
        numpy.ndarray
            RGB image array (NOTE(review): openslide returns a PIL RGBA
            image here — callers are expected to handle both; confirm).
        """
        with pyczi.open_czi(self.path) as sl:
            img = sl.read(
                # plane={"T": 0, "Z": 0, "C": 0},
                zoom=1.0 / (2**level),
                roi=(
                    self.tx + crds[0],
                    self.ty + crds[1],
                    size[0] * (2**level),
                    size[1] * (2**level),
                ),
            )

        if self.sharpen_img:
            img = self._sharpen(img)
        return cv2.cvtColor(img, cv2.COLOR_BGR2RGB)
NotImplementedError("Only *.svs, *.tif and *.mrxs files supported") + if ( + extension != ".svs" + and extension != ".mrxs" + and extension != ".tif" + and extension != ".czi" + ): + raise NotImplementedError( + "Only *.svs, *.tif, *.czi, and *.mrxs files supported" + ) # Load and create slide and affect default values self.path = path - self.s = openslide.open_slide(self.path) + self.s = ( + openslide.open_slide(self.path) + if extension != ".czi" + else czi_wrapper(self.path) + ) self.crop_sizes_px = crop_sizes_px self.crop_magnifications = crop_magnifications self.transform = transform @@ -258,10 +353,15 @@ def _pil_rgba2rgb( """ if default_background is None: default_background = (255, 255, 255) - - image.load() - background = PIL.Image.new("RGB", image.size, default_background) - background.paste(image, mask=image.split()[3]) + if type(image) == np.ndarray: + if image.shape[-1] == 3: + return image + else: + return cv2.cvtColor(image, cv2.COLOR_RGBA2RGB) + else: + image.load() + background = PIL.Image.new("RGB", image.size, default_background) + background.paste(image, mask=image.split()[3]) return background @staticmethod diff --git a/src/post_process_utils.py b/src/post_process_utils.py index 50cfaa8..df4edb2 100644 --- a/src/post_process_utils.py +++ b/src/post_process_utils.py @@ -423,7 +423,7 @@ def get_wsi(wsi_path, read_ds=32, pannuke=False, tile_size=256, padding_factor=0 padding_factor=padding_factor, ratio_object_thresh=0.0001, ) - sl = openslide.open_slide(wsi_path) + sl = ws_ds.s # openslide.open_slide(wsi_path) sl_info = get_openslide_info(sl) target_level = np.argwhere(np.isclose(sl_info["level_downsamples"], read_ds)).item() ds_coord = ws_ds.crop_metadatas[0] @@ -630,9 +630,9 @@ def get_shapes(params, nclasses): print("getting coords:") ds_coord = dataset.crop_metadatas[0][:, 2:4].copy() try: - with openslide.open_slide(params["p"]) as sl: - bounds_x = int(sl.properties["openslide.bounds-x"]) # 158208 - bounds_y = 
int(sl.properties["openslide.bounds-y"]) # 28672 + sl = dataset.s + bounds_x = int(sl.properties["openslide.bounds-x"]) # 158208 + bounds_y = int(sl.properties["openslide.bounds-y"]) # 28672 except KeyError: bounds_x = 0 bounds_y = 0 @@ -640,17 +640,17 @@ def get_shapes(params, nclasses): ds_coord -= np.array([bounds_x, bounds_y]) ccrop = int(tile_size * padding_factor) - rel_res = np.isclose(dataset.mpp, LUT_MAGNIFICATION_MPP, rtol=0.05) + rel_res = np.isclose(dataset.mpp, LUT_MAGNIFICATION_MPP, rtol=0.2) if sum(rel_res) != 1: raise NotImplementedError( "Currently no support for images with this resolution. Check src.constants in LUT_MAGNIFICATION_MPP and LUT_MAGNIFICATION_X to add the resultion - downsampling pair" ) else: ds_factor = LUT_MAGNIFICATION_X[rel_res.argmax()] / level - if ds_factor < 1: - raise NotImplementedError( - "The specified model does not support images at this resolution. Consider supplying a higher resolution image" - ) + # if ds_factor < 1: + # raise NotImplementedError( + # "The specified model does not support images at this resolution. Consider supplying a higher resolution image" + # ) ds_coord /= ds_factor ds_coord += (tile_size - ccrop) // 2 From c8171d8679662ce207b27ea54c7550b46f171d53 Mon Sep 17 00:00:00 2001 From: eliasbaumann Date: Wed, 9 Apr 2025 09:27:45 +0200 Subject: [PATCH 2/3] czi works on SurGen --- .gitignore | 4 + README.md | 273 +++++++++++++++++++++++++++-------------------------- 2 files changed, 141 insertions(+), 136 deletions(-) diff --git a/.gitignore b/.gitignore index 3681401..16614f9 100644 --- a/.gitignore +++ b/.gitignore @@ -207,3 +207,7 @@ sample_cls.bmp sample_cls.jpg sample_he.jpg testo.sh +debug.py +direct_cpu_pp.sh +run_inference_container_jc.sh +sample_analysis.ipynb diff --git a/README.md b/README.md index 157a7b1..c21eb51 100644 --- a/README.md +++ b/README.md @@ -1,136 +1,137 @@ -# HoVer-NeXt Inference -HoVer-NeXt is a fast and efficient nuclei segmentation and classification pipeline. 
- -Supported are a variety of data formats, including all OpenSlide supported datatypes, `.npy` numpy array dumps, and common image formats such as JPEG and PNG. -If you are having trouble with using this repository, please create an issue and we will be happy to help! - -For training code, please check the [hover-next training repository](https://github.com/digitalpathologybern/hover_next_train) - -Find the Publication here: [https://openreview.net/pdf?id=3vmB43oqIO](https://openreview.net/pdf?id=3vmB43oqIO) - -## Setup - -Environments for train and inference are the same so if you already have set the environment up for training, you can use it for inference as well. - -Otherwise: - -```bash -conda env create -f environment.yml -conda activate hovernext -pip install torch==2.1.1 torchvision==0.16.1 --index-url https://download.pytorch.org/whl/cu118 -``` - -or use predefined [docker/singularity container](#docker-and-apptainersingularity-container) - -## Model Weights - -Weights are hosted on [Zenodo](https://zenodo.org/records/10635618) -By specifying one of the ID's listed, weights are **automatically** downloaded and loaded. 
- -| Dataset | ID | Weights | -|--------------|--------|-----| -| Lizard-Mitosis | "lizard_convnextv2_large" | [Large](https://zenodo.org/records/10635618/files/lizard_convnextv2_large.zip?download=1) | -| | "lizard_convnextv2_base" |[Base](https://zenodo.org/records/10635618/files/lizard_convnextv2_base.zip?download=1) | -| | "lizard_convnextv2_tiny" |[Tiny](https://zenodo.org/records/10635618/files/lizard_convnextv2_tiny.zip?download=1) | -| PanNuke | "pannuke_convnextv2_tiny_1" | [Tiny Fold 1](https://zenodo.org/records/10635618/files/pannuke_convnextv2_tiny_1.zip?download=1) | -| | "pannuke_convnextv2_tiny_2" | [Tiny Fold 2](https://zenodo.org/records/10635618/files/pannuke_convnextv2_tiny_2.zip?download=1) | -| | "pannuke_convnextv2_tiny_3" | [Tiny Fold 3](https://zenodo.org/records/10635618/files/pannuke_convnextv2_tiny_3.zip?download=1) | - -If you are manually downloading weights, unzip them in the directory, such that the folder (e.g. ```lizard_convnextv2_large```) sits in the same directory as ```main.py```. - -## WSI Inference - -This pipeline uses OpenSlide to read images, and therefore supports all formats which are supported by OpenSlide. -If you want to run this pipeline on custom ome.tif files, ensure that the necessary metadata such as resolution, downsampling and dimensions are available. -Before running a slide, choose [appropriate parameters for your machine](#optimizing-inference-for-your-machine) - -To run a single slide: - -```bash -python3 main.py \ - --input "/path-to-wsi/wsi.svs" \ - --output_root "results/" \ - --cp "lizard_convnextv2_large" \ - --tta 4 \ - --inf_workers 16 \ - --pp_tiling 10 \ - --pp_workers 16 -``` - -To run multiple slides, specify a glob pattern such as `"/path-to-folder/*.mrxs"` or provide a list of paths as a `.txt` file. - -### Slurm - -if you are running on a slurm cluster you might consider separating pre and post-processing to improve GPU utilization. 
-Use the `--only_inference` parameter and submit another job on with the same parameters, but removing the `--only_inference`. - -## NPY / Image inference - -NPY and image inference works the same as WSI inference, however output files are only a ZARR array. - -```bash -python3 main.py \ - --input "/path-to-file/file.npy" \ - --output_root "/results/" \ - --cp "lizard_convnextv2_large" \ - --tta 4 \ - --inf_workers 16 \ - --pp_tiling 10 \ - --pp_workers 16 -``` - -Support for other datatypes are easy to implement. Check the NPYDataloader for reference. - -## Optimizing inference for your machine: - -1. WSI is on the machine or on a fast access network location -2. If you have multiple machines, e.g. CPU-only machines, you can move post-processing to that machine -3. '--tta 4' yields robust results with very high speed -4. '--inf_workers' should be set to the number of available cores -5. '--pp_workers' should be set to number of available cores -1, with '--pp_tiling' set to a low number where the machine does not run OOM. E.g. on a 16-Core machine, '--pp_workers 16 --pp_tiling 8 is good. If you are running out of memory, increase --pp_tiling. - -## Using the output files for downstream analysis: - -By default, the pipeline produces an instance-map, a class-lookup with centroids and a number of .tsv files to load in QuPath. -sample_analysis.ipynb shows exemplarily how to use the files. - -## Docker and Apptainer/Singularity Container: - -Download the singularity image from [Zenodo](https://zenodo.org/records/10649470/files/hover_next.sif) - -```bash -# don't forget to mount your local directory -export APPTAINER_BINDPATH="/storage" -apptainer exec --nv /path-to-container/hover_next.sif \ - python3 /path-to-repo/main.py \ - --input "/path-to-wsi/*.svs" \ - --output_root "results/" \ - --cp "lizard_convnextv2_large" \ - --tta 4 -``` -# License - -This repository is licensed under GNU General Public License v3.0 (See License Info). 
-If you are intending to use this repository for commercial usecases, please check the licenses of all python packages referenced in the Setup section / described in the requirements.txt and environment.yml. - -# Citation - -If you are using this code, please cite: -``` -@inproceedings{baumann2024hover, - title={HoVer-NeXt: A Fast Nuclei Segmentation and Classification Pipeline for Next Generation Histopathology}, - author={Baumann, Elias and Dislich, Bastian and Rumberger, Josef Lorenz and Nagtegaal, Iris D and Martinez, Maria Rodriguez and Zlobec, Inti}, - booktitle={Medical Imaging with Deep Learning}, - year={2024} -} -``` -and -``` -@INPROCEEDINGS{rumberger2022panoptic, - author={Rumberger, Josef Lorenz and Baumann, Elias and Hirsch, Peter and Janowczyk, Andrew and Zlobec, Inti and Kainmueller, Dagmar}, - booktitle={2022 IEEE International Symposium on Biomedical Imaging Challenges (ISBIC)}, - title={Panoptic segmentation with highly imbalanced semantic labels}, - year={2022}, - pages={1-4}, - doi={10.1109/ISBIC56247.2022.9854551}} -``` +# HoVer-NeXt Inference +HoVer-NeXt is a fast and efficient nuclei segmentation and classification pipeline. + +Supported are a variety of data formats, including all OpenSlide supported datatypes, `.npy` numpy array dumps, and common image formats such as JPEG and PNG. +If you are having trouble with using this repository, please create an issue and we will be happy to help! + +For training code, please check the [hover-next training repository](https://github.com/digitalpathologybern/hover_next_train) + +Find the Publication here: [https://openreview.net/pdf?id=3vmB43oqIO](https://openreview.net/pdf?id=3vmB43oqIO) + +## Setup + +Environments for train and inference are the same so if you already have set the environment up for training, you can use it for inference as well. 
+ +Otherwise: + +```bash +conda env create -f environment.yml +conda activate hovernext +pip install torch==2.1.1 torchvision==0.16.1 --index-url https://download.pytorch.org/whl/cu118 +``` + +or use predefined [docker/singularity container](#docker-and-apptainersingularity-container) + +## Model Weights + +Weights are hosted on [Zenodo](https://zenodo.org/records/10635618) +By specifying one of the ID's listed, weights are **automatically** downloaded and loaded. + +| Dataset | ID | Weights | +|--------------|--------|-----| +| Lizard-Mitosis | "lizard_convnextv2_large" | [Large](https://zenodo.org/records/10635618/files/lizard_convnextv2_large.zip?download=1) | +| | "lizard_convnextv2_base" |[Base](https://zenodo.org/records/10635618/files/lizard_convnextv2_base.zip?download=1) | +| | "lizard_convnextv2_tiny" |[Tiny](https://zenodo.org/records/10635618/files/lizard_convnextv2_tiny.zip?download=1) | +| PanNuke | "pannuke_convnextv2_tiny_1" | [Tiny Fold 1](https://zenodo.org/records/10635618/files/pannuke_convnextv2_tiny_1.zip?download=1) | +| | "pannuke_convnextv2_tiny_2" | [Tiny Fold 2](https://zenodo.org/records/10635618/files/pannuke_convnextv2_tiny_2.zip?download=1) | +| | "pannuke_convnextv2_tiny_3" | [Tiny Fold 3](https://zenodo.org/records/10635618/files/pannuke_convnextv2_tiny_3.zip?download=1) | + +If you are manually downloading weights, unzip them in the directory, such that the folder (e.g. ```lizard_convnextv2_large```) sits in the same directory as ```main.py```. + +## WSI Inference + +This pipeline uses OpenSlide to read images, and therefore supports all formats which are supported by OpenSlide. +If you want to run this pipeline on custom ome.tif files, ensure that the necessary metadata such as resolution, downsampling and dimensions are available. +Additionally, czi is supported via pylibCZIrw. 
+Before running a slide, choose [appropriate parameters for your machine](#optimizing-inference-for-your-machine) + +To run a single slide: + +```bash +python3 main.py \ + --input "/path-to-wsi/wsi.svs" \ + --output_root "results/" \ + --cp "lizard_convnextv2_large" \ + --tta 4 \ + --inf_workers 16 \ + --pp_tiling 10 \ + --pp_workers 16 +``` + +To run multiple slides, specify a glob pattern such as `"/path-to-folder/*.mrxs"` or provide a list of paths as a `.txt` file. + +### Slurm + +if you are running on a slurm cluster you might consider separating pre and post-processing to improve GPU utilization. +Use the `--only_inference` parameter and submit another job with the same parameters, but removing the `--only_inference`. + +## NPY / Image inference + +NPY and image inference works the same as WSI inference, however output files are only a ZARR array. + +```bash +python3 main.py \ + --input "/path-to-file/file.npy" \ + --output_root "/results/" \ + --cp "lizard_convnextv2_large" \ + --tta 4 \ + --inf_workers 16 \ + --pp_tiling 10 \ + --pp_workers 16 +``` + +Support for other datatypes is easy to implement. Check the NPYDataloader for reference. + +## Optimizing inference for your machine: + +1. WSI is on the machine or on a fast access network location +2. If you have multiple machines, e.g. CPU-only machines, you can move post-processing to that machine +3. '--tta 4' yields robust results with very high speed +4. '--inf_workers' should be set to the number of available cores +5. '--pp_workers' should be set to number of available cores -1, with '--pp_tiling' set to a low number where the machine does not run OOM. E.g. on a 16-Core machine, '--pp_workers 16 --pp_tiling 8' is good. If you are running out of memory, increase --pp_tiling. + +## Using the output files for downstream analysis: + +By default, the pipeline produces an instance-map, a class-lookup with centroids and a number of .tsv files to load in QuPath. 
+sample_analysis.ipynb shows exemplarily how to use the files. + +## Docker and Apptainer/Singularity Container: + +Download the singularity image from [Zenodo](https://zenodo.org/records/10649470/files/hover_next.sif) + +```bash +# don't forget to mount your local directory +export APPTAINER_BINDPATH="/storage" +apptainer exec --nv /path-to-container/hover_next.sif \ + python3 /path-to-repo/main.py \ + --input "/path-to-wsi/*.svs" \ + --output_root "results/" \ + --cp "lizard_convnextv2_large" \ + --tta 4 +``` +# License + +This repository is licensed under GNU General Public License v3.0 (See License Info). +If you are intending to use this repository for commercial usecases, please check the licenses of all python packages referenced in the Setup section / described in the requirements.txt and environment.yml. + +# Citation + +If you are using this code, please cite: +``` +@inproceedings{baumann2024hover, + title={HoVer-NeXt: A Fast Nuclei Segmentation and Classification Pipeline for Next Generation Histopathology}, + author={Baumann, Elias and Dislich, Bastian and Rumberger, Josef Lorenz and Nagtegaal, Iris D and Martinez, Maria Rodriguez and Zlobec, Inti}, + booktitle={Medical Imaging with Deep Learning}, + year={2024} +} +``` +and +``` +@INPROCEEDINGS{rumberger2022panoptic, + author={Rumberger, Josef Lorenz and Baumann, Elias and Hirsch, Peter and Janowczyk, Andrew and Zlobec, Inti and Kainmueller, Dagmar}, + booktitle={2022 IEEE International Symposium on Biomedical Imaging Challenges (ISBIC)}, + title={Panoptic segmentation with highly imbalanced semantic labels}, + year={2022}, + pages={1-4}, + doi={10.1109/ISBIC56247.2022.9854551}} +``` From acc4f5ceb79d0bec830243b35f4504c9d693f31f Mon Sep 17 00:00:00 2001 From: eliasbaumann Date: Wed, 9 Apr 2025 09:31:01 +0200 Subject: [PATCH 3/3] updated requirements --- requirements.txt | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/requirements.txt b/requirements.txt index 52238ba..e95b2a5 100644 
--- a/requirements.txt +++ b/requirements.txt @@ -25,4 +25,5 @@ toml numcodecs imagecodecs timm==0.9.6 -geojson \ No newline at end of file +geojson +pylibCZIrw \ No newline at end of file