# czi support added to src/data_utils.py (requires: pip install pylibCZIrw)
from pylibCZIrw import czi as pyczi


class czi_wrapper:
    """Openslide-like wrapper around pylibCZIrw for reading .czi whole-slide images.

    Exposes the subset of the openslide.OpenSlide interface used downstream:
    ``level_dimensions``, ``level_downsamples``, ``properties``,
    ``associated_images`` and ``read_region``. czi files carry no pyramid,
    so artificial levels are derived by powers-of-two downsampling.

    Parameters
    ----------
    path : str
        Path to the whole-slide image (.czi).
    levels : int, optional
        Number of artificial pyramid levels to expose (level i is a 2**i
        downsample of level 0).
    sharpen_img : bool, optional
        Whether to sharpen each region after reading (cohort dependent).

    Examples
    --------
    Use as a replacement for openslide.open_slide:
    >>> sl = czi_wrapper(path)
    >>> sl.read_region(...)
    """

    def __init__(self, path, levels=11, sharpen_img=True):
        self.path = path
        self.levels = levels
        self.sharpen_img = sharpen_img
        self.level_dimensions = None
        self.level_downsamples = None
        self.properties = {}
        self.associated_images = {}
        try:
            self._generate_dictionaries()
        except Exception as exc:
            # Chain the underlying error instead of discarding it; a bare
            # `except:` would also swallow KeyboardInterrupt/SystemExit.
            raise RuntimeError(f"issue with {self.path}") from exc

    @staticmethod
    def _convert_rect_to_tuple(rect):
        # pylibCZIrw rectangles expose x/y/w/h attributes.
        return rect.x, rect.y, rect.w, rect.h

    @staticmethod
    def _sharpen(img_o):
        # Unsharp masking: 3*img - 2*blur(img).
        img_b = cv2.GaussianBlur(img_o, ksize=[3, 3], sigmaX=1, sigmaY=1)
        return cv2.addWeighted(img_o, 3.0, img_b, -2.0, 0)

    def _generate_dictionaries(self):
        """Populate level, property, offset and thumbnail metadata from the czi file."""
        with pyczi.open_czi(self.path) as sl:
            total_bounding_rectangle = sl.total_bounding_rectangle
            meta = sl.metadata["ImageDocument"]["Metadata"]

            # pylibCZIrw reads BGR; convert for a viewable RGB thumbnail.
            self.associated_images["thumbnail"] = PIL.Image.fromarray(
                cv2.cvtColor(sl.read(zoom=0.005), cv2.COLOR_BGR2RGB)
            )

            x, y, w, h = self._convert_rect_to_tuple(total_bounding_rectangle)
            self.level_dimensions = tuple(
                (int(w / (2**i)), int(h / (2**i))) for i in range(self.levels)
            )
            self.level_downsamples = tuple(2.0**i for i in range(self.levels))
            # Scaling metadata is in meters per pixel; convert to microns.
            mpp = {
                m["@Id"]: float(m["Value"]) * 1e6
                for m in meta["Scaling"]["Items"]["Distance"]
            }
            self.properties["openslide.mpp-x"] = mpp["X"]
            self.properties["openslide.mpp-y"] = mpp["Y"]
            # Origin of the bounding rectangle; added to read_region coords.
            self.tx = x
            self.ty = y

    def read_region(self, crds, level, size):
        """Read a region, mirroring openslide's read_region signature.

        Parameters
        ----------
        crds : tuple(int, int)
            (x, y) top-left corner in level-0 coordinates, relative to the
            bounding rectangle origin.
        level : int
            Artificial pyramid level; data is read at zoom 1 / 2**level.
        size : tuple(int, int)
            (width, height) of the returned region at the requested level.

        Returns
        -------
        numpy.ndarray
            RGB image array (NOTE(review): openslide returns a PIL RGBA
            image here — callers are expected to handle both; confirm).
        """
        with pyczi.open_czi(self.path) as sl:
            img = sl.read(
                # plane={"T": 0, "Z": 0, "C": 0},
                zoom=1.0 / (2**level),
                roi=(
                    self.tx + crds[0],
                    self.ty + crds[1],
                    size[0] * (2**level),
                    size[1] * (2**level),
                ),
            )

        if self.sharpen_img:
            img = self._sharpen(img)
        return cv2.cvtColor(img, cv2.COLOR_BGR2RGB)
NotImplementedError("Only *.svs, *.tif and *.mrxs files supported") + if ( + extension != ".svs" + and extension != ".mrxs" + and extension != ".tif" + and extension != ".czi" + ): + raise NotImplementedError( + "Only *.svs, *.tif, *.czi, and *.mrxs files supported" + ) # Load and create slide and affect default values self.path = path - self.s = openslide.open_slide(self.path) + self.s = ( + openslide.open_slide(self.path) + if extension != ".czi" + else czi_wrapper(self.path) + ) self.crop_sizes_px = crop_sizes_px self.crop_magnifications = crop_magnifications self.transform = transform @@ -258,10 +353,15 @@ def _pil_rgba2rgb( """ if default_background is None: default_background = (255, 255, 255) - - image.load() - background = PIL.Image.new("RGB", image.size, default_background) - background.paste(image, mask=image.split()[3]) + if type(image) == np.ndarray: + if image.shape[-1] == 3: + return image + else: + return cv2.cvtColor(image, cv2.COLOR_RGBA2RGB) + else: + image.load() + background = PIL.Image.new("RGB", image.size, default_background) + background.paste(image, mask=image.split()[3]) return background @staticmethod diff --git a/src/post_process_utils.py b/src/post_process_utils.py index 50cfaa8..df4edb2 100644 --- a/src/post_process_utils.py +++ b/src/post_process_utils.py @@ -423,7 +423,7 @@ def get_wsi(wsi_path, read_ds=32, pannuke=False, tile_size=256, padding_factor=0 padding_factor=padding_factor, ratio_object_thresh=0.0001, ) - sl = openslide.open_slide(wsi_path) + sl = ws_ds.s # openslide.open_slide(wsi_path) sl_info = get_openslide_info(sl) target_level = np.argwhere(np.isclose(sl_info["level_downsamples"], read_ds)).item() ds_coord = ws_ds.crop_metadatas[0] @@ -630,9 +630,9 @@ def get_shapes(params, nclasses): print("getting coords:") ds_coord = dataset.crop_metadatas[0][:, 2:4].copy() try: - with openslide.open_slide(params["p"]) as sl: - bounds_x = int(sl.properties["openslide.bounds-x"]) # 158208 - bounds_y = 
int(sl.properties["openslide.bounds-y"]) # 28672 + sl = dataset.s + bounds_x = int(sl.properties["openslide.bounds-x"]) # 158208 + bounds_y = int(sl.properties["openslide.bounds-y"]) # 28672 except KeyError: bounds_x = 0 bounds_y = 0 @@ -640,17 +640,17 @@ def get_shapes(params, nclasses): ds_coord -= np.array([bounds_x, bounds_y]) ccrop = int(tile_size * padding_factor) - rel_res = np.isclose(dataset.mpp, LUT_MAGNIFICATION_MPP, rtol=0.05) + rel_res = np.isclose(dataset.mpp, LUT_MAGNIFICATION_MPP, rtol=0.2) if sum(rel_res) != 1: raise NotImplementedError( "Currently no support for images with this resolution. Check src.constants in LUT_MAGNIFICATION_MPP and LUT_MAGNIFICATION_X to add the resultion - downsampling pair" ) else: ds_factor = LUT_MAGNIFICATION_X[rel_res.argmax()] / level - if ds_factor < 1: - raise NotImplementedError( - "The specified model does not support images at this resolution. Consider supplying a higher resolution image" - ) + # if ds_factor < 1: + # raise NotImplementedError( + # "The specified model does not support images at this resolution. Consider supplying a higher resolution image" + # ) ds_coord /= ds_factor ds_coord += (tile_size - ccrop) // 2 From c8171d8679662ce207b27ea54c7550b46f171d53 Mon Sep 17 00:00:00 2001 From: eliasbaumann Date: Wed, 9 Apr 2025 09:27:45 +0200 Subject: [PATCH 2/3] czi works on SurGen --- .gitignore | 4 + README.md | 273 +++++++++++++++++++++++++++-------------------------- 2 files changed, 141 insertions(+), 136 deletions(-) diff --git a/.gitignore b/.gitignore index 3681401..16614f9 100644 --- a/.gitignore +++ b/.gitignore @@ -207,3 +207,7 @@ sample_cls.bmp sample_cls.jpg sample_he.jpg testo.sh +debug.py +direct_cpu_pp.sh +run_inference_container_jc.sh +sample_analysis.ipynb diff --git a/README.md b/README.md index 157a7b1..c21eb51 100644 --- a/README.md +++ b/README.md @@ -1,136 +1,137 @@ -# HoVer-NeXt Inference -HoVer-NeXt is a fast and efficient nuclei segmentation and classification pipeline. 
- -Supported are a variety of data formats, including all OpenSlide supported datatypes, `.npy` numpy array dumps, and common image formats such as JPEG and PNG. -If you are having trouble with using this repository, please create an issue and we will be happy to help! - -For training code, please check the [hover-next training repository](https://github.com/digitalpathologybern/hover_next_train) - -Find the Publication here: [https://openreview.net/pdf?id=3vmB43oqIO](https://openreview.net/pdf?id=3vmB43oqIO) - -## Setup - -Environments for train and inference are the same so if you already have set the environment up for training, you can use it for inference as well. - -Otherwise: - -```bash -conda env create -f environment.yml -conda activate hovernext -pip install torch==2.1.1 torchvision==0.16.1 --index-url https://download.pytorch.org/whl/cu118 -``` - -or use predefined [docker/singularity container](#docker-and-apptainersingularity-container) - -## Model Weights - -Weights are hosted on [Zenodo](https://zenodo.org/records/10635618) -By specifying one of the ID's listed, weights are **automatically** downloaded and loaded. 
- -| Dataset | ID | Weights | -|--------------|--------|-----| -| Lizard-Mitosis | "lizard_convnextv2_large" | [Large](https://zenodo.org/records/10635618/files/lizard_convnextv2_large.zip?download=1) | -| | "lizard_convnextv2_base" |[Base](https://zenodo.org/records/10635618/files/lizard_convnextv2_base.zip?download=1) | -| | "lizard_convnextv2_tiny" |[Tiny](https://zenodo.org/records/10635618/files/lizard_convnextv2_tiny.zip?download=1) | -| PanNuke | "pannuke_convnextv2_tiny_1" | [Tiny Fold 1](https://zenodo.org/records/10635618/files/pannuke_convnextv2_tiny_1.zip?download=1) | -| | "pannuke_convnextv2_tiny_2" | [Tiny Fold 2](https://zenodo.org/records/10635618/files/pannuke_convnextv2_tiny_2.zip?download=1) | -| | "pannuke_convnextv2_tiny_3" | [Tiny Fold 3](https://zenodo.org/records/10635618/files/pannuke_convnextv2_tiny_3.zip?download=1) | - -If you are manually downloading weights, unzip them in the directory, such that the folder (e.g. ```lizard_convnextv2_large```) sits in the same directory as ```main.py```. - -## WSI Inference - -This pipeline uses OpenSlide to read images, and therefore supports all formats which are supported by OpenSlide. -If you want to run this pipeline on custom ome.tif files, ensure that the necessary metadata such as resolution, downsampling and dimensions are available. -Before running a slide, choose [appropriate parameters for your machine](#optimizing-inference-for-your-machine) - -To run a single slide: - -```bash -python3 main.py \ - --input "/path-to-wsi/wsi.svs" \ - --output_root "results/" \ - --cp "lizard_convnextv2_large" \ - --tta 4 \ - --inf_workers 16 \ - --pp_tiling 10 \ - --pp_workers 16 -``` - -To run multiple slides, specify a glob pattern such as `"/path-to-folder/*.mrxs"` or provide a list of paths as a `.txt` file. - -### Slurm - -if you are running on a slurm cluster you might consider separating pre and post-processing to improve GPU utilization. 
-Use the `--only_inference` parameter and submit another job on with the same parameters, but removing the `--only_inference`. - -## NPY / Image inference - -NPY and image inference works the same as WSI inference, however output files are only a ZARR array. - -```bash -python3 main.py \ - --input "/path-to-file/file.npy" \ - --output_root "/results/" \ - --cp "lizard_convnextv2_large" \ - --tta 4 \ - --inf_workers 16 \ - --pp_tiling 10 \ - --pp_workers 16 -``` - -Support for other datatypes are easy to implement. Check the NPYDataloader for reference. - -## Optimizing inference for your machine: - -1. WSI is on the machine or on a fast access network location -2. If you have multiple machines, e.g. CPU-only machines, you can move post-processing to that machine -3. '--tta 4' yields robust results with very high speed -4. '--inf_workers' should be set to the number of available cores -5. '--pp_workers' should be set to number of available cores -1, with '--pp_tiling' set to a low number where the machine does not run OOM. E.g. on a 16-Core machine, '--pp_workers 16 --pp_tiling 8 is good. If you are running out of memory, increase --pp_tiling. - -## Using the output files for downstream analysis: - -By default, the pipeline produces an instance-map, a class-lookup with centroids and a number of .tsv files to load in QuPath. -sample_analysis.ipynb shows exemplarily how to use the files. - -## Docker and Apptainer/Singularity Container: - -Download the singularity image from [Zenodo](https://zenodo.org/records/10649470/files/hover_next.sif) - -```bash -# don't forget to mount your local directory -export APPTAINER_BINDPATH="/storage" -apptainer exec --nv /path-to-container/hover_next.sif \ - python3 /path-to-repo/main.py \ - --input "/path-to-wsi/*.svs" \ - --output_root "results/" \ - --cp "lizard_convnextv2_large" \ - --tta 4 -``` -# License - -This repository is licensed under GNU General Public License v3.0 (See License Info). 
-If you are intending to use this repository for commercial usecases, please check the licenses of all python packages referenced in the Setup section / described in the requirements.txt and environment.yml. - -# Citation - -If you are using this code, please cite: -``` -@inproceedings{baumann2024hover, - title={HoVer-NeXt: A Fast Nuclei Segmentation and Classification Pipeline for Next Generation Histopathology}, - author={Baumann, Elias and Dislich, Bastian and Rumberger, Josef Lorenz and Nagtegaal, Iris D and Martinez, Maria Rodriguez and Zlobec, Inti}, - booktitle={Medical Imaging with Deep Learning}, - year={2024} -} -``` -and -``` -@INPROCEEDINGS{rumberger2022panoptic, - author={Rumberger, Josef Lorenz and Baumann, Elias and Hirsch, Peter and Janowczyk, Andrew and Zlobec, Inti and Kainmueller, Dagmar}, - booktitle={2022 IEEE International Symposium on Biomedical Imaging Challenges (ISBIC)}, - title={Panoptic segmentation with highly imbalanced semantic labels}, - year={2022}, - pages={1-4}, - doi={10.1109/ISBIC56247.2022.9854551}} -``` +# HoVer-NeXt Inference +HoVer-NeXt is a fast and efficient nuclei segmentation and classification pipeline. + +Supported are a variety of data formats, including all OpenSlide supported datatypes, `.npy` numpy array dumps, and common image formats such as JPEG and PNG. +If you are having trouble with using this repository, please create an issue and we will be happy to help! + +For training code, please check the [hover-next training repository](https://github.com/digitalpathologybern/hover_next_train) + +Find the Publication here: [https://openreview.net/pdf?id=3vmB43oqIO](https://openreview.net/pdf?id=3vmB43oqIO) + +## Setup + +Environments for train and inference are the same so if you already have set the environment up for training, you can use it for inference as well. 
+ +Otherwise: + +```bash +conda env create -f environment.yml +conda activate hovernext +pip install torch==2.1.1 torchvision==0.16.1 --index-url https://download.pytorch.org/whl/cu118 +``` + +or use predefined [docker/singularity container](#docker-and-apptainersingularity-container) + +## Model Weights + +Weights are hosted on [Zenodo](https://zenodo.org/records/10635618) +By specifying one of the ID's listed, weights are **automatically** downloaded and loaded. + +| Dataset | ID | Weights | +|--------------|--------|-----| +| Lizard-Mitosis | "lizard_convnextv2_large" | [Large](https://zenodo.org/records/10635618/files/lizard_convnextv2_large.zip?download=1) | +| | "lizard_convnextv2_base" |[Base](https://zenodo.org/records/10635618/files/lizard_convnextv2_base.zip?download=1) | +| | "lizard_convnextv2_tiny" |[Tiny](https://zenodo.org/records/10635618/files/lizard_convnextv2_tiny.zip?download=1) | +| PanNuke | "pannuke_convnextv2_tiny_1" | [Tiny Fold 1](https://zenodo.org/records/10635618/files/pannuke_convnextv2_tiny_1.zip?download=1) | +| | "pannuke_convnextv2_tiny_2" | [Tiny Fold 2](https://zenodo.org/records/10635618/files/pannuke_convnextv2_tiny_2.zip?download=1) | +| | "pannuke_convnextv2_tiny_3" | [Tiny Fold 3](https://zenodo.org/records/10635618/files/pannuke_convnextv2_tiny_3.zip?download=1) | + +If you are manually downloading weights, unzip them in the directory, such that the folder (e.g. ```lizard_convnextv2_large```) sits in the same directory as ```main.py```. + +## WSI Inference + +This pipeline uses OpenSlide to read images, and therefore supports all formats which are supported by OpenSlide. +If you want to run this pipeline on custom ome.tif files, ensure that the necessary metadata such as resolution, downsampling and dimensions are available. +Additionally, czi is supported via pylibCZIrw. 
+Before running a slide, choose [appropriate parameters for your machine](#optimizing-inference-for-your-machine) + +To run a single slide: + +```bash +python3 main.py \ + --input "/path-to-wsi/wsi.svs" \ + --output_root "results/" \ + --cp "lizard_convnextv2_large" \ + --tta 4 \ + --inf_workers 16 \ + --pp_tiling 10 \ + --pp_workers 16 +``` + +To run multiple slides, specify a glob pattern such as `"/path-to-folder/*.mrxs"` or provide a list of paths as a `.txt` file. + +### Slurm + +if you are running on a slurm cluster you might consider separating pre and post-processing to improve GPU utilization. +Use the `--only_inference` parameter and submit another job with the same parameters, but removing the `--only_inference`. + +## NPY / Image inference + +NPY and image inference works the same as WSI inference, however output files are only a ZARR array. + +```bash +python3 main.py \ + --input "/path-to-file/file.npy" \ + --output_root "/results/" \ + --cp "lizard_convnextv2_large" \ + --tta 4 \ + --inf_workers 16 \ + --pp_tiling 10 \ + --pp_workers 16 +``` + +Support for other datatypes is easy to implement. Check the NPYDataloader for reference. + +## Optimizing inference for your machine: + +1. WSI is on the machine or on a fast access network location +2. If you have multiple machines, e.g. CPU-only machines, you can move post-processing to that machine +3. '--tta 4' yields robust results with very high speed +4. '--inf_workers' should be set to the number of available cores +5. '--pp_workers' should be set to number of available cores -1, with '--pp_tiling' set to a low number where the machine does not run OOM. E.g. on a 16-Core machine, '--pp_workers 16 --pp_tiling 8' is good. If you are running out of memory, increase --pp_tiling. + +## Using the output files for downstream analysis: + +By default, the pipeline produces an instance-map, a class-lookup with centroids and a number of .tsv files to load in QuPath. 
+sample_analysis.ipynb shows exemplarily how to use the files. + +## Docker and Apptainer/Singularity Container: + +Download the singularity image from [Zenodo](https://zenodo.org/records/10649470/files/hover_next.sif) + +```bash +# don't forget to mount your local directory +export APPTAINER_BINDPATH="/storage" +apptainer exec --nv /path-to-container/hover_next.sif \ + python3 /path-to-repo/main.py \ + --input "/path-to-wsi/*.svs" \ + --output_root "results/" \ + --cp "lizard_convnextv2_large" \ + --tta 4 +``` +# License + +This repository is licensed under GNU General Public License v3.0 (See License Info). +If you are intending to use this repository for commercial usecases, please check the licenses of all python packages referenced in the Setup section / described in the requirements.txt and environment.yml. + +# Citation + +If you are using this code, please cite: +``` +@inproceedings{baumann2024hover, + title={HoVer-NeXt: A Fast Nuclei Segmentation and Classification Pipeline for Next Generation Histopathology}, + author={Baumann, Elias and Dislich, Bastian and Rumberger, Josef Lorenz and Nagtegaal, Iris D and Martinez, Maria Rodriguez and Zlobec, Inti}, + booktitle={Medical Imaging with Deep Learning}, + year={2024} +} +``` +and +``` +@INPROCEEDINGS{rumberger2022panoptic, + author={Rumberger, Josef Lorenz and Baumann, Elias and Hirsch, Peter and Janowczyk, Andrew and Zlobec, Inti and Kainmueller, Dagmar}, + booktitle={2022 IEEE International Symposium on Biomedical Imaging Challenges (ISBIC)}, + title={Panoptic segmentation with highly imbalanced semantic labels}, + year={2022}, + pages={1-4}, + doi={10.1109/ISBIC56247.2022.9854551}} +``` From acc4f5ceb79d0bec830243b35f4504c9d693f31f Mon Sep 17 00:00:00 2001 From: eliasbaumann Date: Wed, 9 Apr 2025 09:31:01 +0200 Subject: [PATCH 3/3] updated requirements --- requirements.txt | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/requirements.txt b/requirements.txt index 52238ba..e95b2a5 100644 
--- a/requirements.txt +++ b/requirements.txt @@ -25,4 +25,5 @@ toml numcodecs imagecodecs timm==0.9.6 -geojson \ No newline at end of file +geojson +pylibCZIrw \ No newline at end of file