Merged

58 commits
6ffb8e3
Create Dockerfile for MSK SMIT Lung GTV
locastre Mar 10, 2025
90e36a1
1st draft Dockerfile
locastre Mar 10, 2025
4de5a52
Create README.md
locastre Mar 15, 2025
54f36b7
draft SMITrunner.py
locastre Mar 15, 2025
84ffd42
add src files
locastre Mar 15, 2025
903d37c
add utils
locastre Mar 15, 2025
d38cfe4
Add files via upload
locastre Mar 15, 2025
1802f70
Add files via upload
locastre Mar 15, 2025
8aa9989
Update Dockerfile
locastre Mar 15, 2025
ec99930
mkdir for conda-pack
locastre Mar 15, 2025
483c92a
add default.yml
locastre Mar 15, 2025
5aa9531
add config.json
locastre Mar 15, 2025
874da7e
Rename config.json to meta.json
locastre Mar 15, 2025
5ccb70c
Update meta.json
locastre Mar 17, 2025
20b0960
Update meta.json
locastre Mar 17, 2025
d5733f6
Update meta.json
locastre Mar 17, 2025
d346ec2
Update meta.json
locastre Mar 17, 2025
28a2adf
Update meta.json
locastre Mar 17, 2025
c20d489
Update Dockerfile
locastre Mar 17, 2025
523ea46
Update SMITrunner.py
locastre Mar 17, 2025
caac4e0
Update Dockerfile with venv python
locastre Apr 7, 2025
bbd2e0a
Create requirements.txt
locastre Apr 7, 2025
382c076
Update Dockerfile requirements path
locastre Apr 7, 2025
12f40f2
Update Dockerfile set BASH shell
locastre Apr 8, 2025
20998fe
Update meta.json with draft submission comments
locastre Apr 10, 2025
7eb13a4
Create mhub.toml
locastre Apr 10, 2025
bf7adaa
Update meta.json
locastre Apr 10, 2025
02b2ec9
Update meta.json
locastre Apr 10, 2025
a78a589
Update mhub.toml Zenodo URL
locastre Apr 10, 2025
1687fd0
Update meta.json rename seg output to LUNG+NEOPLASM_MALIGNANT_PRIMARY
locastre Apr 10, 2025
a108233
Update SMITrunner.py name of output segmentation to LUNG+NEOPLASM_MAL…
locastre Apr 16, 2025
f07cac6
Rename SMITrunner.py to SMITRunner.py
locastre Apr 17, 2025
c8de4c8
Update default.yml
locastre Apr 17, 2025
2187d35
Update default.yml NiftiConverter parameters
locastre Apr 18, 2025
e715f91
Create bash_run_SMIT_mhub.sh
locastre Apr 18, 2025
c828416
Update SMITRunner.py
locastre Apr 18, 2025
afe4fd0
Update bash_run_SMIT_mhub.sh dos2unix line endings
locastre Apr 18, 2025
84a6e0f
Update SMITRunner.py
locastre Apr 18, 2025
0d3f5ad
Delete models/msk_smit_lung_gtv/src/bash_run_SMIT_mhub.sh
locastre Apr 18, 2025
7f3582c
Delete models/msk_smit_lung_gtv/src/bash_run_SMIT_Segmentation.sh
locastre Apr 18, 2025
897c275
Update default.yml fix trailing typo in seg output name
locastre Apr 18, 2025
1a53325
Update Dockerfile
locastre Apr 18, 2025
5542fe0
Create get_weights.sh
locastre Apr 18, 2025
cd456eb
Update Dockerfile run script to get weights
locastre Apr 18, 2025
08ffd52
Update mhub.toml
locastre Apr 18, 2025
4b1b3aa
Update meta.json
locastre Apr 18, 2025
46b907e
Update meta.json
locastre Apr 18, 2025
a70c892
Update Dockerfile
locastre Apr 18, 2025
13ecac0
Update meta.json correct tag to uri
locastre Apr 18, 2025
2ffd74e
Update mhub.toml with Zenodo data from IDC
locastre Apr 21, 2025
07bcd5a
Update meta.json remove overly long table
locastre Apr 21, 2025
1ecec35
Update Dockerfile explicit install instruction for torch 1.12.1+cu116
locastre Apr 22, 2025
7e66021
Update get_weights.sh
locastre Apr 23, 2025
4c619d6
Update default.yml
locastre Apr 23, 2025
01e781f
Update mhub.toml doi
locastre Apr 23, 2025
5deffe4
Update mhub.toml link to Zenodo test data, 3x the charm
locastre Apr 23, 2025
f564a6a
Update default.yml with sample input/output file structure
locastre Apr 24, 2025
a0c04ba
Update default.yml
locastre Apr 24, 2025
38 changes: 38 additions & 0 deletions models/msk_smit_lung_gtv/config/default.yml
@@ -0,0 +1,38 @@
general:
  data_base_dir: /app/data
  version: 1.0.0
  description: Default configuration for SMIT model (dicom to dicom)

execute:
- DicomImporter
- NiftiConverter
- SMITRunner
- DsegConverter
- DataOrganizer

modules:
  DicomImporter:
    source_dir: input_data
    import_dir: sorted_data
    sort_data: true
    meta:
      mod: '%Modality'

  NiftiConverter:
    engine: dcm2niix

  DsegConverter:
    model_name: SMIT
    body_part_examined: CHEST
    source_segs: nifti:mod=seg
    skip_empty_slices: true

  DataOrganizer:
    targets:
    - dicomseg:mod=seg-->[i:sid]/msk_smit_lung_gtv.seg.dcm

sample:
  input:
    dicom/: Folder with DICOM files of one or more CT scans.
  output:
    1.3.6.1.4.1.14519.5.2.1.7311.5101.160028252338004527274326500702/msk_smit_lung_gtv.seg.dcm: The DICOM SEG file with Lung GTV segmentation (arbitrary series ID foldername).
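The execute list above wires a custom SMITRunner module between the NIfTI conversion and DICOM-SEG export steps. SMITRunner.py itself is not shown in this view; below is a minimal, hypothetical sketch of such a module in the common mhubio style — the decorator arguments, output file name, and the run_smit helper are assumptions, not the PR's actual code:

from mhubio.core import IO, Instance, InstanceData, Module


def run_smit(input_path: str, output_path: str) -> None:
    # placeholder for the actual SMIT inference call (assumption)
    ...


class SMITRunner(Module):

    @IO.Instance()
    @IO.Input('in_data', 'nifti:mod=ct', the='input CT scan')
    @IO.Output('out_data', 'smit_lung_gtv.nii.gz', 'nifti:mod=seg:model=SMIT',
               the='lung GTV segmentation produced by SMIT')
    def task(self, instance: Instance, in_data: InstanceData, out_data: InstanceData) -> None:
        # read the converted NIfTI, run the model, and write the mask where
        # the DsegConverter (source_segs: nifti:mod=seg) will pick it up
        run_smit(in_data.abspath, out_data.abspath)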
22 changes: 22 additions & 0 deletions models/msk_smit_lung_gtv/dockerfiles/Dockerfile
@@ -0,0 +1,22 @@
FROM mhubai/base:latest

# Update authors label
LABEL authors="jiangj1@mskcc.org,aptea@mskcc.org,deasyj@mskcc.org,iyera@mskcc.org,locastre@mskcc.org"

SHELL ["/bin/bash", "-c"]

RUN apt update

ARG MHUB_MODELS_REPO
ENV MODEL_NAME=msk_smit_lung_gtv
RUN buildutils/import_mhub_model.sh msk_smit_lung_gtv ${MHUB_MODELS_REPO}

RUN source /app/models/msk_smit_lung_gtv/src/get_weights.sh

RUN uv venv --python-preference only-managed -p 3.9 .venv39
RUN uv pip install -n -p .venv39 --extra-index-url https://download.pytorch.org/whl/cu116 torch==1.12.1+cu116
RUN uv pip install -n -p .venv39 simpleitk==2.2.1 nibabel==4.0.2 monai==0.8.0 timm==0.6.11 ml-collections==0.1.1 einops==0.8.1 scikit-image==0.19.3 cmake imagecodecs torchaudio==0.12.1 pytorch-ignite==0.4.8
RUN uv pip install -n -p .venv39 numpy==1.23.4

ENTRYPOINT ["mhub.run"]
CMD ["--config", "/app/models/msk_smit_lung_gtv/config/default.yml"]
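Note the split-interpreter design: MHub's own tooling runs on the base image's Python, while the model's pinned torch 1.12.1/cu116 stack lives in the .venv39 environment created above. A hedged sketch of how the runner could hand off to that interpreter — only the venv path follows from the Dockerfile; the script name and CLI flags are assumptions:

import subprocess

VENV_PYTHON = "/app/.venv39/bin/python"  # venv built by `uv venv ... .venv39` above

# hypothetical entry point and flags -- the real SMITRunner.py is not shown here
subprocess.run(
    [
        VENV_PYTHON,
        "/app/models/msk_smit_lung_gtv/src/run_inference.py",
        "--input", "/app/data/image.nii.gz",
        "--output", "/app/data/seg.nii.gz",
    ],
    check=True,
)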
118 changes: 118 additions & 0 deletions models/msk_smit_lung_gtv/meta.json
@@ -0,0 +1,118 @@
{
  "id": "",
  "name": "msk_smit_lung_gtv",
  "title": "Self-supervised 3D segmentation using self-distilled masked image transformer for Lung GTV Segmentation",
  "summary": {
    "description": "A Lung GTV segmentation model, fine-tuned from a foundation model pretrained with 10K CT scans",
    "inputs": [
      {
        "label": "Input Image",
        "description": "The CT scan of a patient.",
        "format": "NIFTI",
        "modality": "CT",
        "bodypartexamined": "Chest",
        "slicethickness": "5mm",
        "contrast": true,
        "non-contrast": true
      }
    ],
    "outputs": [
      {
        "label": "Segmentation",
        "description": "Segmentation of the lung GTV for input CT images.",
        "type": "Segmentation",
        "classes": [
          "LUNG+NEOPLASM_MALIGNANT_PRIMARY"
        ]
      }
    ],
    "model": {
      "architecture": "Swin3D Transformer",
      "training": "supervised",
      "cmpapproach": "3D"
    },
    "data": {
      "training": {
        "vol_samples": 377
      },
      "evaluation": {
        "vol_samples": 139
      },
      "public": true,
      "external": false
    }
  },
  "details": {
    "name": "Self-supervised 3D anatomy segmentation using self-distilled masked image transformer (SMIT)",
    "version": "1.0.0",
    "devteam": "",
    "authors": ["Jue Jiang", "Harini Veeraraghavan"],
    "type": "A 3D Swin-transformer-based segmentation network, pretrained on 10K CT scans and fine-tuned for lung GTV segmentation",
    "date": {
      "code": "11.03.2025",
      "weights": "11.03.2025",
      "pub": "15.07.2024"
    },
    "cite": "Jiang, Jue, and Harini Veeraraghavan. Self-supervised pretraining in the wild imparts image acquisition robustness to medical image transformers: an application to lung cancer segmentation. Proceedings of Machine Learning Research 250 (2024): 708.",
    "license": {
      "code": "GNU General Public License",
      "weights": "GNU General Public License"
    },
    "publications": [
      {
        "title": "Self-supervised pretraining in the wild imparts image acquisition robustness to medical image transformers: an application to lung cancer segmentation",
        "uri": "https://openreview.net/pdf?id=G9Te2IevNm"
      },
      {
        "title": "Self-supervised 3D anatomy segmentation using self-distilled masked image transformer (SMIT)",
        "uri": "https://link.springer.com/chapter/10.1007/978-3-031-16440-8_53"
      }
    ],
    "github": "https://github.com/The-Veeraraghavan-Lab/CTRobust_Transformers.git"
  },
  "info": {
    "use": {
      "title": "Intended use",
      "text": "This model is intended to be used on CT images (with or without contrast).",
      "references": [],
      "tables": []
    },
    "evaluation": {
      "title": "Evaluation data",
      "text": "To assess the model's segmentation performance on the NSCLC Radiogenomics dataset, we considered that the original input is a full 3D volume. The model segmented not only the labeled tumor but also tumors that were not manually annotated; we therefore evaluated the model against the manually labeled tumors. After applying the segmentation model, we extracted a 128x128x128 cubic region containing the manual segmentation to assess the model's performance.",
      "references": [],
      "tables": [],
      "limitations": "The model might produce minor false positives, but these could easily be removed by post-processing, such as constraining the tumor segmentation to lung slices."
    },
    "training": {
      "title": "Training data",
      "text": "Training used 377 cases from the TCIA NSCLC-Radiomics dataset. Reference: Aerts, H. J. W. L., Wee, L., Rios Velazquez, E., Leijenaar, R. T. H., Parmar, C., Grossmann, P., Carvalho, S., Bussink, J., Monshouwer, R., Haibe-Kains, B., Rietveld, D., Hoebers, F., Rietbergen, M. M., Leemans, C. R., Dekker, A., Quackenbush, J., Gillies, R. J., Lambin, P. (2014). Data From NSCLC-Radiomics (version 4) [Data set]. The Cancer Imaging Archive."
Review comment (Member): Please add references using the references: [] key, the format is described here.
    },
    "analyses": {
      "title": "Evaluation",
      "text": "Evaluation used the Dice score; see the paper (Methods, Section 4.2, Experiments and evaluation metrics; Results, Section 5.1, Table 2) for additional details.",
      "references": [
        {
          "label": "Self-supervised pretraining in the wild imparts image acquisition robustness to medical image transformers: an application to lung cancer segmentation",
          "uri": "https://proceedings.mlr.press/v250/jiang24b.html"
        }
      ],
      "tables": [
        {
          "label": "Dice scores",
          "entries": {
            "From Scratch": "0.54 ±0.31",
            "This model": "0.69 ±0.18"
          }
        }
      ]
    },
    "limitations": {
      "title": "Limitations",
      "text": "The model might produce minor false positives, but these could easily be removed by post-processing, such as constraining the tumor segmentation to lung slices."
    }
  }
}
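Per the review comment above, the dataset citation belongs in a references: [] key rather than inline in the training text. The expected shape matches the entries already used in the analyses section — for example (the uri below is an assumption, not taken from this PR):

"references": [
  {
    "label": "Data From NSCLC-Radiomics (version 4) [Data set]. The Cancer Imaging Archive",
    "uri": "https://doi.org/10.7937/K9/TCIA.2015.PF0M9REI"
  }
]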
2 changes: 2 additions & 0 deletions models/msk_smit_lung_gtv/mhub.toml
@@ -0,0 +1,2 @@
[model.deployment]
test = "https://zenodo.org/records/15270887/files/msk_smit_lung_gtv.test.zip"
4 changes: 4 additions & 0 deletions models/msk_smit_lung_gtv/src/README.md
@@ -0,0 +1,4 @@
## References
[1] Jiang, Jue, and Harini Veeraraghavan. "Self-supervised pretraining in the wild imparts image acquisition robustness to medical image transformers: an application to lung cancer segmentation." In Medical Imaging with Deep Learning. 2024.

[2] Jiang, Jue, Neelam Tyagi, Kathryn Tringale, Christopher Crane, and Harini Veeraraghavan. "Self-supervised 3D anatomy segmentation using self-distilled masked image transformer (SMIT)." In International Conference on Medical Image Computing and Computer-Assisted Intervention, pp. 556-566. Cham: Springer Nature Switzerland, 2022.
186 changes: 186 additions & 0 deletions models/msk_smit_lung_gtv/src/edit_inference_utils.py
@@ -0,0 +1,186 @@
# Copyright (c) MONAI Consortium
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#     http://www.apache.org/licenses/LICENSE-2.0
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.

from typing import Any, Callable, List, Sequence, Tuple, Union

import torch
import torch.nn.functional as F

from monai.data.utils import compute_importance_map, dense_patch_slices, get_valid_patch_size
from monai.utils import BlendMode, PytorchPadMode, fall_back_tuple, look_up_option, optional_import

import time

tqdm, _ = optional_import("tqdm", name="tqdm")

__all__ = ["sliding_window_inference"]


def sliding_window_inference(
    inputs: torch.Tensor,
    roi_size: Union[Sequence[int], int],
    sw_batch_size: int,
    predictor: Callable[..., torch.Tensor],
    overlap: float = 0.25,
    mode: Union[BlendMode, str] = BlendMode.CONSTANT,
    sigma_scale: Union[Sequence[float], float] = 0.125,
    padding_mode: Union[PytorchPadMode, str] = PytorchPadMode.CONSTANT,
    cval: float = 0.0,
    sw_device: Union[torch.device, str, None] = None,
    device: Union[torch.device, str, None] = None,
    *args: Any,
    **kwargs: Any,
) -> torch.Tensor:
    """
    Sliding window inference on `inputs` with `predictor`.

    When roi_size is larger than the inputs' spatial size, the input image is padded during inference.
    To maintain the same spatial sizes, the output image will be cropped to the original input size.

    Args:
        inputs: input image to be processed (assuming NCHW[D])
        roi_size: the spatial window size for inferences.
            When its components have None or non-positives, the corresponding inputs dimension will be used.
            if the components of the `roi_size` are non-positive values, the transform will use the
            corresponding components of img size. For example, `roi_size=(32, -1)` will be adapted
            to `(32, 64)` if the second spatial dimension size of img is `64`.
        sw_batch_size: the batch size to run window slices.
        predictor: given input tensor `patch_data` in shape NCHW[D], `predictor(patch_data)`
            should return a prediction with the same spatial shape and batch_size, i.e. NMHW[D];
            where HW[D] represents the patch spatial size, M is the number of output channels, N is `sw_batch_size`.
        overlap: Amount of overlap between scans.
        mode: {``"constant"``, ``"gaussian"``}
            How to blend output of overlapping windows. Defaults to ``"constant"``.

            - ``"constant"``: gives equal weight to all predictions.
            - ``"gaussian"``: gives less weight to predictions on edges of windows.

        sigma_scale: the standard deviation coefficient of the Gaussian window when `mode` is ``"gaussian"``.
            Default: 0.125. Actual window sigma is ``sigma_scale`` * ``dim_size``.
            When sigma_scale is a sequence of floats, the values denote sigma_scale at the corresponding
            spatial dimensions.
        padding_mode: {``"constant"``, ``"reflect"``, ``"replicate"``, ``"circular"``}
            Padding mode for ``inputs``, when ``roi_size`` is larger than inputs. Defaults to ``"constant"``.
            See also: https://pytorch.org/docs/stable/nn.functional.html#pad
        cval: fill value for 'constant' padding mode. Default: 0
        sw_device: device for the window data.
            By default the device (and accordingly the memory) of the `inputs` is used.
            Normally `sw_device` should be consistent with the device where `predictor` is defined.
        device: device for the stitched output prediction.
            By default the device (and accordingly the memory) of the `inputs` is used. If for example
            set to device=torch.device('cpu') the gpu memory consumption is less and independent of the
            `inputs` and `roi_size`. Output is on the `device`.
        args: optional args to be passed to ``predictor``.
        kwargs: optional keyword args to be passed to ``predictor``.

    Note:
        - input must be channel-first and have a batch dim, supports N-D sliding window.

    """
    num_spatial_dims = len(inputs.shape) - 2
    if overlap < 0 or overlap >= 1:
        raise AssertionError("overlap must be >= 0 and < 1.")

    # determine image spatial size and batch size
    # Note: all input images must have the same image size and batch size
    image_size_ = list(inputs.shape[2:])
    batch_size = inputs.shape[0]

    if device is None:
        device = inputs.device
    if sw_device is None:
        sw_device = inputs.device

    roi_size = fall_back_tuple(roi_size, image_size_)
    # in case that image size is smaller than roi size
    image_size = tuple(max(image_size_[i], roi_size[i]) for i in range(num_spatial_dims))
    pad_size = []
    for k in range(len(inputs.shape) - 1, 1, -1):
        diff = max(roi_size[k - 2] - inputs.shape[k], 0)
        half = diff // 2
        pad_size.extend([half, diff - half])
    inputs = F.pad(inputs, pad=pad_size, mode=look_up_option(padding_mode, PytorchPadMode).value, value=cval)

    scan_interval = _get_scan_interval(image_size, roi_size, num_spatial_dims, overlap)

    # Store all slices in list
    slices = dense_patch_slices(image_size, roi_size, scan_interval)
    num_win = len(slices)  # number of windows per image
    total_slices = num_win * batch_size  # total number of windows

    # Create window-level importance map
    importance_map = compute_importance_map(
        get_valid_patch_size(image_size, roi_size), mode=mode, sigma_scale=sigma_scale, device=device
    )
    importance_map = importance_map.cpu()  # keep blending weights on the CPU to reduce GPU memory use

    # Perform predictions
    output_image, count_map = torch.tensor(0.0, device=device), torch.tensor(0.0, device=device)
    _initialized = False
    for slice_g in range(0, total_slices, sw_batch_size):
        slice_range = range(slice_g, min(slice_g + sw_batch_size, total_slices))
        unravel_slice = [
            [slice(int(idx / num_win), int(idx / num_win) + 1), slice(None)] + list(slices[idx % num_win])
            for idx in slice_range
        ]
        window_data = torch.cat([inputs[win_slice] for win_slice in unravel_slice]).to(sw_device)
        seg_prob = predictor(window_data, *args, **kwargs).to(device)  # batched patch segmentation

        if not _initialized:  # init. buffer at the first iteration
            output_classes = seg_prob.shape[1]
            output_shape = [batch_size, output_classes] + list(image_size)
            # allocate memory to store the full output and the count for overlapping parts;
            # unlike stock MONAI, the accumulation buffers live on the CPU so that large
            # volumes do not exhaust GPU memory
            output_image = torch.zeros(output_shape, dtype=torch.float32, device='cpu')
            count_map = torch.zeros(output_shape, dtype=torch.float32, device='cpu')
            _initialized = True

        # store the result in the proper location of the full output. Apply weights from importance map.
        for idx, original_idx in zip(slice_range, unravel_slice):
            output_image[original_idx] += importance_map * seg_prob[idx - slice_g].cpu()
            count_map[original_idx] += importance_map

    # account for any overlapping sections
    output_image = output_image / count_map

    final_slicing: List[slice] = []
    for sp in range(num_spatial_dims):
        slice_dim = slice(pad_size[sp * 2], image_size_[num_spatial_dims - sp - 1] + pad_size[sp * 2])
        final_slicing.insert(0, slice_dim)
    while len(final_slicing) < len(output_image.shape):
        final_slicing.insert(0, slice(None))
    return output_image[final_slicing]


def _get_scan_interval(
    image_size: Sequence[int], roi_size: Sequence[int], num_spatial_dims: int, overlap: float
) -> Tuple[int, ...]:
    """
    Compute scan interval according to the image size, roi size and overlap.
    Scan interval will be `int((1 - overlap) * roi_size)`, if interval is 0,
    use 1 instead to make sure sliding window works.

    """
    if len(image_size) != num_spatial_dims:
        raise ValueError("image coord different from spatial dims.")
    if len(roi_size) != num_spatial_dims:
        raise ValueError("roi coord different from spatial dims.")

    scan_interval = []
    for i in range(num_spatial_dims):
        if roi_size[i] == image_size[i]:
            scan_interval.append(int(roi_size[i]))
        else:
            interval = int(roi_size[i] * (1 - overlap))
            scan_interval.append(interval if interval > 0 else 1)
    return tuple(scan_interval)
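A hedged usage sketch for the patched function above, runnable on CPU with a stand-in network. The 128-cube roi_size is an assumption based on the 128x128x128 evaluation crops mentioned in meta.json, not a documented SMIT setting:

import torch
from edit_inference_utils import sliding_window_inference

net = torch.nn.Conv3d(1, 2, kernel_size=3, padding=1)  # stand-in for the SMIT predictor
image = torch.rand(1, 1, 96, 160, 160)  # N, C, D, H, W CT volume

with torch.no_grad():
    probs = sliding_window_inference(
        inputs=image,
        roi_size=(128, 128, 128),  # assumed window size
        sw_batch_size=1,
        predictor=net,
        overlap=0.5,
        mode="gaussian",
    )

# the stitched output is accumulated on the CPU regardless of where `net` runs
print(probs.shape)  # torch.Size([1, 2, 96, 160, 160])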
14 changes: 14 additions & 0 deletions models/msk_smit_lung_gtv/src/get_weights.sh
@@ -0,0 +1,14 @@
#!/bin/bash

# Destination defaults to /app when no argument is given
DEST="${1:-/app}"

MODEL_NAME=msk_smit_lung_gtv
WEIGHTS_HASH=H4sIADC3/mcAAwXByRGAIAwAwL/FkJFDwW4wqDCAYQwPtHp3Y++NN4DKGVHsNARSBY7+OQJw9z0hUDG30lnyG42S2UZnSwrz6tTQJy1NXN/0A15deWNIAAAA
# the download URL is stored as a gzip-compressed, base64-encoded string
WEIGHTS_URL=$(base64 -d <<<"${WEIGHTS_HASH}" | gunzip)
wget "$WEIGHTS_URL" -O weights.tar.gz
tar xvf weights.tar.gz -C "${DEST}/models/${MODEL_NAME}/src" && rm weights.tar.gz
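For documentation purposes only, an equivalent decode in Python, mirroring the base64 -d | gunzip pipeline above (WEIGHTS_HASH is the string from the script):

import base64
import gzip

WEIGHTS_HASH = "H4sIADC3/mcAAwXByRGAIAwAwL/FkJFDwW4wqDCAYQwPtHp3Y++NN4DKGVHsNARSBY7+OQJw9z0hUDG30lnyG42S2UZnSwrz6tTQJy1NXN/0A15deWNIAAAA"

# reverse the encoding: base64 decode, then gunzip, yielding a UTF-8 URL string
weights_url = gzip.decompress(base64.b64decode(WEIGHTS_HASH)).decode("utf-8")
print(weights_url)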