diff --git a/.gitignore b/.gitignore index d813b504..1d23acb3 100644 --- a/.gitignore +++ b/.gitignore @@ -58,3 +58,9 @@ vsts* superbuild build debug.log +data/dicoms_from_tcia/* +venv/* +src/applications/FeTS_Tool_Helper.egg-info/* +src/applications/__pycache__/* +data/*.zip +src/applications/data_prep_models/* diff --git a/CMakeLists.txt b/CMakeLists.txt index e8082963..bd4f19f2 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -11,9 +11,9 @@ SET( ${PROJECT_NAME}_Variant "Full" ) # the particular variant of CaPTk (Full/Ne # So small changes here to formatting or naming convention *may potentially break the linux packager*! # STRING(TIMESTAMP TODAY "%Y%m%d") -SET( PROJECT_VERSION_MAJOR 1 ) +SET( PROJECT_VERSION_MAJOR 2 ) SET( PROJECT_VERSION_MINOR 0 ) -SET( PROJECT_VERSION_PATCH 3 ) +SET( PROJECT_VERSION_PATCH 0 ) SET( PROJECT_VERSION_TWEAK ) # check for the string "nonRelease" in the PROJECT_VERSION_PATCH variable @@ -157,12 +157,12 @@ IF(${PROJECT_NAME}_Variant MATCHES "Full" ) ELSE() SET( PROJECT_NAME_EXTENDED "Federated Tumor Segmentation (${PROJECT_NAME}_${${PROJECT_NAME}_Variant})" ) ENDIF() -SET( PROJECT_VENDOR "CBICA - UPenn" ) +SET( PROJECT_VENDOR "The FeTS-AI Group" ) SET( LICENSE_FILE "${PROJECT_SOURCE_DIR}/LICENSE" ) SET( README_FILE "${PROJECT_SOURCE_DIR}/README.txt" ) -SET( PROJECT_DOMAIN "https://www.med.upenn.edu/cbica/captk/" ) -SET( PROJECT_CONTACT "software@cbica.upenn.edu" ) -SET( COPYRIGHT "Copyright (c) 2019 CBICA. All rights reserved.") +SET( PROJECT_DOMAIN "https://www.fets.ai" ) +SET( PROJECT_CONTACT "admin@fets" ) +SET( COPYRIGHT "Copyright (c) 2023 FeTS-AI. All rights reserved.") SET( IDENTIFIER "") ADD_DEFINITIONS(-DPROJECT_NAME="${PROJECT_NAME}" ) ADD_DEFINITIONS(-DPROJECT_VERSION="${PROJECT_VERSION}" ) @@ -544,95 +544,95 @@ IF( NOT BUILD_DOCUMENTATION_ONLY ) ${LIBNAME_Applications} ) - # update submodule - IF( NOT EXISTS "${PROJECT_SOURCE_DIR}/OpenFederatedLearning/setup.py" ) - EXECUTE_PROCESS(COMMAND git submodule update --init --recursive - WORKING_DIRECTORY ${CMAKE_CURRENT_SOURCE_DIR} - RESULT_VARIABLE git_result - OUTPUT_VARIABLE git_ver) - ENDIF() + # # update submodule + # IF( NOT EXISTS "${PROJECT_SOURCE_DIR}/OpenFederatedLearning/setup.py" ) + # EXECUTE_PROCESS(COMMAND git submodule update --init --recursive + # WORKING_DIRECTORY ${CMAKE_CURRENT_SOURCE_DIR} + # RESULT_VARIABLE git_result + # OUTPUT_VARIABLE git_ver) + # ENDIF() - # update submodule - IF( NOT EXISTS "${PROJECT_SOURCE_DIR}/Models_Pretrained/README.md" ) - EXECUTE_PROCESS(COMMAND git submodule update --init --recursive - WORKING_DIRECTORY ${CMAKE_CURRENT_SOURCE_DIR} - RESULT_VARIABLE git_result - OUTPUT_VARIABLE git_ver) - ENDIF() + # # update submodule + # IF( NOT EXISTS "${PROJECT_SOURCE_DIR}/Models_Pretrained/README.md" ) + # EXECUTE_PROCESS(COMMAND git submodule update --init --recursive + # WORKING_DIRECTORY ${CMAKE_CURRENT_SOURCE_DIR} + # RESULT_VARIABLE git_result + # OUTPUT_VARIABLE git_ver) + # ENDIF() - # get lfs to pull - IF( EXISTS "${PROJECT_SOURCE_DIR}/BrainMaGe" ) - EXECUTE_PROCESS(COMMAND git lfs pull - WORKING_DIRECTORY ${CMAKE_CURRENT_SOURCE_DIR}/BrainMaGe - RESULT_VARIABLE git_result - OUTPUT_VARIABLE git_ver) - ENDIF() + # # get lfs to pull + # IF( EXISTS "${PROJECT_SOURCE_DIR}/BrainMaGe" ) + # EXECUTE_PROCESS(COMMAND git lfs pull + # WORKING_DIRECTORY ${CMAKE_CURRENT_SOURCE_DIR}/BrainMaGe + # RESULT_VARIABLE git_result + # OUTPUT_VARIABLE git_ver) + # ENDIF() - # copy plans from algorithms - IF( EXISTS 
"${PROJECT_SOURCE_DIR}/OpenFederatedLearning/submodules/fets_ai/Algorithms/fets/bin/federations/plans/pt_3dresunet_brainmagebrats.yaml" ) - FILE( GLOB FETS_AI_PLANS - "${PROJECT_SOURCE_DIR}/OpenFederatedLearning/submodules/fets_ai/Algorithms/fets/bin/federations/plans/*.yaml" - ) - FOREACH(fetsai_plan ${FETS_AI_PLANS}) - CONFIGURE_FILE("${fetsai_plan}" "${PROJECT_SOURCE_DIR}/OpenFederatedLearning/bin/federations/plans/" COPYONLY) - ENDFOREACH() - ENDIF() - - # copy weights from models - SET( DOWNLOAD_LINK "ftp://www.nitrc.org/home/groups/captk/downloads/models/torch/pt_3dresunet_ss_brainmagebrats_best.pt" ) - SET( FILE_TO_EXTRACT "${PROJECT_SOURCE_DIR}/OpenFederatedLearning/bin/federations/weights/pt_3dresunet_ss_brainmagebrats_best.pt") -# IF( NOT EXISTS "${FILE_TO_EXTRACT}" ) - -# # download exe from url -# MESSAGE( STATUS "Downloading weights for skull stripping" ) -# FILE(DOWNLOAD "${DOWNLOAD_LINK}" "${FILE_TO_EXTRACT}" TIMEOUT 1000000 STATUS STATUS_CODE SHOW_PROGRESS) -# IF(NOT STATUS_CODE EQUAL 0) -# MESSAGE(FATAL_ERROR "Failed to download skull-stripping weights. Status=${STATUS_CODE}") -# ENDIF() -# ENDIF() - - # copy network information - ## todo: change to a direct installation so that git doesn't get confused that submodule has changed - CONFIGURE_FILE("${PROJECT_SOURCE_DIR}/data/fets/network.yaml" "${PROJECT_SOURCE_DIR}/OpenFederatedLearning/bin/federations/plans/defaults/network.yaml") - CONFIGURE_FILE("${PROJECT_SOURCE_DIR}/data/fets/cert_chain.crt" "${PROJECT_SOURCE_DIR}/OpenFederatedLearning/bin/federations/pki/cert_chain.crt") - #CONFIGURE_FILE("${PROJECT_SOURCE_DIR}/data/fets/pt_3dresunet_brainmagebrats.yaml" "${PROJECT_SOURCE_DIR}/OpenFederatedLearning/bin/federations/plans/pt_3dresunet_brainmagebrats.yaml" COPYONLY) - CONFIGURE_FILE("${PROJECT_SOURCE_DIR}/data/fets/deepscan_inference.yaml" "${PROJECT_SOURCE_DIR}/OpenFederatedLearning/bin/federations/plans/deepscan_inference.yaml" COPYONLY) - CONFIGURE_FILE("${PROJECT_SOURCE_DIR}/data/fets/nnunet_inference.yaml" "${PROJECT_SOURCE_DIR}/OpenFederatedLearning/bin/federations/plans/nnunet_inference.yaml" COPYONLY) + # # copy plans from algorithms + # IF( EXISTS "${PROJECT_SOURCE_DIR}/OpenFederatedLearning/submodules/fets_ai/Algorithms/fets/bin/federations/plans/pt_3dresunet_brainmagebrats.yaml" ) + # FILE( GLOB FETS_AI_PLANS + # "${PROJECT_SOURCE_DIR}/OpenFederatedLearning/submodules/fets_ai/Algorithms/fets/bin/federations/plans/*.yaml" + # ) + # FOREACH(fetsai_plan ${FETS_AI_PLANS}) + # CONFIGURE_FILE("${fetsai_plan}" "${PROJECT_SOURCE_DIR}/OpenFederatedLearning/bin/federations/plans/" COPYONLY) + # ENDFOREACH() + # ENDIF() - # find stand-alone CMD applications - FILE( GLOB DeepScan_Weights - "${PROJECT_SOURCE_DIR}/data/fets/*.tar" - ) - file(MAKE_DIRECTORY "${PROJECT_SOURCE_DIR}/OpenFederatedLearning/bin/federations/weights/deepscan/") - FOREACH(deepScan_weight ${DeepScan_Weights}) - CONFIGURE_FILE("${deepScan_weight}" "${PROJECT_SOURCE_DIR}/OpenFederatedLearning/bin/federations/weights/deepscan/" COPYONLY) - ENDFOREACH() + # # copy weights from models + # SET( DOWNLOAD_LINK "ftp://www.nitrc.org/home/groups/captk/downloads/models/torch/pt_3dresunet_ss_brainmagebrats_best.pt" ) + # SET( FILE_TO_EXTRACT "${PROJECT_SOURCE_DIR}/OpenFederatedLearning/bin/federations/weights/pt_3dresunet_ss_brainmagebrats_best.pt") + # IF( NOT EXISTS "${FILE_TO_EXTRACT}" ) + + # # download exe from url + # MESSAGE( STATUS "Downloading weights for skull stripping" ) + # FILE(DOWNLOAD "${DOWNLOAD_LINK}" "${FILE_TO_EXTRACT}" TIMEOUT 1000000 STATUS 
STATUS_CODE SHOW_PROGRESS) + # IF(NOT STATUS_CODE EQUAL 0) + # MESSAGE(FATAL_ERROR "Failed to download skull-stripping weights. Status=${STATUS_CODE}") + # ENDIF() + # ENDIF() - INSTALL( - DIRECTORY "${PROJECT_SOURCE_DIR}/OpenFederatedLearning" - DESTINATION bin - PATTERN ".git" EXCLUDE - PATTERN "venv" EXCLUDE - ) + # # copy network information + # ## todo: change to a direct installation so that git doesn't get confused that submodule has changed + # CONFIGURE_FILE("${PROJECT_SOURCE_DIR}/data/fets/network.yaml" "${PROJECT_SOURCE_DIR}/OpenFederatedLearning/bin/federations/plans/defaults/network.yaml") + # CONFIGURE_FILE("${PROJECT_SOURCE_DIR}/data/fets/cert_chain.crt" "${PROJECT_SOURCE_DIR}/OpenFederatedLearning/bin/federations/pki/cert_chain.crt") + # #CONFIGURE_FILE("${PROJECT_SOURCE_DIR}/data/fets/pt_3dresunet_brainmagebrats.yaml" "${PROJECT_SOURCE_DIR}/OpenFederatedLearning/bin/federations/plans/pt_3dresunet_brainmagebrats.yaml" COPYONLY) + # CONFIGURE_FILE("${PROJECT_SOURCE_DIR}/data/fets/deepscan_inference.yaml" "${PROJECT_SOURCE_DIR}/OpenFederatedLearning/bin/federations/plans/deepscan_inference.yaml" COPYONLY) + # CONFIGURE_FILE("${PROJECT_SOURCE_DIR}/data/fets/nnunet_inference.yaml" "${PROJECT_SOURCE_DIR}/OpenFederatedLearning/bin/federations/plans/nnunet_inference.yaml" COPYONLY) + + # # find stand-alone CMD applications + # FILE( GLOB DeepScan_Weights + # "${PROJECT_SOURCE_DIR}/data/fets/*.tar" + # ) + # file(MAKE_DIRECTORY "${PROJECT_SOURCE_DIR}/OpenFederatedLearning/bin/federations/weights/deepscan/") + # FOREACH(deepScan_weight ${DeepScan_Weights}) + # CONFIGURE_FILE("${deepScan_weight}" "${PROJECT_SOURCE_DIR}/OpenFederatedLearning/bin/federations/weights/deepscan/" COPYONLY) + # ENDFOREACH() + + # INSTALL( + # DIRECTORY "${PROJECT_SOURCE_DIR}/OpenFederatedLearning" + # DESTINATION bin + # PATTERN ".git" EXCLUDE + # PATTERN "venv" EXCLUDE + # ) - INSTALL( - DIRECTORY "${PROJECT_SOURCE_DIR}/LabelFusion" - DESTINATION bin - PATTERN ".git" EXCLUDE - PATTERN "venv" EXCLUDE - ) + # INSTALL( + # DIRECTORY "${PROJECT_SOURCE_DIR}/LabelFusion" + # DESTINATION bin + # PATTERN ".git" EXCLUDE + # PATTERN "venv" EXCLUDE + # ) - INSTALL( - DIRECTORY "${PROJECT_SOURCE_DIR}/BrainMaGe" - DESTINATION bin - PATTERN ".git" EXCLUDE - PATTERN "venv" EXCLUDE - ) + # INSTALL( + # DIRECTORY "${PROJECT_SOURCE_DIR}/BrainMaGe" + # DESTINATION bin + # PATTERN ".git" EXCLUDE + # PATTERN "venv" EXCLUDE + # ) - FILE( GLOB_RECURSE ALL_PY_FILES "${PROJECT_SOURCE_DIR}/src/applications/*.py" ) + FILE( GLOB ALL_PY_FILES "${PROJECT_SOURCE_DIR}/src/applications/*.py" ) FOREACH( pyFile ${ALL_PY_FILES} ) - INSTALL( FILES "${pyFile}" DESTINATION bin ) + INSTALL( FILES "${pyFile}" DESTINATION bin PERMISSIONS OWNER_READ OWNER_WRITE OWNER_EXECUTE GROUP_READ GROUP_EXECUTE WORLD_READ WORLD_EXECUTE) ENDFOREACH() # find stand-alone CMD applications @@ -1046,9 +1046,9 @@ IF( NOT BUILD_DOCUMENTATION_ONLY ) SET(CPACK_NSIS_ENABLE_UNINSTALL_BEFORE_INSTALL ON) SET(CPACK_NSIS_MUI_ICON "${DATA_DIR}/icons/application/windows/FeTS.ico") SET(CPACK_NSIS_MUI_FINISHPAGE_RUN "${EXE_NAME}.exe") - SET(CPACK_NSIS_HELP_LINK "https:\\\\\\\\www.med.upenn.edu\\\\cbica\\\\captk\\\\") + SET(CPACK_NSIS_HELP_LINK "https:\\\\\\\\www.fets.ai\\\\") SET(CPACK_NSIS_CONTACT "${PROJECT_CONTACT}") - SET(CPACK_NSIS_URL_INFO_ABOUT "https:\\\\\\\\www.med.upenn.edu\\\\cbica\\\\captk\\\\") + SET(CPACK_NSIS_URL_INFO_ABOUT "https:\\\\\\\\www.fets.ai\\\\") #SET(CPACK_NSIS_MODIFY_PATH "ON") # desktop shortcut doesn't work, for some reason # create extra shortcuts 
and respective uninstall targets SET(CPACK_NSIS_CREATE_ICONS_EXTRA "CreateShortCut '\$SMPROGRAMS\\\\$STARTMENU_FOLDER\\\\${EXE_NAME}.lnk' '\$INSTDIR\\\\bin\\\\${EXE_NAME}.exe'") diff --git a/Dockerfile b/Dockerfile index 71387d7e..1c23e327 100644 --- a/Dockerfile +++ b/Dockerfile @@ -1,65 +1,54 @@ -FROM ghcr.io/fets-ai/fetstool_docker_dependencies:0.0.2.gpu +FROM ghcr.io/fets-ai/fetstool_docker_dependencies AS fets_base LABEL authors="FeTS_Admin " RUN apt-get update && apt-get update --fix-missing && apt-get install -y libnss3 libnspr4 libxcursor1 libxcursor-dev libasound2 libdbus-1-dev libglfw3-dev libgles2-mesa-dev ffmpeg libsm6 libxext6 python3.8 python3.8-venv python3.8-dev python3-setuptools -# older python -RUN apt-get update -y && apt install -y --reinstall software-properties-common && add-apt-repository ppa:deadsnakes/ppa && apt update -y && apt install -y python3.7 python3.7-venv python3.7-dev python3-setuptools - -ENV PATH=/workspace/CaPTk/bin/qt/5.12.1/bin:/workspace/CaPTk/bin/qt/5.12.1/libexec:$PATH -ENV CMAKE_PREFIX_PATH=/workspace/CaPTk/bin/ITK-build:/workspace/CaPTk/bin/DCMTK-build:/workspace/CaPTk/bin/qt/5.12.1/lib/cmake/Qt5:$CMAKE_PREFIX_PATH -ENV SKLEARN_ALLOW_DEPRECATED_SKLEARN_PACKAGE_INSTALL=True +ENV PATH=/CaPTk/bin/qt/5.12.1/bin:/CaPTk/bin/qt/5.12.1/libexec:$PATH +ENV CMAKE_PREFIX_PATH=/CaPTk/bin/ITK-build:/CaPTk/bin/DCMTK-build:/CaPTk/bin/qt/5.12.1/lib/cmake/Qt5:$CMAKE_PREFIX_PATH RUN pwd && ls -l WORKDIR /Front-End -COPY . . +# Download model checkpoints to torch checkpoint location +# https://pytorch.org/docs/stable/hub.html#where-are-my-downloaded-models-saved +ENV TORCH_HOME="/.pytorch_cache" +RUN mkdir -p $TORCH_HOME/hub/checkpoints && \ + wget -O $TORCH_HOME/hub/checkpoints/dpn98-722954780.pth http://data.lip6.fr/cadene/pretrainedmodels/dpn98-722954780.pth --no-check-certificate && \ + wget -O $TORCH_HOME/hub/checkpoints/resnet50-19c8e357.pth https://download.pytorch.org/models/resnet50-19c8e357.pth + +COPY src src + +COPY CMakeLists.txt README.txt LICENSE . + +COPY cmake_modules cmake_modules -RUN pwd && ls -l && mv ./data/Algorithms_for_fetsTool1.0.zip OpenFederatedLearning/submodules/fets_ai/ && cd OpenFederatedLearning/submodules/fets_ai/ && unzip -qq Algorithms_for_fetsTool1.0.zip && rm -rf Algorithms_for_fetsTool1.0.zip +COPY data data -RUN pwd && ls -l && mv ./data/GANDLF_for_fetsTool1.0.zip OpenFederatedLearning/submodules/fets_ai/Algorithms && cd OpenFederatedLearning/submodules/fets_ai/Algorithms && unzip -qq GANDLF_for_fetsTool1.0.zip && rm -rf GANDLF_for_fetsTool1.0.zip +COPY docs_sources docs_sources + +RUN pwd && ls -l ## C++ build -RUN mkdir bin && cd bin && cmake -DCMAKE_INSTALL_PREFIX="./install/appdir/usr" -DITK_DIR="/workspace/CaPTk/bin/ITK-build" -DDCMTK_DIR="/workspace/CaPTk/bin/DCMTK-build" -DBUILD_TESTING=OFF .. && make -j$(nproc) && make install/strip - -# ## Python package installation -- this is for the new docker image, which is much simpler -# RUN cd bin/install/appdir/usr/bin/ && python3.8 -m venv ./venv && ./venv/bin/pip install --upgrade pip wheel && ./venv/bin/pip install torch==1.13.1+cpu torchvision==0.14.1+cpu torchaudio==0.13.1 --extra-index-url https://download.pytorch.org/whl/cpu && ./venv/bin/pip install -e . && ./venv/bin/pip install setuptools-rust Cython scikit-build scikit-learn openvino-dev==2023.0.1 && ./venv/bin/pip install -e . 
- -# set up environment and install correct version of pytorch -RUN echo "Setting up virtual environment for OpenFederatedLearning with base dependencies" && \ - cd bin/install/appdir/usr/bin/OpenFederatedLearning && \ - rm -rf ./venv && python3.7 -m venv ./venv && ./venv/bin/pip install Cython && \ - ./venv/bin/pip install --upgrade pip setuptools wheel setuptools-rust && \ - ./venv/bin/pip install torch==1.7.1+cu110 torchvision==0.8.2+cu110 torchaudio==0.7.2 -f https://download.pytorch.org/whl/torch_stable.html && \ - ./venv/bin/pip install wheel && \ - ./venv/bin/pip install SimpleITK==1.2.4 && \ - ./venv/bin/pip install protobuf==3.17.3 grpcio==1.30.0 && \ - ./venv/bin/pip install opencv-python==4.2.0.34 && \ - ./venv/bin/pip install scikit-build scikit-learn && \ - make install_openfl && \ - make install_openfl_pytorch - -RUN echo "Setting up virtual environment for OpenFederatedLearning with second-level dependencies" && \ - cd bin/install/appdir/usr/bin/OpenFederatedLearning && \ - ./venv/bin/pip install ../BrainMaGe && \ - ./venv/bin/pip install ./submodules/fets_ai/Algorithms && \ - ./venv/bin/pip install -e ./submodules/fets_ai/Algorithms/GANDLF - -RUN echo "Installing separate environment for LabelFusion" && \ - cd bin/install/appdir/usr/bin/LabelFusion && \ - rm -rf venv && python3.8 -m venv ./venv && \ - ./venv/bin/pip install --upgrade pip setuptools wheel setuptools-rust && \ - ./venv/bin/pip install -e . - -RUN echo "Downloading model weights" && \ - cd bin/install/appdir/usr/data && \ - wget https://upenn.box.com/shared/static/f7zt19d08c545qt3tcaeg7b37z6qafum.zip -O nnunet.zip && \ - unzip -qq nnunet.zip && rm -rf nnunet.zip && \ - wget https://upenn.box.com/shared/static/hhvn8nb9xtz6nxcilmdl8kbx9n1afkdu.zip -O ./fets_consensus_models.zip && \ - unzip -qq fets_consensus_models.zip && rm -rf fets_consensus_models.zip - +RUN mkdir bin && cd bin && cmake -DCMAKE_INSTALL_PREFIX="./install/appdir/usr" -DITK_DIR="/CaPTk/bin/ITK-build" -DDCMTK_DIR="/CaPTk/bin/DCMTK-build" -DBUILD_TESTING=OFF .. && make -j$(nproc) && make install/strip + +## Python package installation +RUN apt-get install software-properties-common curl -y && \ + add-apt-repository ppa:deadsnakes/ppa -y && apt-get update && \ + apt-get install python3.8 python3.8-distutils -y && \ + apt-get remove --purge python3.6 -y && \ + apt autoremove -y && \ + apt-get install python3.8-distutils -y && \ + rm -fr /usr/bin/python /usr/bin/python3 /usr/bin/pip /usr/bin/pip3 && \ + ln -s /usr/bin/python3.8 /usr/bin/python && ln -s /usr/bin/python3.8 /usr/bin/python3 && \ + ln -s /usr/bin/pip3.8 /usr/bin/pip && ln -s /usr/bin/pip3.8 /usr/bin/pip3 + +RUN curl -fSsL -O https://bootstrap.pypa.io/get-pip.py -o get-pip.py && \ + python3.8 get-pip.py && rm get-pip.py + +RUN cd bin/install/appdir/usr/bin/ && pip install --upgrade pip wheel && pip install torch==1.13.1+cpu torchvision==0.14.1+cpu torchaudio==0.13.1 --extra-index-url https://download.pytorch.org/whl/cpu && pip install -e . && pip install setuptools-rust Cython scikit-build scikit-learn openvino==2023.0.1 openvino-dev==2023.0.1 && pip install -e . 
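+# Two build stages are defined in this file: "fets_base" (everything above) and
+# "data_prep" (declared further below). If only one stage is needed, it can be
+# built on its own with Docker's --target flag, e.g.:
+#   docker build --target data_prep -t rano-data-prep .
+# (the image tag shown here is only an illustrative placeholder)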
+ ### put together a data example that is already aligned and ready to invoke the brain extraction and tumor segmentation # set up the docker for GUI @@ -71,4 +60,43 @@ ENV QT_GRAPHICSSYSTEM="native" RUN echo "Env paths\n" && echo $PATH && echo $LD_LIBRARY_PATH # define entry point -ENTRYPOINT ["/Front-End/bin/install/appdir/usr/bin/FeTS_CLI_Segment"] +ENTRYPOINT ["python", "/Front-End/bin/install/appdir/usr/bin/PrepareDataset.py"] + +FROM fets_base AS data_prep + +RUN find /Front-End/bin/install/appdir/usr/bin -type f \( -perm -u=x -o -type l \) -exec cp -P {} /usr/bin \; + +WORKDIR / + +COPY ./mlcubes/data_preparation/project/requirements.txt /project/requirements.txt + +RUN pip install --upgrade pip + +RUN pip install -r /project/requirements.txt + +ENV LANG C.UTF-8 + +RUN mkdir /project/stages + +RUN cp /Front-End/bin/install/appdir/usr/bin/*.py /project/stages/ + +RUN cp -R /Front-End/bin/install/appdir/usr/bin/data_prep_models /project/stages/data_prep_models + +# Hotfix: install more recent version of GaNDLF for metrics generation +RUN pip install git+https://github.com/mlcommons/GaNDLF@616b37bafad8f89d5c816a88f44fa30470601311 + +# setup a separate env for nnunet +RUN python -m venv /nnunet_env && /nnunet_env/bin/pip install --upgrade pip + +RUN /nnunet_env/bin/pip install torch==1.12.1+cu102 torchvision==0.13.1+cu102 torchaudio==0.12.1 --extra-index-url https://download.pytorch.org/whl/cu102 + +RUN /nnunet_env/bin/pip install git+https://github.com/MIC-DKFZ/nnUNet.git@nnunetv1 + +ENV nnUNet_raw_data_base="/tmp/nnUNet_raw_data_base" +ENV nnUNet_preprocessed="/tmp/nnUNet_preprocessed" +# see https://docs.docker.com/config/containers/resource_constraints/#gpu for detailed explanation +ENV CUDA_VISIBLE_DEVICES="0" + +COPY ./mlcubes/data_preparation/project /project + +ENTRYPOINT ["python", "/project/mlcube.py"] diff --git a/data/cwlFiles/FeTS_CLI.cwl b/data/cwlFiles/FeTS_CLI.cwl deleted file mode 100644 index f116434f..00000000 --- a/data/cwlFiles/FeTS_CLI.cwl +++ /dev/null @@ -1,80 +0,0 @@ -cwlVersion: v1.0 -class: CommandLineTool -baseCommand: FeTS_CLI -inputs: - dataDir: - type: Directory - label: Dir with Read/Write access - inputBinding: - position: 1 - prefix: -d - doc: Input data directory. - training: - type: boolean - label: 0 or 1 - inputBinding: - position: 1 - prefix: -t - doc: Whether performing training or inference.1==Train and 0==Inference. - runtest: - type: string? - label: none - inputBinding: - position: 1 - prefix: -rt - doc: Runs the tests. - cwl: - type: string? - label: none - inputBinding: - position: 1 - prefix: -cwl - doc: Generates a .cwl file for the software. - trainPlan: - type: boolean? - label: YAML file - inputBinding: - position: 1 - prefix: -tp - doc: Training plan.Defaults to 'fets_phase1_1'. - LoggingDir: - type: Directory? - label: Dir with write access - inputBinding: - position: 1 - prefix: -L - doc: Location of logging directory. - archs: - type: string? - label: 3DResUNet,deepMedic,deepscan - inputBinding: - position: 1 - prefix: -a - doc: "The architecture(s) to infer/train on.Only a single architecture is supported for training.Comma-separated values for multiple options.Defaults to: 3dresunet." - labelFuse: - type: string? - label: STAPLE,ITKVoting,SIMPLE,MajorityVoting - inputBinding: - position: 1 - prefix: -lF - doc: "The label fusion strategy to follow for multi-arch inference.Comma-separated values for multiple options.Defaults to: STAPLE." - gpu: - type: boolean? 
- label: 0-1 - inputBinding: - position: 1 - prefix: -g - doc: Whether to run the process on GPU or not.Defaults to '0'. - colName: - type: string? - label: none - inputBinding: - position: 1 - prefix: -c - doc: Common name of collaborator.Required for training. -hints: - SoftwareRequirement: - packages: - FeTS_CLI: - version: - - 0.0.2 \ No newline at end of file diff --git a/data/cwlFiles/FullProcessingPipeline.cwl b/data/cwlFiles/FullProcessingPipeline.cwl deleted file mode 100644 index 29f6337a..00000000 --- a/data/cwlFiles/FullProcessingPipeline.cwl +++ /dev/null @@ -1,66 +0,0 @@ -cwlVersion: v1.0 -class: CommandLineTool -baseCommand: FullProcessingPipeline -inputs: - inputCSV: - type: File - label: Input CSV file - inputBinding: - position: 1 - prefix: -i - doc: Input CSV file which contains paths to structural images.Headers should be 'PatientID,T1,T1GD,T2,T2FLAIR'. - outputDir: - type: Directory - label: Directory - inputBinding: - position: 1 - prefix: -o - doc: "Output directory for final output.This will write 2 folders: 'DataForFeTS' and 'DataForQC'.Former contains only the files needed for FeTS inference/training and .latter contains all intermediate files from this processing." - runtest: - type: string? - label: none - inputBinding: - position: 1 - prefix: -rt - doc: Runs the tests. - cwl: - type: string? - label: none - inputBinding: - position: 1 - prefix: -cwl - doc: Generates a .cwl file for the software. - archs: - type: string? - label: 3DResUNet,deepMedic,deepscan - inputBinding: - position: 1 - prefix: -a - doc: "The architecture(s) to infer/train on.Only a single architecture is supported for training.Comma-separated values for multiple options.Defaults to: 3dresunet." - labelFuse: - type: string? - label: STAPLE,ITKVoting,SIMPLE,MajorityVoting - inputBinding: - position: 1 - prefix: -lF - doc: "The label fusion strategy to follow for multi-arch inference.Comma-separated values for multiple options.Defaults to: STAPLE." - gpu: - type: boolean? - label: 0-1 - inputBinding: - position: 1 - prefix: -g - doc: Whether to run the process on GPU or not.Defaults to '0'. - LoggingDir: - type: Directory? - label: Dir with write access - inputBinding: - position: 1 - prefix: -L - doc: Location of logging directory. -hints: - SoftwareRequirement: - packages: - FullProcessingPipeline: - version: - - 0.0.2 \ No newline at end of file diff --git a/data/cwlFiles/PrepareDataset.cwl b/data/cwlFiles/PrepareDataset.cwl deleted file mode 100644 index b38c6082..00000000 --- a/data/cwlFiles/PrepareDataset.cwl +++ /dev/null @@ -1,38 +0,0 @@ -cwlVersion: v1.0 -class: CommandLineTool -baseCommand: PrepareDataset -inputs: - inputCSV: - type: File - label: Input CSV file - inputBinding: - position: 1 - prefix: -i - doc: Input CSV file which contains paths to structural images.Headers should be 'PatientID,T1,T1GD,T2,T2FLAIR'. - outputDir: - type: Directory - label: Directory - inputBinding: - position: 1 - prefix: -o - doc: "Output directory for final output.This will write 2 folders: 'DataForFeTS' and 'DataForQC'.Former contains only the files needed for FeTS inference/training and .latter contains all intermediate files from this processing." - runtest: - type: string? - label: none - inputBinding: - position: 1 - prefix: -rt - doc: Runs the tests. - cwl: - type: string? - label: none - inputBinding: - position: 1 - prefix: -cwl - doc: Generates a .cwl file for the software. 
-hints: - SoftwareRequirement: - packages: - PrepareDataset: - version: - - 0.0.2 \ No newline at end of file diff --git a/docs/_config.yaml b/docs/_config.yaml index 25e6133d..276a6828 100644 --- a/docs/_config.yaml +++ b/docs/_config.yaml @@ -12,4 +12,6 @@ navigation: - title: Extras url: extras - title: ITCR Connectivity - url: itcr_connectivity \ No newline at end of file + url: itcr_connectivity + - title: RANO Federation + url: rano_federation \ No newline at end of file diff --git a/docs/assets/img/rano_fig1.png b/docs/assets/img/rano_fig1.png new file mode 100644 index 00000000..ae208418 Binary files /dev/null and b/docs/assets/img/rano_fig1.png differ diff --git a/docs/assets/img/rano_fig2.png b/docs/assets/img/rano_fig2.png new file mode 100644 index 00000000..90293db4 Binary files /dev/null and b/docs/assets/img/rano_fig2.png differ diff --git a/docs/assets/img/rano_fig3.png b/docs/assets/img/rano_fig3.png new file mode 100644 index 00000000..50ad5183 Binary files /dev/null and b/docs/assets/img/rano_fig3.png differ diff --git a/docs/assets/img/rano_fig4.png b/docs/assets/img/rano_fig4.png new file mode 100644 index 00000000..287571bb Binary files /dev/null and b/docs/assets/img/rano_fig4.png differ diff --git a/docs/assets/img/rano_fig5.png b/docs/assets/img/rano_fig5.png new file mode 100644 index 00000000..1e9c41ac Binary files /dev/null and b/docs/assets/img/rano_fig5.png differ diff --git a/docs/assets/img/rano_fig6.png b/docs/assets/img/rano_fig6.png new file mode 100644 index 00000000..82dcdf7c Binary files /dev/null and b/docs/assets/img/rano_fig6.png differ diff --git a/docs/assets/img/rano_fig7.png b/docs/assets/img/rano_fig7.png new file mode 100644 index 00000000..2545a525 Binary files /dev/null and b/docs/assets/img/rano_fig7.png differ diff --git a/docs/assets/img/rano_fig8.png b/docs/assets/img/rano_fig8.png new file mode 100644 index 00000000..70be5585 Binary files /dev/null and b/docs/assets/img/rano_fig8.png differ diff --git a/docs/assets/img/rano_fig9.png b/docs/assets/img/rano_fig9.png new file mode 100644 index 00000000..fc1ebb96 Binary files /dev/null and b/docs/assets/img/rano_fig9.png differ diff --git a/docs/rano_federation.md b/docs/rano_federation.md new file mode 100644 index 00000000..78b87c10 --- /dev/null +++ b/docs/rano_federation.md @@ -0,0 +1,415 @@ +# RANO Federation + +## Table of contents +- [RANO Federation](#rano-federation) + - [Table of contents](#table-of-contents) + - [Introduction](#introduction) + - [System Requirements](#system-requirements) +- [Instructions](#instructions) + - [Install a containerization tool](#install-a-containerization-tool) + - [Installing Docker](#installing-docker) + - [Installing Singularity](#installing-singularity) + - [Install ITK-SNAP](#install-itk-snap) + - [Structuring your data](#structuring-your-data) + - [DICOM Data (STRONGLY PREFERRED)](#dicom-data-strongly-preferred) + - [Non-Preprocessed NIfTI Data](#non-preprocessed-nifti-data) + - [Co-Registered and Skull-Stripped NIfTI Data](#co-registered-and-skull-stripped-nifti-data) + - [Co-Registered and Skull-Stripped NiFTI with accompanying tumor segmentation](#co-registered-and-skull-stripped-nifti-with-accompanying-tumor-segmentation) + - [Setting up MedPerf](#setting-up-medperf) + - [Set up your MedPerf profile](#set-up-your-medperf-profile) + - [Setting Singularity as the default container runner](#setting-singularity-as-the-default-container-runner) + - [Setting the number of GPUs](#setting-the-number-of-gpus) + - [Executing the Data Preparation 
Procedure](#executing-the-data-preparation-procedure) + - [Preparation with MedPerf](#preparation-with-medperf) + - [Monitoring tool](#monitoring-tool) +- [Additional Information](#additional-information) + - [Updating MedPerf](#updating-medperf) + +## Introduction + +MedPerf is an open benchmarking platform for medical artificial intelligence using Federated Evaluation. This document will provide all the details required for installing the MedPerf package for the Data preparation stage of the FL – POST challenge. This document enlists the system requirements for the project, available documentation, and instructions for running the MedPerf for the Data preparation task. + +## System Requirements + +1. Operating System: Supported operating systems include Ubuntu Linux 18+ and Windows 10+ with Windows Subsystem for Linux. Other operating systems may work but are not officially supported. +2. Memory Requirements: 16GB or more +3. Storage: 25GB for MedPerf and accompanying MLCube. Also at least 2 times the size of your dataset must be available to proceed (e.g. your dataset weights 100GB, then you should have an additional 200GB available) + + **Note:** If using Singularity, we need an additional 100GB of space for the Docker image conversion. + +1. Graphics Card (GPU): A GPU with 12GB or more VRAM. + + [Back To Top ↑](#table-of-contents) + +# Instructions + +## Install a containerization tool + +MedPerf uses container technology to enable running code on the host machine. Because of this, a containerization tool needs to be installed on your machine prior to going through the preparation procedure. We currently support Docker and Singularity. Only one of the two options is required: + +### Installing Docker + +1. To install Docker on Ubuntu-based machines, please follow the instructions outlined [here](https://docs.docker.com/engine/install/ubuntu/). For Windows with WSL, please refer to [this guide](https://learn.microsoft.com/en-us/windows/wsl/tutorials/wsl-containers). +2. Confirm that you can run docker **without** sudo. If the following command runs without error, you can skip this section: +``` +docker run hello-world +``` + +3. If the above fails, first ensure that you have installed docker _including the recommended post install steps_: [https://docs.docker.com/engine/install/](https://docs.docker.com/engine/install/). +4. If you are still not able to run the hello-world command without sudo on Linux, there are several potential fixes. First, make sure you have followed the post install instructions from the link in step 2. Next, you can try to run the following commands, rechecking the hello-world command after each one: +``` +sudo systemctl restart docker +unset DOCKER_HOST +docker context use default +``` + +5. Note that docker rootless is not supported and may result in issues. It is recommended to remove docker rootless before proceeding. + +### Installing Singularity + +1. To install singularity, please refer to the following [instructions](https://docs.sylabs.io/guides/3.0/user-guide/installation.html). For Windows with WSL, follow [this article](https://www.blopig.com/blog/2021/09/using-singularity-on-windows-with-wsl2/). +2. Confirm your installation is working correctly by running the following commands +``` +singularity pull docker://godlovedc/lolcow +singularity run lolcow_latest.sif +``` + + [Back To Top ↑](#table-of-contents) + +## Install ITK-SNAP + +ITK-SNAP is the preferred tool for making annotations on the data. 
It is supported by default by the provided tools, and automations have been provided to simplify the annotation process. For both Linux and windows with WSL, please follow the instructions for [installing ITK-SNAP on Linux](http://www.itksnap.org/pmwiki/pmwiki.php?n=Documentation.TutorialSectionInstallation). + +If using a Windows WSL, you may need to install the following dependencies to get ITK-SNAP working: +``` +sudo apt-get update -y +sudo apt-get install -y libglvnd-dev +sudo apt-get install -y libxkbcommon-x11-0 +sudo apt-get install -y qtwayland5 +``` + +You can verify your installation by running the following command on a new terminal window +``` +itksnap +``` + +If ITK-SNAP starts after running the command, your installation is successful. + +[Back To Top ↑](#table-of-contents) + +## Structuring your data + +In order to start the data preparation procedure, it is necessary to ensure that the input data is structured in a way that MedPerf can identify and process correctly. You may create your data directory anywhere, but please ensure that it is in a location with relatively fast read/write access and with at least 2x more free disk space than your dataset currently occupies. + +**Note:** Input can be either DICOM files (before any coregistration or skull-stripping), non-preprocessed NIfTI files (before any coregistration or skull-stripping), or fully preprocessed, coregistered, skullstripped NIfTI files. + +Several different data structures are currently supported. Please ensure your data follows one of these possible folder structures before continuing: + +### DICOM Data (STRONGLY PREFERRED) + +The data preparation process can handle DICOM images that have not been co-registered nor skull-stripped.For this, your data needs to follow a folder hierarchy where images are separated by `//` + +**Please note:** Series-level folders must use the following abbreviations: t2f (T2-weighted FLAIR), t1n (T1-weighted non-contrast), t1c (T1-weighted with contrast), and t2w (T2-weighted). For more information about the required series, please [refer to the FeTS 2.0 manual](https://docs.google.com/document/d/1DVyGJBOBR-B_7uDRMp9Cnkj6pIXosgzCZa-qQS-olTc/edit#heading=h.ctr3ehc5ev9c). PatientID and Timepoint must be unique between and within patients, respectively, and Timepoint should be sortable into chronologic order. +``` +. +├──AAAC_0 +│ ├──2008.03.30 +│ │ ├──t2f +│ │ │ ├──t2_Flair_axial-2_echo1_S0002_I000001.dcm +│ │ │ └──... +│ │ ├──t1n +│ │ │ ├──t1_axial-3_echo1_S0003_I000001.dcm +│ │ │ └──... +│ │ ├──t1c +│ │ │ ├──t1_axial_stealth-post-14_echo1_S0014_I000001.dcm +│ │ │ └──... +│ │ └──t2w +│ │ │ ├──T2_SAG_SPACE-4_echo1_S0004_I000001.dcm +│ │ │ └──... +``` +### Non-Preprocessed NIfTI Data + +Although DICOM data is strongly preferred, the Data preparation pipeline supports NIfTI files that are not yet co-registered nor skull-stripped. For the data preparation procedure to handle this scenario, a strict folder structure and naming convention must be followed. Please ensure your NIfTI files are contained in a `/` folder structure, and that each NIfTI image is named as `.nii.gz`. Please note, this workaround should only be used if you no longer have access to the original DICOM data! +``` +. 
+├──AAAC_0 +│ ├──2008.03.30 +│ │ ├──t1c.nii.gz +│ │ ├──t1n.nii.gz +│ │ ├──t2f.nii.gz +│ │ └──t2w.nii.gz +│ ├──2008.12.17 +│ │ ├──t1c.nii.gz +│ │ ├──t1n.nii.gz +│ │ ├──t2f.nii.gz +│ │ └──t2w.nii.gz +``` + +### Co-Registered and Skull-Stripped NIfTI Data + +Additional to non-preprocessed NIfTI files, the Data preparation process supports NIfTI images that have already been previously prepared such that the data are co-registered to each other, in the SRI atlas space, and the skull of the patient is removed such that only the brain is present. For these cases to be identified, a strict naming convention must be followed, where each NiFTI file is named as `__brain_.nii.gz`. Please note, this workaround should only be used if you no longer have access to the original DICOM data! +``` +. +├──AAAC_0 +│ ├──2008.03.30 +│ │ ├──AAAC_0_2008.03.30_brain_t1c.nii.gz +│ │ ├──AAAC_0_2008.03.30_brain_t1n.nii.gz +│ │ ├──AAAC_0_2008.03.30_brain_t2f.nii.gz +│ │ └──AAAC_0_2008.03.30_brain_t2w.nii.gz +│ ├──2008.12.17 +│ │ ├──AAAC_0_2008.12.17_brain_t1c.nii.gz +│ │ ├──AAAC_0_2008.12.17_brain_t1n.nii.gz +│ │ ├──AAAC_0_2008.12.17_brain_t2f.nii.gz +│ │ └──AAAC_0_2008.12.17_brain_t2w.nii.gz +``` +A folder hierarchy of `PatientID/Timepoint` is preferred, but the pipeline also correctly handles structures where all timepoints are combined for a given patient. +``` +. +├──AAAC_0 +│ ├──2008.03.30 +│ │ ├──AAAC_0_2008.03.30_brain_t1c.nii.gz +│ │ ├──AAAC_0_2008.03.30_brain_t1n.nii.gz +│ │ ├──AAAC_0_2008.03.30_brain_t2f.nii.gz +│ │ └──AAAC_0_2008.03.30_brain_t2w.nii.gz +│ │ ├──AAAC_0_2008.12.17_brain_t1c.nii.gz +│ │ ├──AAAC_0_2008.12.17_brain_t1n.nii.gz +│ │ ├──AAAC_0_2008.12.17_brain_t2f.nii.gz +│ │ └──AAAC_0_2008.12.17_brain_t2w.nii.gz +``` + +### Co-Registered and Skull-Stripped NiFTI with accompanying tumor segmentation + +Lastly, the pipeline can also handle cases where brain-isolated NiFTI images are provided, along with a tumor segmentation mask. Additional to all requirements specified for Skull-stripped NiFTIs, the tumor segmentation must follow a strict naming convention as well, of `__final_seg.nii.gz`. + +**Note:** The data preparation pipeline will assume the tumor segmentation to have already been manually corrected, but you will have the opportunity to make further modifications if needed as explained in the **manual annotations** section. +``` +. +├──AAAC_0 +│ ├──2008.03.30 +│ │ ├──AAAC_0_2008.03.30_brain_t1c.nii.gz +│ │ ├──AAAC_0_2008.03.30_brain_t1n.nii.gz +│ │ ├──AAAC_0_2008.03.30_brain_t2f.nii.gz +│ │ ├──AAAC_0_2008.03.30_brain_t2w.nii.gz +│ │ └──AAAC_0_2008.03.30_final_seg.nii.gz # Segmentation mask +``` + +[Back To Top ↑](#table-of-contents) + +## Setting up MedPerf + +1. Download or Clone the MedPerf branch for this MLCube + + This command will download / clone the MedPerf package to the local machine and this will be used to build the MLCube. It is recommended to run this command in your user/home directory. + ``` + git clone --branch data-prep-manual-steps https://github.com/aristizabal95/medperf-2 ./medperf + cd medperf + ``` +
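+   As an optional sanity check before continuing, you can confirm that the clone is on the expected branch (the branch name comes from the clone command above):
+   ```
+   git branch --show-current   # expected output: data-prep-manual-steps
+   ```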
+![Figure 1](assets/img/rano_fig1.png)
+*Figure 1. MedPerf GitHub repository. This repository can be downloaded or cloned to the work machine.*
+ +2. Installing MedPerf Package + + MedPerf is best installed using a virtual environment. We recommend using Anaconda. Anaconda installation instructions can be found here: [https://docs.anaconda.com/free/anaconda/install/index.html](https://docs.anaconda.com/free/anaconda/install/index.html). If you prefer not to install Anaconda (not recommended), then ensure that you have python 3.9x with pip and skip to step 3. + + - Using Anaconda, create a virtual environment named MedPerf-env with the following command: + ``` + conda create -n MedPerf-env python=3.9 + ``` + + - Now we will activate the created environment so that we can use it. From the above step, we have created an environment named 'MedPerf-env'. The following command will activate the environment: + ``` + conda activate MedPerf-env + ``` + +3. Install the latest version of MedPerf from source. This can be done by running the below command inside the MedPerf folder. +``` +pip install -e ./cli +``` + +1. Verify the Installation, Check the version of the MedPerf package that was installed. The expected output is: MedPerf version x.x.x +``` +MedPerf --version +``` + +1. Create a MedPerf account if you do not already have one. Keep in mind that the MedPerf team must give you clearance before you can create your user account. If you're having trouble signing up, please contact the MedPerf team. + + Navigate to [https://www.MedPerf.org/](https://www.MedPerf.org/) and click the "Sign Up" link in the top right. Enter your email, and verify using the code sent to your email address. + +2. Log into the MedPerf CLI with your MedPerf account + + This step is required for logging into the MedPerf server. You can log in on your CLI with + ``` + MedPerf auth login + ``` + + When prompted, enter the email address associated with your MedPerf account. Next, follow the link in your terminal, confirm that the code is correct, and log in to your MedPerf account. You will once again verify using a code sent to your email address. Once login and verification is complete, the terminal output should say: Done! You may need to authenticate again in the future after a timeout period. + +### Set up your MedPerf profile + +Depending on your configuration, you may need to modify your MedPerf profile so that it uses your containerization tool and the desired number of GPUs. MedPerf by default uses Docker, and runs without GPUs specified. The following are commands that you can use to change these default settings to your needs. + +#### Setting Singularity as the default container runner + +If you have decided to use Singularity instead of Docker, you need to define that as your default container runner on MedPerf. You can do so easily by running the following command +``` +MedPerf profile set --platform=singularity +``` + +#### Setting the number of GPUs + +If you desire to work with a GPU, the best approach would be to set it by default on MedPerf. You can do so by running the following command +``` +MedPerf profile set --gpus=0 +``` + +The values accepted by the gpus flag are: + +- `--gpus=all` to expose all GPUs to the container +- `--gpus=` to specify the GPU count visible to the container (e.g., `--gpus=3` will expose three GPUs) +- `--gpus="device="` to specify a specific GPU to be exposed to the container (e.g., `--gpus="device=1"` will expose GPU of ID #1) + +**Note:** We recommend just assigning one GPU to reduce the chance of failure due to incompatible GPU images. 
The Data Preparation procedure will only use one GPU even if multiple are exposed, so there's no real benefit in exposing more. + +You can verify your profile configuration by running the following command +``` +MedPerf profile view +``` + +Please ensure the platform and GPUs value are assigned properly before you proceed. + +[Back To Top ↑](#table-of-contents) + +## Executing the Data Preparation Procedure + +To aid in the process of data labeling and preparation, an MLCube has been provided to streamline and automate most of the workflow. This MLCube will execute the following transformations to the input data: + +- **NiFTI Conversion:** The input data, if it is in DICOM, will be converted into NiFTI ensuring that it is oriented correctly. +- **Skull Stripping:** NiFTI images will be processed by an ensemble of brain-mask segmentation models to remove the skull from the scans. +- **Baseline Tumor Segmentation:** An ensemble of tumor segmentation models have been provided to aid in the creation of labels for the data. This baseline segmentation should accelerate the manual labeling process. +- **Manual review of segmentations:** The MLCube provides the required logic to wait for a manual review process to be done on the segmentations before proceeding. +- **Split creation:** After all the data and labels have been processed correctly, splits for training and validation are automatically generated. + +### Preparation with MedPerf + +**Requirements:** + +On linux systems, make sure to start Docker using the following command: +``` +sudo systemctl start docker +``` + +**Start the data preparation procedure with MedPerf.** + +To start the Data Preparation MLCube, you must first register your dataset. This can be done with the MedPerf dataset submit command. You must provide the location of your data and labels (in this case the labels path should point to the same data path). The data path should point to the folder where your subject/timepoints folders are located. This folder should contain one of the folder structures [explained previously](#_heading=h.emknfj8yheyw). Additionally, you need to specify a unique name for your data no longer than 20 characters, a description no longer than 20 characters and the location of origin of your dataset. The specificity of the dataset location is left to the user, as to what level of detail they feel comfortable sharing. The location field may be used in the future to run location-based analyses. + +**Note:** Running this command will trigger the Data Preparation MLCube download to verify the integrity and correctness of the image and associated assets. This could take a while depending on your internet connection. If you're using Singularity expect extra delays (~15 minutes) due to image conversion. + +After submitting your dataset, you can get your dataset ID with `medperf dataset ls`. The dataset ID is a unique number that identifies the previously submitted dataset (e.g. 24, 32, etc). Lastly, start the execution with MedPerf dataset prepare by providing the dataset ID. + +**Note:** **No image or subject data is being shared by registering your dataset or any other MedPerf command in this document**. Any aggregate metadata regarding your dataset, for example % completion of the dataset preparation, will only be sent with your explicit permission, which will be requested at each relevant instance. 
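+For orientation, a filled-in version of the generic registration and preparation commands shown below could look like the following; the data path, dataset name, description, location and dataset ID are purely illustrative placeholders, while the benchmark ID (`-p 98`) and the flags match the generic commands that follow:
+```
+medperf dataset submit -p 98 -d /data/rano_subjects -l /data/rano_subjects \
+    --name "site_a_glioma" --description "Post-op glioma MRI" --location "Philadelphia, USA"
+medperf dataset ls --mine        # note the ID assigned to the newly submitted dataset
+medperf dataset prepare -d 24    # replace 24 with your own dataset ID
+```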
+``` +medperf dataset submit -p 98 -d -l --name --description "" --location "" +medperf dataset ls --mine # retrieve your dset_ID with this command +medperf dataset prepare -d +``` + +When you start your preparation, MedPerf will continuously report back to the server with progress summaries, indicating what proportion of the dataset being prepared is at a given stage, and whether the preparation process is on-going or failed. **This will only be sent with the explicit approval of the user** , which is asked for right after executing the command. + +![Figure 2](assets/img/rano_fig2.png) +*Figure 2. Manual for MedPerf dataset create. This figure shows the options for MedPerf dataset create.* + +### Monitoring tool + +An additional tool has been implemented to simplify and streamline most of the processes related to preparing your dataset. This tool is called the **rano monitoring tool**. It provides a terminal UI that can be launched locally or through ssh, and that will provide real-time updates of your dataset, as well as instructions on how to proceed at every step of the data preparation process. + +On a separate terminal window, install prerequisites and run the monitoring tool. Figure 3. shows the monitoring tool for a given dataset. + +Figure 4. shows the monitoring of the data preparation process on the terminal. +``` +cd path/to/MedPerf # CD to the MedPerf repo retrieved previously +pip install -e scripts/monitor +rano-monitor -d +``` + ![Figure 3](assets/img/rano_fig3.png) +*Figure 3. Summary from monitoring tool. The summary statistics shows the progress for the command, 'MedPerf dataset prepare'.* + + ![Figure 4](assets/img/rano_fig4.png) +*Figure 4. Monitoring data preparation process. This image shows the execution of the data preparation MLCube on the terminal.* + +1. ### Checking the state of your data + + The following are some guidelines to keep in mind while preparing the dataset + + - #### Handling interruption + If the processing gets interrupted (by either being idle for too long, quitting the process or stopping the machine, you can resume it by running the same MedPerf dataset creation command as step 7. + ``` + MedPerf dataset prepare -d + ``` + + - #### Handling data changes + If subjects need to be modified, added or removed, you can do so from within the input data folder, which in this case is . Additionally, you may invalidate subjects that should not be part of your data after examination by clicking the "invalidate" button on the subject view (Figure 5). + + ![Figure 5](assets/img/rano_fig5.png) + *Figure 5. Helper tools for managing and annotating subjects.* + +2. ### Manual annotations +During the data preparation phase, there will come a time where user intervention is needed. This is indicated by the "Manual Review Required" stage within the monitoring tool, as shown within figure 6. This stage requires the user to inspect the baseline tumor segmentation files and make changes if necessary to ensure high quality segmentations for the training procedure. In certain scenarios, the user might need to modify the brain mask generated by the skull-stripping procedure. The monitoring tool is equipped with multiple helpers for handling manual annotation. + + ![Figure 6](assets/img/rano_fig6.png) + *Figure 6. Monitoring tool used for tracking the segmentation. 
Subjects whose state recently changed are highlighted with yellow within the subject list.* + + - #### On-device Annotations + In the scenarios where the user has access to ITK Snap within the same machine as where the data preparation procedure is taking place, the monitoring tool provides buttons for automatically opening the tumor segmentation and/or brain mask. You may use these buttons to make the necessary changes to your data. Once annotations are complete, you may press the Finalize button to complete the process with the given subject (Figure 5). + + - #### Mounted-drive Annotations + In scenarios where the user doesn't have access to ITK Snap within the same machine as where the data preparation procedure is taking place, an option is to mount the MedPerf storage directory remotely to a machine that does have access to this program. You may usually find the MedPerf dataset storage at the following path: + ``` + ~/.medperf/data/api_medperf_org/ + ``` + Then, within the machine that has access to ITK Snap, you can start the monitoring tool by passing the path to the dataset in question. + ``` + rano-monitor -d //.medperf/data/api_medperf_org/ + ``` + This will start the monitoring window as normal, and you will be able to do the manual annotations as described on section 2.b + + - #### Remote Annotation through tarball files + If mounting a remote drive is not a viable option, the monitoring tool allows you to work with tarball files for the manual review procedure. For this, within the "SUMMARY" view is a button for packaging all the subjects that are ready for review inside a tarball file "review_cases.tar.gz". Using this file, you can move it to a machine that has access to ITK Snap and start the monitor tool there, passing the path to the tarball file + ``` + rano-monitor -d path/to/review_cases.tar.gz + ``` + This will open a simplified view of the monitoring tool, in which you may find the same helper buttons for reviewing tumor segmentations and brain masks (Figure 7). + + ![Figure 7](assets/img/rano_fig7.png) + *Figure 7. Monitoring tool for tarball annotation* + + Once you're done reviewing your data, you can finally press the "Package cases" button to create a new tarball file containing the revised segmentations. Placing this file within the remote machine, at the location where the monitor tool is running, will immediately load the data and send it to the correct locations for continuing the data preparation process. + + Manual review is required for every subject within your dataset, and the data preparation pipeline will not proceed until that is the case. + + When all subjects have been reviewed, confirm the dataset looks okay by answering the pop-up from within the monitoring tool. + + ![Figure 8](assets/img/rano_fig8.png) + *Figure 8. Review confirmation message.* + +3. ### Completion Message +If everything went smoothly, the dataset should be prepared. All subjects should indicate they're DONE within the report. Figure 9. shows the completion message, once the data preparation MLCube is completed. + +![Figure 9](assets/img/rano_fig9.png) +*Figure 9. Completion message for the Data preparation MLCube.* + +[Back To Top ↑](#table-of-contents) + +# Additional Information + +## Updating MedPerf + +During the preparation phase, there might be a need for users to update MedPerf. This might be because of bug fixes or features that might aid with the preparation process. 
If you're ever requested to update your MedPerf installation, please follow the next steps: +``` +cd path/to/MedPerf # CD to the MedPerf repo retrieved previously +git pull +pip install --force-reinstall -e ./cli +rm ~/mlcube.yaml +``` +[Back To Top ↑](#table-of-contents) \ No newline at end of file diff --git a/mlcubes/.gitignore b/mlcubes/.gitignore new file mode 100644 index 00000000..be8d1082 --- /dev/null +++ b/mlcubes/.gitignore @@ -0,0 +1,10 @@ +*.csv +*.txt +*.nii.gz +*.mat +*.dcm +*.png +*/mlcube/workspace/* +!requirements.txt +!*/mlcube/workspace/parameters.yaml +models \ No newline at end of file diff --git a/mlcubes/data_preparation/mlcube/mlcube.yaml b/mlcubes/data_preparation/mlcube/mlcube.yaml new file mode 100644 index 00000000..cd837282 --- /dev/null +++ b/mlcubes/data_preparation/mlcube/mlcube.yaml @@ -0,0 +1,50 @@ +name: Data Preparator MLCube with Manual preparation steps +description: Data Preparator MLCube showcasing examples were automated and manual steps are required. Provided by MLCommons +authors: + - {name: MLCommons} + +platform: + accelerator_count: 0 + +docker: + # Image name + image: mlcommons/rano-data-prep-mlcube:latest + # Docker build context relative to $MLCUBE_ROOT. Default is `build`. + build_context: "../project" + # Docker file name within docker build context, default is `Dockerfile`. + build_file: "Dockerfile" + +tasks: + prepare: + parameters: + inputs: { + data_path: input_data, + labels_path: input_labels, + parameters_file: parameters.yaml, + models: additional_files/models, + } + outputs: { + output_path: data/, + output_labels_path: labels/, + report_file: {type: file, default: report.yaml}, + metadata_path: metadata/, + } + sanity_check: + parameters: + inputs: { + data_path: data/, + labels_path: labels/, + parameters_file: parameters.yaml, + metadata_path: metadata/, + } + statistics: + parameters: + inputs: { + data_path: data/, + labels_path: labels/, + parameters_file: parameters.yaml, + metadata_path: metadata/, + } + outputs: { + output_path: {type: file, default: statistics.yaml} + } \ No newline at end of file diff --git a/mlcubes/data_preparation/mlcube/workspace/parameters.yaml b/mlcubes/data_preparation/mlcube/workspace/parameters.yaml new file mode 100644 index 00000000..7755bbbd --- /dev/null +++ b/mlcubes/data_preparation/mlcube/workspace/parameters.yaml @@ -0,0 +1,21 @@ +seed: 2784 +train_percent: 0.8 +medperf_report_stages: +- "IDENTIFIED" +- "VALIDATED" +- "MISSING_MODALITIES" +- "EXTRA_MODALITIES" +- "VALIDATION_FAILED" +- "CONVERTED_TO_NIfTI" +- "NIfTI_CONVERSION_FAILED" +- "BRAIN_EXTRACT_FINISHED" +- "BRAIN_EXTRACT_FINISHED" +- "TUMOR_EXTRACT_FAILED" +- "MANUAL_REVIEW_COMPLETE" +- "MANUAL_REVIEW_REQUIRED" +- "MULTIPLE_ANNOTATIONS_ERROR" +- "COMPARISON_COMPLETE" +- "EXACT_MATCH_IDENTIFIED" +- "ANNOTATION_COMPARISON_FAILED" +- "ANNOTATION_CONFIRMED" +- "DONE" \ No newline at end of file diff --git a/mlcubes/data_preparation/project/mlcube.py b/mlcubes/data_preparation/project/mlcube.py new file mode 100644 index 00000000..f798c49d --- /dev/null +++ b/mlcubes/data_preparation/project/mlcube.py @@ -0,0 +1,62 @@ +"""MLCube handler file""" +import typer +import subprocess + + +app = typer.Typer() + + +def exec_python(cmd: str) -> None: + """Execute a python script as a subprocess + + Args: + cmd (str): command to run as would be written inside the terminal + """ + splitted_cmd = cmd.split() + process = subprocess.Popen(splitted_cmd, cwd=".") + process.wait() + assert process.returncode == 0, f"command failed: {cmd}" + + 
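+# Each typer command below mirrors a task declared in mlcube/mlcube.yaml
+# (prepare, sanity_check, statistics). MLCube calls this handler with the task
+# name plus one --<parameter> flag per declared input/output, and each command
+# simply forwards those paths to the corresponding script in this project folder.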
+@app.command("prepare") +def prepare( + data_path: str = typer.Option(..., "--data_path"), + labels_path: str = typer.Option(..., "--labels_path"), + parameters_file: str = typer.Option(..., "--parameters_file"), + models_path: str = typer.Option(..., "--models"), + output_path: str = typer.Option(..., "--output_path"), + output_labels_path: str = typer.Option(..., "--output_labels_path"), + report_file: str = typer.Option(..., "--report_file"), + metadata_path: str = typer.Option(..., "--metadata_path"), +): + cmd = f"python3 project/prepare.py --data_path={data_path} --labels_path={labels_path} --models_path={models_path} --data_out={output_path} --labels_out={output_labels_path} --report={report_file} --parameters={parameters_file} --metadata_path={metadata_path}" + exec_python(cmd) + + +@app.command("sanity_check") +def sanity_check( + data_path: str = typer.Option(..., "--data_path"), + labels_path: str = typer.Option(..., "--labels_path"), + parameters_file: str = typer.Option(..., "--parameters_file"), + metadata_path: str = typer.Option(..., "--metadata_path"), +): + # Modify the sanity_check command as needed + cmd = f"python3 project/sanity_check.py --data_path={data_path} --labels_path={labels_path} --metadata={metadata_path}" + exec_python(cmd) + + +@app.command("statistics") +def sanity_check( + data_path: str = typer.Option(..., "--data_path"), + labels_path: str = typer.Option(..., "--labels_path"), + parameters_file: str = typer.Option(..., "--parameters_file"), + metadata_path: str = typer.Option(..., "--metadata_path"), + out_path: str = typer.Option(..., "--output_path"), +): + # Modify the statistics command as needed + cmd = f"python3 project/statistics.py --data_path={data_path} --labels_path={labels_path} --out_file={out_path} --metadata={metadata_path}" + exec_python(cmd) + + +if __name__ == "__main__": + app() diff --git a/mlcubes/data_preparation/project/prepare.py b/mlcubes/data_preparation/project/prepare.py new file mode 100644 index 00000000..754d1446 --- /dev/null +++ b/mlcubes/data_preparation/project/prepare.py @@ -0,0 +1,166 @@ +import os +import argparse +import pandas as pd +import yaml +from stages.generate_report import GenerateReport +from stages.get_csv import AddToCSV +from stages.nifti_transform import NIfTITransform +from stages.extract import Extract +from stages.extract_nnunet import ExtractNnUNet +from stages.manual import ManualStage +from stages.comparison import SegmentationComparisonStage +from stages.confirm import ConfirmStage +from stages.split import SplitStage +from stages.pipeline import Pipeline +from stages.mlcube_constants import * +from stages.constants import INTERIM_FOLDER, FINAL_FOLDER, TUMOR_MASK_FOLDER + +def find_csv_filenames(path_to_dir, suffix=".csv"): + filenames = os.listdir(path_to_dir) + return [filename for filename in filenames if filename.endswith(suffix)] + + +def setup_argparser(): + parser = argparse.ArgumentParser("Medperf Data Preparator Example") + parser.add_argument( + "--data_path", dest="data", type=str, help="path containing raw data" + ) + parser.add_argument( + "--labels_path", dest="labels", type=str, help="path containing labels" + ) + parser.add_argument( + "--models_path", dest="models", type=str, help="path to the nnunet models" + ) + parser.add_argument( + "--data_out", dest="data_out", type=str, help="path to store prepared data" + ) + parser.add_argument( + "--labels_out", + dest="labels_out", + type=str, + help="path to store prepared labels", + ) + parser.add_argument( + "--report", 
dest="report", type=str, help="path to the report csv file to store" + ) + parser.add_argument( + "--parameters", + dest="parameters", + type=str, + help="path to the parameters yaml file", + ) + parser.add_argument( + "--metadata_path", + dest="metadata_path", + type=str, + help="path to the local metadata folder" + ) + + return parser.parse_args() + + +def init_pipeline(args): + # RUN COLUMN-WISE PROCESSING + out_raw = os.path.join(args.data_out, RAW_PATH) + valid_data_out = os.path.join(args.data_out, VALID_PATH) + nifti_data_out = os.path.join(args.data_out, PREP_PATH) + brain_data_out = os.path.join(args.data_out, BRAIN_PATH) + tumor_data_out = os.path.join(args.data_out, TUMOR_PATH) + match_data_out = args.labels_out + backup_out = os.path.join(args.labels_out, TUMOR_BACKUP_PATH) + staging_folders = [ + out_raw, + valid_data_out, + nifti_data_out, + brain_data_out, + tumor_data_out, + backup_out, + ] + out_data_csv = os.path.join(args.data_out, OUT_CSV) + trash_folder = os.path.join(args.data_out, TRASH_PATH) + invalid_subjects_file = os.path.join(args.metadata_path, INVALID_FILE) + + loop = None + report_gen = GenerateReport( + out_data_csv, + args.data, + out_raw, + args.labels, + args.labels_out, + args.data_out, + DONE_STAGE_STATUS, + brain_data_out, + BRAIN_STAGE_STATUS, + tumor_data_out, + MANUAL_STAGE_STATUS\ + ) + csv_proc = AddToCSV(out_raw, out_data_csv, valid_data_out, out_raw) + nifti_proc = NIfTITransform(out_data_csv, nifti_data_out, valid_data_out, args.metadata_path, args.data_out) + brain_extract_proc = Extract( + out_data_csv, + brain_data_out, + INTERIM_FOLDER, + nifti_data_out, + INTERIM_FOLDER, + # loop, + "extract_brain", + BRAIN_STAGE_STATUS, + ) + tumor_extract_proc = ExtractNnUNet( + out_data_csv, + tumor_data_out, + INTERIM_FOLDER, + brain_data_out, + INTERIM_FOLDER, + TUMOR_STAGE_STATUS, + ) + manual_proc = ManualStage(out_data_csv, tumor_data_out, tumor_data_out, backup_out) + match_proc = SegmentationComparisonStage( + out_data_csv, + match_data_out, + tumor_data_out, + backup_out, + ) + confirm_proc = ConfirmStage( + out_data_csv, + args.data_out, + args.labels_out, + tumor_data_out, + backup_out, + staging_folders, + ) + split_proc = SplitStage( + args.parameters, args.data_out, args.labels_out, staging_folders + ) + stages = [ + csv_proc, + nifti_proc, + brain_extract_proc, + tumor_extract_proc, + manual_proc, + match_proc, + confirm_proc, + split_proc + ] + return Pipeline(report_gen, stages, staging_folders, [trash_folder], invalid_subjects_file) + +def init_report(args) -> pd.DataFrame: + report = None + if os.path.exists(args.report): + with open(args.report, "r") as f: + report_data = yaml.safe_load(f) + report = pd.DataFrame(report_data) + + return report + + +def main(): + args = setup_argparser() + + os.environ["RESULTS_FOLDER"] = os.path.join(args.models, "nnUNet_trained_models") + report = init_report(args) + pipeline = init_pipeline(args) + pipeline.run(report, args.report) + +if __name__ == "__main__": + main() \ No newline at end of file diff --git a/mlcubes/data_preparation/project/requirements.txt b/mlcubes/data_preparation/project/requirements.txt new file mode 100644 index 00000000..f07ba674 --- /dev/null +++ b/mlcubes/data_preparation/project/requirements.txt @@ -0,0 +1,11 @@ +typer +pandas +PyYAML +# Include all your requirements here +SimpleITK +tqdm +scikit-image +FigureGenerator==0.0.4 +gandlf==0.0.16 +labelfusion==1.0.14 +nibabel==5.1.0 \ No newline at end of file diff --git a/mlcubes/data_preparation/project/sanity_check.py 
b/mlcubes/data_preparation/project/sanity_check.py new file mode 100644 index 00000000..e4da55bb --- /dev/null +++ b/mlcubes/data_preparation/project/sanity_check.py @@ -0,0 +1,45 @@ +import yaml +import argparse +import pandas as pd + +from stages.utils import has_prepared_folder_structure + + +def sanity_check(data_path: str, labels_path: str): + """Runs a few checks to ensure data quality and integrity + + Args: + data_path (str): Path to data. + labels_path (str): Path to labels. + """ + # Here you must add all the checks you consider important regarding the + # state of the data + assert has_prepared_folder_structure( + data_path, labels_path + ), "The contents of the labels and data don't ressemble a prepared dataset" + + +if __name__ == "__main__": + parser = argparse.ArgumentParser("Medperf Model Sanity Check Example") + parser.add_argument( + "--data_path", + dest="data", + type=str, + help="directory containing the prepared data", + ) + parser.add_argument( + "--labels_path", + dest="labels", + type=str, + help="directory containing the prepared labels", + ) + parser.add_argument( + "--metadata_path", + dest="metadata_path", + type=str, + help="path to the local metadata folder", + ) + + args = parser.parse_args() + + sanity_check(args.data, args.labels) diff --git a/mlcubes/data_preparation/project/stages/comparison.py b/mlcubes/data_preparation/project/stages/comparison.py new file mode 100644 index 00000000..9d17f2ad --- /dev/null +++ b/mlcubes/data_preparation/project/stages/comparison.py @@ -0,0 +1,166 @@ +from typing import Union, Tuple +import os +import shutil + +import pandas as pd +from pandas import DataFrame +import numpy as np +import nibabel as nib + +from .row_stage import RowStage +from .utils import get_id_tp, update_row_with_dict, md5_file +from .constants import TUMOR_MASK_FOLDER, INTERIM_FOLDER +from .mlcube_constants import COMPARISON_STAGE_STATUS + + +class SegmentationComparisonStage(RowStage): + def __init__( + self, + data_csv: str, + out_path: str, + prev_stage_path, + backup_path: str, + ): + self.data_csv = data_csv + self.out_path = out_path + self.prev_stage_path = prev_stage_path + self.backup_path = backup_path + + @property + def name(self): + return "Label Segmentation Comparison" + + @property + def status_code(self): + return COMPARISON_STAGE_STATUS + + def __get_input_path(self, index: Union[str, int]) -> str: + id, tp = get_id_tp(index) + path = os.path.join( + self.prev_stage_path, INTERIM_FOLDER, id, tp, TUMOR_MASK_FOLDER, "finalized" + ) + return path + + def __get_backup_path(self, index: Union[str, int]) -> str: + id, tp = get_id_tp(index) + path = os.path.join(self.backup_path, id, tp, TUMOR_MASK_FOLDER) + return path + + def __get_output_path(self, index: Union[str, int]) -> str: + id, tp = get_id_tp(index) + path = os.path.join(self.out_path, id, tp) + return path + + def __get_case_path(self, index: Union[str, int]) -> str: + path = self.__get_input_path(index) + case = os.listdir(path)[0] + + return os.path.join(path, case) + + def __report_gt_not_found( + self, index: Union[str, int], report: pd.DataFrame, reviewed_hash: str + ) -> pd.DataFrame: + case_path = self.__get_case_path(index) + data_path = report.loc[index, "data_path"] + report_data = { + "status": -self.status_code - 0.2, # -6.2 + "data_path": data_path, + "labels_path": case_path, + "segmentation_hash": reviewed_hash, + } + update_row_with_dict(report, report_data, index) + return report + + def __report_exact_match( + self, index: Union[str, int], report: 
pd.DataFrame, reviewed_hash: str + ) -> pd.DataFrame: + case_path = self.__get_case_path(index) + data_path = report.loc[index, "data_path"] + report_data = { + "status": -self.status_code - 0.1, # -6.1 + "data_path": data_path, + "labels_path": case_path, + "num_changed_voxels": 0, + "segmentation_hash": reviewed_hash, + } + update_row_with_dict(report, report_data, index) + return report + + def __report_success( + self, + index: Union[str, int], + report: pd.DataFrame, + num_changed_voxels: int, + reviewed_hash: str, + ) -> pd.DataFrame: + case_path = self.__get_case_path(index) + data_path = report.loc[index, "data_path"] + report_data = { + "status": -self.status_code, # -6 + "data_path": data_path, + "labels_path": case_path, + "num_changed_voxels": num_changed_voxels, + "segmentation_hash": reviewed_hash, + } + update_row_with_dict(report, report_data, index) + return report + + def could_run(self, index: Union[str, int], report: DataFrame) -> bool: + # Ensure a single reviewed segmentation file exists + path = self.__get_input_path(index) + gt_path = self.__get_backup_path(index) + + is_valid = True + path_exists = os.path.exists(path) + gt_path_exists = os.path.exists(gt_path) + contains_case = False + reviewed_hash = None + if path_exists: + cases = os.listdir(path) + num_cases = len(cases) + if num_cases: + reviewed_file = os.path.join(path, cases[0]) + reviewed_hash = md5_file(reviewed_file) + contains_case = num_cases == 1 + + prev_hash = report.loc[index]["segmentation_hash"] + hash_changed = prev_hash != reviewed_hash + is_valid = path_exists and contains_case and gt_path_exists and hash_changed + + return is_valid + + def execute( + self, index: Union[str, int], report: DataFrame + ) -> Tuple[DataFrame, bool]: + path = self.__get_input_path(index) + cases = os.listdir(path) + + match_output_path = self.__get_output_path(index) + os.makedirs(match_output_path, exist_ok=True) + # Get the necessary files for match check + # We assume reviewed and gt files have the same name + reviewed_file = os.path.join(path, cases[0]) + reviewed_hash = md5_file(reviewed_file) + gt_file = os.path.join(self.__get_backup_path(index), cases[0]) + + if not os.path.exists(gt_file): + # Ground truth file not found, reviewed file most probably renamed + report = self.__report_gt_not_found( + index, report, reviewed_hash + ) + return report, False + + reviewed_img = nib.load(reviewed_file) + gt_img = nib.load(gt_file) + + reviewed_voxels = np.array(reviewed_img.dataobj) + gt_voxels = np.array(gt_img.dataobj) + + num_changed_voxels = np.sum(reviewed_voxels != gt_voxels) + + if num_changed_voxels == 0: + report = self.__report_exact_match(index, report, reviewed_hash) + return report, True + + report = self.__report_success(index, report, num_changed_voxels, reviewed_hash) + return report, True diff --git a/mlcubes/data_preparation/project/stages/confirm.py b/mlcubes/data_preparation/project/stages/confirm.py new file mode 100644 index 00000000..63f706e9 --- /dev/null +++ b/mlcubes/data_preparation/project/stages/confirm.py @@ -0,0 +1,153 @@ +from typing import Union, Tuple +import os +import yaml +import shutil +from time import sleep +from typing import List + +import pandas as pd +from pandas import DataFrame + +from .dset_stage import DatasetStage +from .utils import get_id_tp, cleanup_storage +from .constants import TUMOR_MASK_FOLDER, INTERIM_FOLDER, FINAL_FOLDER +from .mlcube_constants import CONFIRM_STAGE_STATUS + + +class ConfirmStage(DatasetStage): + def __init__( + self, + data_csv: str, + 
out_data_path: str, + out_labels_path: str, + prev_stage_path: str, + backup_path: str, + staging_folders: List[str], + ): + self.data_csv = data_csv + self.out_data_path = out_data_path + self.out_labels_path = out_labels_path + self.prev_stage_path = prev_stage_path + self.backup_path = backup_path + self.staging_folders = staging_folders + self.prompt_file = ".prompt.txt" + self.response_file = ".response.txt" + + @property + def name(self): + return "Annotations Confirmation" + + @property + def status_code(self): + return CONFIRM_STAGE_STATUS + + def __get_input_data_path(self, index: Union[str, int]): + id, tp = get_id_tp(index) + path = os.path.join(self.prev_stage_path, FINAL_FOLDER, id, tp) + return path + + def __get_input_label_path(self, index: Union[str, int]): + id, tp = get_id_tp(index) + path = os.path.join( + self.prev_stage_path, INTERIM_FOLDER, id, tp, TUMOR_MASK_FOLDER, "finalized" + ) + case = os.listdir(path)[0] + + return os.path.join(path, case) + + def __get_output_data_path(self, index: Union[str, int]): + id, tp = get_id_tp(index) + path = os.path.join(self.out_data_path, id, tp) + return path + + def __get_output_label_path(self, index: Union[str, int]): + id, tp = get_id_tp(index) + path = os.path.join(self.out_labels_path, id, tp) + filename = f"{id}_{tp}_final_seg.nii.gz" + return path, filename + + def __confirm(self, exact_match_percent: float) -> bool: + exact_match_percent = round(exact_match_percent * 100, 2) + msg = ( + f"We've identified {exact_match_percent}% of cases have not been modified " + + "with respect to the baseline segmentation. Do you confirm this is intended? " + + "[Y]/n" + ) + + # user_input = input(msg).lower() + prompt_path = os.path.join(self.out_data_path, self.prompt_file) + response_path = os.path.join(self.out_data_path, self.response_file) + + with open(prompt_path, "w") as f: + f.write(msg) + + while not os.path.exists(response_path): + sleep(1) + + with open(response_path, "r") as f: + user_input = f.readline().strip() + + os.remove(prompt_path) + os.remove(response_path) + + return user_input == "y" or user_input == "" + + def __report_failure(self, report: DataFrame) -> DataFrame: + # For this stage, failure is done when the user doesn't confirm + # This means he probably wants to keep working on the data + # And needs to know which rows are exact matches. 
+ # Because of this, failing this stage keeps the report intact + return report + + def __process_row(self, row: pd.Series) -> pd.Series: + """process a row by moving the required files + to their respective locations, and removing any extra files + + Args: + report (DataFrame): data preparation report + + Returns: + DataFrame: modified data preparation report + """ + index = row.name + input_data_path = self.__get_input_data_path(index) + input_label_filepath = self.__get_input_label_path(index) + output_data_path = self.__get_output_data_path(index) + output_label_path, filename = self.__get_output_label_path(index) + output_label_filepath = os.path.join(output_label_path, filename) + + shutil.rmtree(output_data_path, ignore_errors=True) + shutil.copytree(input_data_path, output_data_path) + os.makedirs(output_label_path, exist_ok=True) + shutil.copy(input_label_filepath, output_label_filepath) + + row["status"] = self.status_code + row["data_path"] = output_data_path + row["labels_path"] = output_label_path + return row + + def could_run(self, report: DataFrame) -> bool: + # could run once all cases have been compared to the ground truth + missing_voxels = report["num_changed_voxels"].isnull().values.any() + prev_path_exists = os.path.exists(self.prev_stage_path) + empty_prev_path = True + if prev_path_exists: + empty_prev_path = len(os.listdir(self.prev_stage_path)) == 0 + + return prev_path_exists and not empty_prev_path and not missing_voxels + + def execute(self, report: DataFrame) -> Tuple[DataFrame, bool]: + exact_match_percent = (report["num_changed_voxels"] == 0).sum() / len(report) + confirmed = self.__confirm(exact_match_percent) + + if not confirmed: + report = self.__report_failure(report) + return report, False + + report = report.apply(self.__process_row, axis=1) + # Remove all intermediary steps + cleanup_storage(self.staging_folders) + if os.path.exists(self.data_csv): + os.remove(self.data_csv) + + return report, True diff --git a/mlcubes/data_preparation/project/stages/dset_stage.py b/mlcubes/data_preparation/project/stages/dset_stage.py new file mode 100644 index 00000000..9792a9e2 --- /dev/null +++ b/mlcubes/data_preparation/project/stages/dset_stage.py @@ -0,0 +1,32 @@ +from abc import ABC, abstractmethod +import pandas as pd +from typing import Tuple + +from .stage import Stage + + +class DatasetStage(Stage, ABC): + @abstractmethod + def could_run(self, report: pd.DataFrame) -> bool: + """Establishes if this step could be executed + + Args: + index (Union[str, int]): case index in the report + report (pd.DataFrame): Dataframe containing the current state of the preparation flow + + Returns: + bool: wether this stage could be executed + """ + + @abstractmethod + def execute(self, report: pd.DataFrame) -> Tuple[pd.DataFrame, bool]: + """Executes the stage + + Args: + index (Union[str, int]): case index in the report + report (pd.DataFrame): DataFrame containing the current state of the preparation flow + + Returns: + pd.DataFrame: Updated report dataframe + bool: Success status + """ diff --git a/mlcubes/data_preparation/project/stages/extract.py b/mlcubes/data_preparation/project/stages/extract.py new file mode 100644 index 00000000..c8987391 --- /dev/null +++ b/mlcubes/data_preparation/project/stages/extract.py @@ -0,0 +1,175 @@ +from typing import Union, List, Tuple +from tqdm import tqdm +import pandas as pd +import os +import shutil +import traceback + +from .row_stage import RowStage +from .PrepareDataset import Preparator, FINAL_FOLDER +from .utils import 
update_row_with_dict, get_id_tp, md5_file + + +class Extract(RowStage): + def __init__( + self, + data_csv: str, + out_path: str, + subpath: str, + prev_stage_path: str, + prev_subpath: str, + # pbar: tqdm, + func_name: str, + status_code: int, + extra_labels_path=[], + ): + self.data_csv = data_csv + self.out_path = out_path + self.subpath = subpath + self.data_subpath = FINAL_FOLDER + self.prev_path = prev_stage_path + self.prev_subpath = prev_subpath + os.makedirs(self.out_path, exist_ok=True) + self.prep = Preparator(data_csv, out_path, "BraTSPipeline") + self.func_name = func_name + self.func = getattr(self.prep, func_name) + self.pbar = tqdm() + self.failed = False + self.exception = None + self.__status_code = status_code + self.extra_labels_path = extra_labels_path + + @property + def name(self) -> str: + return self.func_name.replace("_", " ").capitalize() + + @property + def status_code(self) -> str: + return self.__status_code + + def could_run(self, index: Union[str, int], report: pd.DataFrame) -> bool: + """Determine if case at given index needs to be converted to NIfTI + + Args: + index (Union[str, int]): Case index, as used by the report dataframe + report (pd.DataFrame): Report Dataframe for providing additional context + + Returns: + bool: Wether this stage could be executed for the given case + """ + prev_paths = self.__get_paths(index, self.prev_path, self.prev_subpath) + return all([os.path.exists(path) for path in prev_paths]) + + def execute( + self, index: Union[str, int], report: pd.DataFrame + ) -> Tuple[pd.DataFrame, bool]: + """Executes the NIfTI transformation stage on the given case + + Args: + index (Union[str, int]): case index, as used by the report + report (pd.DataFrame): DataFrame containing the current state of the preparation flow + + Returns: + pd.DataFrame: Updated report dataframe + """ + self.__prepare_exec() + self.__copy_case(index) + self._process_case(index) + report, success = self.__update_state(index, report) + self.prep.write() + + return report, success + + def __prepare_exec(self): + # Reset the file contents for errors + open(self.prep.stderr_log, "w").close() + + # Update the out dataframes to current state + self.prep.read() + + def __get_paths(self, index: Union[str, int], path: str, subpath: str): + id, tp = get_id_tp(index) + data_path = os.path.join(path, self.data_subpath, id, tp) + out_path = os.path.join(path, subpath, id, tp) + return data_path, out_path + + def __copy_case(self, index: Union[str, int]): + prev_paths = self.__get_paths(index, self.prev_path, self.prev_subpath) + copy_paths = self.__get_paths(index, self.out_path, self.prev_subpath) + for prev, copy in zip(prev_paths, copy_paths): + shutil.copytree(prev, copy, dirs_exist_ok=True) + + def _process_case(self, index: Union[str, int]): + id, tp = get_id_tp(index) + df = self.prep.subjects_df + row_search = df[(df["SubjectID"] == id) & (df["Timepoint"] == tp)] + if len(row_search) > 0: + row = row_search.iloc[0] + else: + # Most probably this case was semi-prepared. 
Mock a row + row = pd.Series({"SubjectID": id, "Timepoint": tp, "T1": "", "T1GD": "", "T2": "", "FLAIR": ""}) + self.func(row, self.pbar) + + def __hide_paths(self, hide_paths): + for path in hide_paths: + dirname = os.path.dirname(path) + hidden_name = f".{os.path.basename(path)}" + hidden_path = os.path.join(dirname, hidden_name) + if os.path.exists(hidden_path): + shutil.rmtree(hidden_path) + shutil.move(path, hidden_path) + + def __update_state( + self, index: Union[str, int], report: pd.DataFrame + ) -> Tuple[pd.DataFrame, bool]: + if self.failed: + del_paths = self.__get_paths(index, self.out_path, self.subpath) + report, success = self.__report_failure(index, report) + for del_path in del_paths: + shutil.rmtree(del_path, ignore_errors=True) + else: + # Backup the paths in case we need to revert to this stage + hide_paths = self.__get_paths(index, self.prev_path, self.prev_subpath) + _, out_path = self.__get_paths(index, self.out_path, self.subpath) + # Wait a little so that file gets created + brain_mask_file = os.path.join(out_path, "brainMask_fused.nii.gz") + # Handle the case where a brain mask doesn't exist + # Due to the subject being semi-prepared + brain_mask_hash = "" + if os.path.exists(brain_mask_file): + brain_mask_hash = md5_file(brain_mask_file) + report, success = self.__report_success(index, report, brain_mask_hash) + self.__hide_paths(hide_paths) + + return report, success + + def __report_success( + self, index: Union[str, int], report: pd.DataFrame, brain_mask_hash: str + ) -> Tuple[pd.DataFrame, bool]: + data_path, labels_path = self.__get_paths(index, self.out_path, self.subpath) + labels_path = os.path.join(labels_path, *self.extra_labels_path) + report_data = { + "status": self.status_code, + "data_path": data_path, + "labels_path": labels_path, + "brain_mask_hash": brain_mask_hash, + } + update_row_with_dict(report, report_data, index) + return report, True + + def __report_failure( + self, index: Union[str, int], report: pd.DataFrame + ) -> Tuple[pd.DataFrame, bool]: + prev_data_path, prev_labels_path = self.__get_paths( + index, self.prev_path, self.prev_subpath + ) + msg = f"{str(self.exception)}: {self.traceback}" + + report_data = { + "status": -self.status_code, + "comment": msg, + "data_path": prev_data_path, + "labels_path": prev_labels_path, + } + update_row_with_dict(report, report_data, index) + return report, False diff --git a/mlcubes/data_preparation/project/stages/extract_nnunet.py b/mlcubes/data_preparation/project/stages/extract_nnunet.py new file mode 100644 index 00000000..3ef95e2f --- /dev/null +++ b/mlcubes/data_preparation/project/stages/extract_nnunet.py @@ -0,0 +1,187 @@ +from typing import Union, List, Tuple +from tqdm import tqdm +import pandas as pd +import os +from os.path import realpath, dirname, join +import shutil +import time +import SimpleITK as sitk +import subprocess +import traceback +from LabelFusion.wrapper import fuse_images + +from .extract import Extract +from .PrepareDataset import ( + Preparator, + FINAL_FOLDER, + generate_tumor_segmentation_fused_images, + save_screenshot, +) +from .utils import update_row_with_dict, get_id_tp, MockTqdm + +MODALITY_MAPPING = { + "t1c": "t1c", + "t1ce": "t1c", + "t1": "t1n", + "t1n": "t1n", + "t2": "t2w", + "t2w": "t2w", + "t2f": "t2f", + "flair": "t2f", +} + +MODALITY_VARIANTS = { + "t1c": "T1GD", + "t1ce": "T1GD", + "t1": "T1", + "t1n": "T1", + "t2": "T2", + "t2w": "T2", + "t2f": "FLAIR", + "flair": "FLAIR", +} + + +class ExtractNnUNet(Extract): + def __init__( + self, + data_csv: 
str, + out_path: str, + subpath: str, + prev_stage_path: str, + prev_subpath: str, + status_code: int, + extra_labels_path=[], + nnunet_executable: str = "/nnunet_env/bin/nnUNet_predict" + ): + self.data_csv = data_csv + self.out_path = out_path + self.subpath = subpath + self.data_subpath = FINAL_FOLDER + self.prev_path = prev_stage_path + self.prev_subpath = prev_subpath + os.makedirs(self.out_path, exist_ok=True) + self.prep = Preparator(data_csv, out_path, "BraTSPipeline") + self.pbar = tqdm() + self.failed = False + self.exception = None + self.__status_code = status_code + self.extra_labels_path = extra_labels_path + self.nnunet_executable = nnunet_executable + + @property + def name(self) -> str: + return "nnUNet Tumor Extraction" + + @property + def status_code(self) -> str: + return self.__status_code + + def __get_models(self): + models_path = os.path.join(os.environ["RESULTS_FOLDER"], "nnUNet", "3d_fullres") + return os.listdir(models_path) + + def __get_mod_order(self, model): + order_path = os.path.join(os.environ["RESULTS_FOLDER"], os.pardir, "nnUNet_modality_order", model, "order") + with open(order_path, "r") as f: + order_str = f.readline() + # remove 'order = ' from the splitted list + modalities = order_str.split()[2:] + modalities = [MODALITY_MAPPING[mod] for mod in modalities] + return modalities + + def __prepare_case(self, path, id, tp, order): + tmp_subject = f"{id}-{tp}" + tmp_path = os.path.join(path, "tmp-data") + tmp_subject_path = os.path.join(tmp_path, tmp_subject) + tmp_out_path = os.path.join(path, "tmp-out") + shutil.rmtree(tmp_path, ignore_errors=True) + shutil.rmtree(tmp_out_path, ignore_errors=True) + os.makedirs(tmp_subject_path) + os.makedirs(tmp_out_path) + in_modalities_path = os.path.join(path, "DataForFeTS", id, tp) + input_modalities = {} + for modality_file in os.listdir(in_modalities_path): + if not modality_file.endswith(".nii.gz"): + continue + modality = modality_file[:-7].split("_")[-1] + norm_mod = MODALITY_MAPPING[modality] + mod_idx = order.index(norm_mod) + mod_idx = str(mod_idx).zfill(4) + + out_modality_file = f"{tmp_subject}_{mod_idx}.nii.gz" + in_file = os.path.join(in_modalities_path, modality_file) + out_file = os.path.join(tmp_subject_path, out_modality_file) + input_modalities[MODALITY_VARIANTS[modality]] = in_file + shutil.copyfile(in_file, out_file) + + return tmp_subject_path, tmp_out_path, input_modalities + + def __run_model(self, model, data_path, out_path): + # models are named Task_..., where is always 3 numbers + task_id = model[4:7] + cmd = f"{self.nnunet_executable} -i {data_path} -o {out_path} -t {task_id}" + print(cmd) + print(os.listdir(data_path)) + start = time.time() + subprocess.call(cmd, shell=True) + end = time.time() + total_time = end - start + print(f"Total time elapsed is {total_time} seconds") + + def __finalize_pred(self, tmp_out_path, out_pred_filepath): + # We assume there's only one file in out_path + pred = None + for file in os.listdir(tmp_out_path): + if file.endswith(".nii.gz"): + pred = file + + if pred is None: + raise RuntimeError("No tumor segmentation was found") + + pred_filepath = os.path.join(tmp_out_path, pred) + shutil.move(pred_filepath, out_pred_filepath) + return out_pred_filepath + + def _process_case(self, index: Union[str, int]): + id, tp = get_id_tp(index) + subject_id = f"{id}_{tp}" + models = self.__get_models() + outputs = [] + images_for_fusion = [] + out_path = os.path.join(self.out_path, "DataForQC", id, tp) + out_pred_path = os.path.join(out_path, "TumorMasksForQC") + 
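+        # Sketch of the flow implemented below: for each pretrained nnU-Net model found
+        # under RESULTS_FOLDER, the subject's modalities are staged in a temporary folder
+        # using that model's expected channel order, nnUNet_predict is invoked, and the
+        # resulting mask is saved as {id}_{tp}_tumorMask_model_{i}.nii.gz; the per-model
+        # masks are then fused and a QC screenshot is generated for every candidate mask.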
os.makedirs(out_pred_path, exist_ok=True) + for i, model in enumerate(models): + order = self.__get_mod_order(model) + tmp_data_path, tmp_out_path, input_modalities = self.__prepare_case( + self.out_path, id, tp, order + ) + out_pred_filepath = os.path.join( + out_pred_path, f"{id}_{tp}_tumorMask_model_{i}.nii.gz" + ) + self.__run_model(model, tmp_data_path, tmp_out_path) + output = self.__finalize_pred(tmp_out_path, out_pred_filepath) + outputs.append(output) + images_for_fusion.append(sitk.ReadImage(output, sitk.sitkUInt8)) + + # cleanup + shutil.rmtree(tmp_data_path, ignore_errors=True) + shutil.rmtree(tmp_out_path, ignore_errors=True) + + fused_outputs = generate_tumor_segmentation_fused_images( + images_for_fusion, out_pred_path, subject_id + ) + outputs += fused_outputs + + for output in outputs: + # save the screenshot + tumor_mask_id = os.path.basename(output).replace(".nii.gz", "") + save_screenshot( + input_modalities, + os.path.join( + out_path, + f"{tumor_mask_id}_summary.png", + ), + output, + ) diff --git a/mlcubes/data_preparation/project/stages/generate_report.py b/mlcubes/data_preparation/project/stages/generate_report.py new file mode 100644 index 00000000..a7661137 --- /dev/null +++ b/mlcubes/data_preparation/project/stages/generate_report.py @@ -0,0 +1,403 @@ +from .dset_stage import DatasetStage +import pandas as pd +import numpy as np +import os +import re +import shutil +from typing import Tuple +from .utils import has_prepared_folder_structure, md5_dir, md5_file +from .constants import INTERIM_FOLDER, FINAL_FOLDER, TUMOR_MASK_FOLDER +from .mlcube_constants import REPORT_STAGE_STATUS + +DICOM_MODALITIES_PREFIX = {"fl": "t2_Flair", "t1": "t1_axial-3", "t1c": "t1_axial_stealth", "t2": "T2_SAG"} +NIFTI_MODALITIES = ["t1c", "t1n", "t2f", "t2w"] +BRAIN_SCAN_NAME = "brain_(.*)" +TUMOR_SEG_NAME = "final_seg" +CSV_HEADERS = ["SubjectID", "Timepoint", "T1", "T1GD", "T2", "FLAIR"] + +def get_index(subject, timepoint): + return f"{subject}|{timepoint}" + +def has_alternative_folder_structure(subject_tp_path, og_path): + contents = os.listdir(subject_tp_path) + prefixes_presence = {prefix: False for prefix in DICOM_MODALITIES_PREFIX.values()} + for content in contents: + content_path = os.path.join(subject_tp_path, content) + # Search recursively across folders + if os.path.isdir(content_path): + return has_alternative_folder_structure(content_path, og_path) + + # Check if the file is a dicom file with an expected prefix + if not content.endswith(".dcm"): + continue + + for prefix in DICOM_MODALITIES_PREFIX.values(): + if content.startswith(prefix): + prefixes_presence[prefix] = True + + # If all prefixes are found within the current path, then it has the folder structure + if all(prefixes_presence.values()): + return True, subject_tp_path + + # Structure not identified at this tree + return False, og_path + +def to_expected_folder_structure(subject_tp_path, contents_path): + # Create the modality folders + for modality in DICOM_MODALITIES_PREFIX.keys(): + modality_path = os.path.join(subject_tp_path, modality) + os.mkdir(modality_path) + + # Move the dicoms to the needed location + dicoms = os.listdir(contents_path) + prefix2mod = {prefix: mod for mod, prefix in DICOM_MODALITIES_PREFIX.items()} + for dicom in dicoms: + for prefix in prefix2mod.keys(): + if not dicom.startswith(prefix): + continue + mod = prefix2mod[prefix] + old_path = os.path.join(contents_path, dicom) + new_path = os.path.join(subject_tp_path, mod, dicom) + shutil.move(old_path, new_path) + + # Remove extra 
folders + desired_folders = set(DICOM_MODALITIES_PREFIX.keys()) + found_folders = set(os.listdir(subject_tp_path)) + extra_folders = found_folders - desired_folders + for folder in extra_folders: + folder_path = os.path.join(subject_tp_path, folder) + shutil.rmtree(folder_path) + +def has_semiprepared_folder_structure(subject_tp_path, og_path, recursive=True): + contents = os.listdir(subject_tp_path) + suffixes_presence = {suffix: False for suffix in NIFTI_MODALITIES} + for content in contents: + content_path = os.path.join(subject_tp_path, content) + if os.path.isdir(content_path): + if recursive: + return has_semiprepared_folder_structure(content_path, og_path) + else: + continue + + if not content.endswith(".nii.gz"): + continue + + for suffix in NIFTI_MODALITIES: + full_suffix = f"_brain_{suffix}.nii.gz" + if content.endswith(full_suffix): + suffixes_presence[suffix] = True + + if all(suffixes_presence.values()): + return True, subject_tp_path + + return False, og_path + +def get_timepoints(subject, subject_tp_path): + contents = os.listdir(subject_tp_path) + timepoints = set() + for content in contents: + content_path = os.path.join(subject_tp_path, content) + if os.path.isdir(content_path): + # Assume any directory at this point represents a timepoint + timepoints.add(content) + continue + + pattern = re.compile(f"{subject}_(.*)_(?:{BRAIN_SCAN_NAME}|{TUMOR_SEG_NAME})\.nii\.gz") + result = pattern.search(content) + if result is None: + # Skip files that are neither brain scans nor tumor segmentations + continue + timepoint = result.group(1) + timepoints.add(timepoint) + + return list(timepoints) + +def get_tumor_segmentation(subject, timepoint, subject_tp_path): + contents = os.listdir(subject_tp_path) + seg_file = f"{subject}_{timepoint}_{TUMOR_SEG_NAME}.nii.gz" + if seg_file in contents: + return seg_file + return None + +def move_brain_scans(subject, timepoint, in_subject_path, out_data_path): + final_path = os.path.join(out_data_path, FINAL_FOLDER, subject, timepoint) + os.makedirs(final_path, exist_ok=True) + + contents = os.listdir(in_subject_path) + + pattern = re.compile(f"{subject}_{timepoint}_{BRAIN_SCAN_NAME}\.nii\.gz") + brain_scans = [content for content in contents if pattern.match(content)] + + for scan in brain_scans: + in_scan = os.path.join(in_subject_path, scan) + out_scan = os.path.join(final_path, scan) + shutil.copyfile(in_scan, out_scan) + +def move_tumor_segmentation(subject, timepoint, seg_file, in_subject_path, out_data_path, out_labels_path): + interim_path = os.path.join(out_data_path, INTERIM_FOLDER, subject, timepoint) + os.makedirs(interim_path, exist_ok=True) + + in_seg_path = os.path.join(in_subject_path, seg_file) + tumor_mask_path = os.path.join(interim_path, TUMOR_MASK_FOLDER) + under_review_path = os.path.join(tumor_mask_path, "under_review") + finalized_path = os.path.join(tumor_mask_path, "finalized") + os.makedirs(under_review_path, exist_ok=True) + os.makedirs(finalized_path, exist_ok=True) + + seg_root_path = os.path.join(tumor_mask_path, seg_file) + seg_under_review_path = os.path.join(under_review_path, seg_file) + seg_finalized_path = os.path.join(finalized_path, seg_file) + shutil.copyfile(in_seg_path, seg_root_path) + shutil.copyfile(in_seg_path, seg_under_review_path) + shutil.copyfile(in_seg_path, seg_finalized_path) + + # Place the segmentation in the backup folder + backup_path = os.path.join(out_labels_path, ".tumor_segmentation_backup") + subject_tp_backup_path = os.path.join(backup_path, subject, timepoint, TUMOR_MASK_FOLDER) + os.makedirs(subject_tp_backup_path, exist_ok=True) + seg_backup_path = 
os.path.join(subject_tp_backup_path, seg_file) + shutil.copyfile(in_seg_path, seg_backup_path) + + return in_seg_path, seg_finalized_path + +def write_partial_csv(csv_path, subject, timepoint): + # Used when cases are semi-prepared, in which case they + # skip the formal csv creation + if os.path.exists(csv_path): + df = pd.read_csv(csv_path) + else: + df = pd.DataFrame(columns=CSV_HEADERS) + + row = pd.Series(index=CSV_HEADERS) + row["SubjectID"] = subject + row["Timepoint"] = timepoint + row.name = get_index(subject, timepoint) + row = row.fillna("") + + # Check for existence of this row + row_search = df[(df["SubjectID"] == subject) & (df["Timepoint"] == timepoint)] + if len(row_search) == 0: + df = df.append(row) + + df.to_csv(csv_path, index=False) + + +class GenerateReport(DatasetStage): + def __init__( + self, + data_csv: str, + input_path: str, + output_path: str, + input_labels_path: str, + output_labels_path, + done_data_out_path: str, + done_status: int, + brain_data_out_path: str, + brain_status: int, + tumor_data_out_path: str, + reviewed_status: int, + ): + self.data_csv = data_csv + self.input_path = input_path + self.output_path = output_path + self.input_labels_path = input_labels_path + self.output_labels_path = output_labels_path + self.done_data_out_path = done_data_out_path + self.done_status_code = done_status + self.brain_data_out_path = brain_data_out_path + self.brain_status = brain_status + self.tumor_data_out_path = tumor_data_out_path + self.reviewed_status = reviewed_status + + @property + def name(self) -> str: + return "Generate Report" + + @property + def status_code(self) -> int: + return REPORT_STAGE_STATUS + + def _proceed_to_comparison(self, subject, timepoint, in_subject_path, report): + index = get_index(subject, timepoint) + final_path = os.path.join(self.tumor_data_out_path, FINAL_FOLDER, subject, timepoint) + input_hash = md5_dir(in_subject_path) + # Stop if the subject was already present and no input change has happened + if index in report.index: + if input_hash == report.loc[index]["input_hash"]: + return report + + # Move brain scans to its expected location + move_brain_scans(subject, timepoint, in_subject_path, self.tumor_data_out_path) + + # Move tumor segmentation to its expected location + seg_file = f"{subject}_{timepoint}_{TUMOR_SEG_NAME}.nii.gz" + _, seg_finalized_path = move_tumor_segmentation(subject, timepoint, seg_file, in_subject_path, self.tumor_data_out_path, self.output_labels_path) + + # Update the report + data = { + "status": self.reviewed_status, + "data_path": final_path, + "labels_path": seg_finalized_path, + "num_changed_voxels": np.nan, + "brain_mask_hash": "", + "segmentation_hash": "", + "input_hash": input_hash, + } + + subject_series = pd.Series(data) + subject_series.name = index + report = report.append(subject_series) + + write_partial_csv(self.data_csv, subject, timepoint) + + return report + + def _proceed_to_tumor_extraction(self, subject, timepoint, in_subject_path, report): + index = get_index(subject, timepoint) + input_hash = md5_dir(in_subject_path) + # Stop if the subject was already present and no input change has happened + if index in report.index: + if input_hash == report.loc[index]["input_hash"]: + return report + final_path = os.path.join(self.brain_data_out_path, FINAL_FOLDER, subject, timepoint) + labels_path = os.path.join(self.brain_data_out_path, INTERIM_FOLDER, subject, timepoint) + os.makedirs(final_path, exist_ok=True) + os.makedirs(labels_path, exist_ok=True) + + # Move brain scans to its 
expected location + move_brain_scans(subject, timepoint, in_subject_path, self.brain_data_out_path) + + # Update the report + data = { + "status": self.brain_status, + "data_path": final_path, + "labels_path": labels_path, + "num_changed_voxels": np.nan, + "brain_mask_hash": "", + "segmentation_hash": "", + "input_hash": input_hash, + } + + subject_series = pd.Series(data) + subject_series.name = index + report = report.append(subject_series) + + write_partial_csv(self.data_csv, subject, timepoint) + + return report + + def could_run(self, report: pd.DataFrame): + return True + + def execute(self, report: pd.DataFrame) -> Tuple[pd.DataFrame, bool]: + # Rewrite the report + cols = [ + "status", + "status_name", + "comment", + "data_path", + "labels_path", + "input_hash", + ] + if report is None: + report = pd.DataFrame(columns=cols) + + input_is_prepared = has_prepared_folder_structure( + self.input_path, self.input_labels_path + ) + if input_is_prepared: + # If prepared, store data directly in the data folder + self.output_path = self.done_data_out_path + + observed_cases = set() + + for subject in os.listdir(self.input_path): + in_subject_path = os.path.join(self.input_path, subject) + out_subject_path = os.path.join(self.output_path, subject) + in_labels_subject_path = os.path.join(self.input_labels_path, subject) + out_labels_subject_path = os.path.join(self.output_labels_path, subject) + + if not os.path.isdir(in_subject_path): + continue + + has_semiprepared, _ = has_semiprepared_folder_structure(in_subject_path, in_subject_path, recursive=False) + if has_semiprepared: + timepoints = get_timepoints(subject, in_subject_path) + for timepoint in timepoints: + index = get_index(subject, timepoint) + tumor_seg = get_tumor_segmentation(subject, timepoint, in_subject_path) + if tumor_seg is not None: + report = self._proceed_to_comparison(subject, timepoint, in_subject_path, report) + else: + report = self._proceed_to_tumor_extraction(subject, timepoint, in_subject_path, report) + observed_cases.add(index) + continue + + for timepoint in os.listdir(in_subject_path): + in_tp_path = os.path.join(in_subject_path, timepoint) + out_tp_path = os.path.join(out_subject_path, timepoint) + in_labels_tp_path = os.path.join(in_labels_subject_path, timepoint) + out_labels_tp_path = os.path.join(out_labels_subject_path, timepoint) + + if not os.path.isdir(in_tp_path): + continue + + input_hash = md5_dir(in_tp_path) + + index = get_index(subject, timepoint) + + # Keep track of the cases that were found on the input folder + observed_cases.add(index) + + has_semiprepared, in_tp_path = has_semiprepared_folder_structure(in_tp_path, in_tp_path, recursive=True) + if has_semiprepared: + tumor_seg = get_tumor_segmentation(subject, timepoint, in_tp_path) + if tumor_seg is not None: + report = self._proceed_to_comparison(subject, timepoint, in_tp_path, report) + else: + report = self._proceed_to_tumor_extraction(subject, timepoint, in_tp_path, report) + continue + + if index in report.index: + # Case has already been identified, see if input hash is different + # if so, override the contents and restart the state for that case + if report.loc[index]["input_hash"] == input_hash: + continue + + shutil.rmtree(out_tp_path, ignore_errors=True) + shutil.copytree(in_tp_path, out_tp_path) + report = report.drop(index) + else: + # New case not identified by the report. 
Add it + shutil.copytree(in_tp_path, out_tp_path) + + data = { + "status": self.status_code, + "data_path": out_tp_path, + "labels_path": "", + "num_changed_voxels": np.nan, + "brain_mask_hash": "", + "segmentation_hash": "", + "input_hash": input_hash, + } + + has_alternative, contents_path = has_alternative_folder_structure(out_tp_path, out_tp_path) + if has_alternative: + # Move files around so it has the expected structure + to_expected_folder_structure(out_tp_path, contents_path) + + if input_is_prepared: + data["status"] = self.done_status_code + shutil.rmtree(out_labels_tp_path, ignore_errors=True) + shutil.copytree(in_labels_tp_path, out_labels_tp_path) + + subject_series = pd.Series(data) + subject_series.name = index + report = report.append(subject_series) + + reported_cases = set(report.index) + removed_cases = reported_cases - observed_cases + + # Stop reporting removed cases + for case_index in removed_cases: + report = report.drop(case_index) + + report = report.sort_index() + return report, True diff --git a/mlcubes/data_preparation/project/stages/get_csv.py b/mlcubes/data_preparation/project/stages/get_csv.py new file mode 100644 index 00000000..417a7153 --- /dev/null +++ b/mlcubes/data_preparation/project/stages/get_csv.py @@ -0,0 +1,117 @@ +from .row_stage import RowStage +from .CreateCSVForDICOMs import CSVCreator +from .utils import update_row_with_dict, get_id_tp +from pathlib import Path + +import pandas as pd +from typing import Union, Tuple +import os +import shutil +from .mlcube_constants import CSV_STAGE_STATUS + + +class AddToCSV(RowStage): + def __init__( + self, input_dir: str, output_csv: str, out_dir: str, prev_stage_path: str + ): + self.input_dir = input_dir + self.output_csv = output_csv + self.out_dir = out_dir + self.prev_stage_path = prev_stage_path + os.makedirs(self.out_dir, exist_ok=True) + self.csv_processor = CSVCreator(self.input_dir, self.output_csv) + if os.path.exists(self.output_csv): + # Use the updated version of the CSV + self.contents = pd.read_csv(self.output_csv) + self.csv_processor.output_df_for_csv = self.contents + else: + # Use the default, empty version + self.contents = self.csv_processor.output_df_for_csv + + @property + def name(self) -> str: + return "Initial Validation" + + @property + def status_code(self) -> int: + return CSV_STAGE_STATUS + + def could_run(self, index: Union[str, int], report: pd.DataFrame) -> bool: + """Determines if getting a new CSV is necessary. 
+ This is done by checking the existence of the expected file + + Args: + index (Union[str, int]): case index in the report + report (pd.DataFrame): Dataframe containing the current state of the preparation flow + + Returns: + bool: wether this stage could be executed + """ + id, tp = get_id_tp(index) + prev_case_path = os.path.join(self.prev_stage_path, id, tp) + + return os.path.exists(prev_case_path) + + def execute( + self, index: Union[str, int], report: pd.DataFrame + ) -> Tuple[pd.DataFrame, bool]: + """Adds valid cases to the data csv that is used for later processing + Invalid cases are flagged in the report + + Args: + index (Union[str, int]): case index in the report + report (pd.DataFrame): DataFrame containing the current state of the preparation flow + + Returns: + pd.DataFrame: Updated report dataframe + """ + id, tp = get_id_tp(index) + subject_path = os.path.join(self.input_dir, id) + tp_path = os.path.join(subject_path, tp) + subject_out_path = os.path.join(self.out_dir, id) + tp_out_path = os.path.join(subject_out_path, tp) + # We will first copy the timepoint to the out folder + # This is so, if successful, the csv will point to the data + # in the next stage, instead of the previous + shutil.copytree(tp_path, tp_out_path) + + try: + self.csv_processor.process_timepoint(tp, id, subject_out_path) + report_data = { + "status": self.status_code, + "data_path": tp_out_path, + "labels_path": "", + } + except Exception as e: + report_data = { + "status": -self.status_code - 0.3, + "comment": str(e), + "data_path": tp_path, + "labels_path": "", + } + update_row_with_dict(report, report_data, index) + return report, False + + if f"{id}_{tp}" in self.csv_processor.subject_timepoint_missing_modalities: + shutil.rmtree(tp_out_path, ignore_errors=True) + # Differentiate errors by floating point value + status_code = -self.status_code - 0.1 # -1.1 + report_data["status"] = status_code + report_data["data_path"] = tp_path + success = False + elif f"{id}_{tp}" in self.csv_processor.subject_timepoint_extra_modalities: + shutil.rmtree(tp_out_path, ignore_errors=True) + # Differentiate errors by floating point value + status_code = -self.status_code - 0.2 # -1.2 + report_data["status"] = status_code + report_data["data_path"] = tp_path + success = False + else: + shutil.rmtree(tp_path) + success = True + + update_row_with_dict(report, report_data, index) + + self.csv_processor.write() + + return report, success diff --git a/mlcubes/data_preparation/project/stages/manual.py b/mlcubes/data_preparation/project/stages/manual.py new file mode 100644 index 00000000..89ce48a7 --- /dev/null +++ b/mlcubes/data_preparation/project/stages/manual.py @@ -0,0 +1,220 @@ +from typing import Union, Tuple +import pandas as pd +import os +import shutil + +from .row_stage import RowStage +from .constants import TUMOR_MASK_FOLDER, INTERIM_FOLDER, FINAL_FOLDER +from .mlcube_constants import MANUAL_STAGE_STATUS +from .utils import ( + get_id_tp, + update_row_with_dict, + set_files_read_only, + copy_files, + md5_file, +) + + +class ManualStage(RowStage): + def __init__( + self, data_csv: str, out_path: str, prev_stage_path: str, backup_path: str + ): + self.data_csv = data_csv + self.out_path = out_path + self.prev_stage_path = prev_stage_path + self.backup_path = backup_path + self.rollback_path = os.path.join(os.path.dirname(out_path), "prepared") + self.brain_mask_file = "brainMask_fused.nii.gz" + + @property + def name(self): + return "Manual review" + + @property + def status_code(self) -> int: + return 
MANUAL_STAGE_STATUS + + def __get_input_paths(self, index: Union[str, int]): + id, tp = get_id_tp(index) + tumor_mask_path = os.path.join( + self.prev_stage_path, INTERIM_FOLDER, id, tp, TUMOR_MASK_FOLDER + ) + brain_mask_path = os.path.join( + self.prev_stage_path, INTERIM_FOLDER, id, tp, self.brain_mask_file + ) + return tumor_mask_path, brain_mask_path + + def __get_under_review_path(self, index: Union[str, int]): + id, tp = get_id_tp(index) + path = os.path.join( + self.out_path, INTERIM_FOLDER, id, tp, TUMOR_MASK_FOLDER, "under_review" + ) + return path + + def __get_output_path(self, index: Union[str, int]): + id, tp = get_id_tp(index) + path = os.path.join( + self.out_path, INTERIM_FOLDER, id, tp, TUMOR_MASK_FOLDER, "finalized" + ) + return path + + def __get_backup_path(self, index: Union[str, int]): + id, tp = get_id_tp(index) + path = os.path.join(self.backup_path, id, tp, TUMOR_MASK_FOLDER) + return path + + def __get_rollback_paths(self, index: Union[str, int]): + id, tp = get_id_tp(index) + data_path = os.path.join(self.rollback_path, FINAL_FOLDER, id, tp) + labels_path = os.path.join(self.rollback_path, INTERIM_FOLDER, id, tp) + return data_path, labels_path + + def __report_success( + self, index: Union[str, int], report: pd.DataFrame + ) -> pd.DataFrame: + labels_path = self.__get_output_path(index) + data_path = report.loc[index, "data_path"] + report_data = { + "status": 5, + "data_path": data_path, + "labels_path": labels_path, + } + update_row_with_dict(report, report_data, index) + return report + + def __report_step_missing( + self, index: Union[str, int], report: pd.DataFrame + ) -> pd.DataFrame: + in_path, _ = self.__get_input_paths(index) + data_path = report.loc[index, "data_path"] + + report_data = { + "status": -self.status_code, + "data_path": data_path, + "labels_path": in_path, + } + update_row_with_dict(report, report_data, index) + return report + + def __report_multiple_cases_error( + self, index: Union[str, int], report: pd.DataFrame + ) -> pd.DataFrame: + path = self.__get_output_path(index) + data_path = report.loc[index, "data_path"] + + report_data = { + "status": -self.status_code - 0.1, # -5.1 + "data_path": data_path, + "labels_path": path, + } + update_row_with_dict(report, report_data, index) + return report + + def __rollback(self, index): + # Unhide the rollback paths + rollback_paths = self.__get_rollback_paths(index) + for rollback_path in rollback_paths: + rollback_dirname = os.path.dirname(rollback_path) + rollback_basename = os.path.basename(rollback_path) + hidden_rollback_path = os.path.join( + rollback_dirname, f".{rollback_basename}" + ) + + if os.path.exists(hidden_rollback_path): + shutil.move(hidden_rollback_path, rollback_path) + + # Move the modified brain mask to the rollback path + _, rollback_labels_path = rollback_paths + tumor_masks_path, brain_mask_path = self.__get_input_paths(index) + rollback_brain_mask_path = os.path.join( + rollback_labels_path, self.brain_mask_file + ) + if os.path.exists(rollback_brain_mask_path): + os.remove(rollback_brain_mask_path) + shutil.move(brain_mask_path, rollback_brain_mask_path) + + # Remove the complete subject path + subject_path = os.path.abspath(os.path.join(tumor_masks_path, "..")) + + shutil.rmtree(subject_path) + + def __report_rollback( + self, index: Union[str, int], report: pd.DataFrame, mask_hash + ) -> pd.DataFrame: + rollback_fets_path, rollback_qc_path = self.__get_rollback_paths(index) + + report_data = { + "status": 2, # Move back to nifti transform finished + "data_path": 
rollback_qc_path, + "labels_path": rollback_fets_path, + "brain_mask_hash": mask_hash, + "num_changed_voxels": 0.0, # Ensure voxel count is reset + "segmentation_hash": "", + } + update_row_with_dict(report, report_data, index) + return report + + def could_run(self, index: Union[str, int], report: pd.DataFrame) -> bool: + out_path = self.__get_output_path(index) + cases = [] + if os.path.exists(out_path): + cases = os.listdir(out_path) + + in_path, brain_path = self.__get_input_paths(index) + brain_mask_hash = "" + if os.path.exists(brain_path): + brain_mask_hash = md5_file(brain_path) + expected_brain_mask_hash = report.loc[index, "brain_mask_hash"] + + segmentation_exists = os.path.exists(in_path) + annotation_exists = len(cases) == 1 + brain_mask_changed = brain_mask_hash != expected_brain_mask_hash + return segmentation_exists and (not annotation_exists or brain_mask_changed) + + def execute( + self, index: Union[str, int], report: pd.DataFrame + ) -> Tuple[pd.DataFrame, bool]: + """Manual steps are by definition not doable by an algorithm. Therefore, + execution of this step leads to a failed stage message, indicating that + the manual step has not been done. + + Args: + index (Union[str, int]): current case index + report (pd.DataFrame): data preparation report + + Returns: + pd.DataFrame: _description_ + """ + + # Generate a hidden copy of the baseline segmentations + in_path, brain_path = self.__get_input_paths(index) + out_path = self.__get_output_path(index) + under_review_path = self.__get_under_review_path(index) + bak_path = self.__get_backup_path(index) + if not os.path.exists(bak_path): + copy_files(in_path, bak_path) + set_files_read_only(bak_path) + os.makedirs(under_review_path, exist_ok=True) + os.makedirs(out_path, exist_ok=True) + + cases = os.listdir(out_path) + + brain_mask_hash = "" + if os.path.exists(brain_path): + brain_mask_hash = md5_file(brain_path) + expected_brain_mask_hash = report.loc[index, "brain_mask_hash"] + brain_mask_changed = brain_mask_hash != expected_brain_mask_hash + + if brain_mask_changed: + # Found brain mask changed + self.__rollback(index) + # Label this as able to continue + return self.__report_rollback(index, report, brain_mask_hash), True + + if len(cases) > 1: + # Found more than one reviewed case + return self.__report_multiple_cases_error(index, report), False + elif not len(cases): + # Found no cases yet reviewed + return self.__report_step_missing(index, report), False + return self.__report_success(index, report), True diff --git a/mlcubes/data_preparation/project/stages/mlcube_constants.py b/mlcubes/data_preparation/project/stages/mlcube_constants.py new file mode 100644 index 00000000..89eed691 --- /dev/null +++ b/mlcubes/data_preparation/project/stages/mlcube_constants.py @@ -0,0 +1,19 @@ +RAW_PATH = "raw" +VALID_PATH = "validated" +PREP_PATH = "prepared" +BRAIN_PATH = "brain_extracted" +TUMOR_PATH = "tumor_extracted" +TUMOR_BACKUP_PATH = ".tumor_segmentation_backup" +OUT_CSV = "data.csv" +TRASH_PATH = ".trash" +INVALID_FILE = ".invalid.txt" + +REPORT_STAGE_STATUS = 0 +CSV_STAGE_STATUS = 1 +NIFTI_STAGE_STATUS = 2 +BRAIN_STAGE_STATUS = 3 +TUMOR_STAGE_STATUS = 4 +MANUAL_STAGE_STATUS = 5 +COMPARISON_STAGE_STATUS = 6 +CONFIRM_STAGE_STATUS = 7 +DONE_STAGE_STATUS = 8 diff --git a/mlcubes/data_preparation/project/stages/nifti_transform.py b/mlcubes/data_preparation/project/stages/nifti_transform.py new file mode 100644 index 00000000..5ff1fc61 --- /dev/null +++ b/mlcubes/data_preparation/project/stages/nifti_transform.py @@ -0,0 
+1,152 @@ +from typing import Union +from tqdm import tqdm +import pandas as pd +import os +import shutil + +from .row_stage import RowStage +from .PrepareDataset import Preparator, INTERIM_FOLDER, FINAL_FOLDER +from .utils import update_row_with_dict, get_id_tp, MockTqdm, unnormalize_path +from .mlcube_constants import NIFTI_STAGE_STATUS + + +class NIfTITransform(RowStage): + def __init__( + self, data_csv: str, out_path: str, prev_stage_path: str, metadata_path: str, data_out: str, + ): + self.data_csv = data_csv + self.out_path = out_path + self.data_out = data_out + self.prev_stage_path = prev_stage_path + self.metadata_path = metadata_path + os.makedirs(self.out_path, exist_ok=True) + self.prep = Preparator(data_csv, out_path, "BraTSPipeline") + # self.pbar = pbar + self.pbar = tqdm() + + @property + def name(self) -> str: + return "NiFTI Conversion" + + @property + def status_code(self) -> int: + return NIFTI_STAGE_STATUS + + def could_run(self, index: Union[str, int], report: pd.DataFrame) -> bool: + """Determine if case at given index needs to be converted to NIfTI + + Args: + index (Union[str, int]): Case index, as used by the report dataframe + report (pd.DataFrame): Report Dataframe for providing additional context + + Returns: + bool: Wether this stage could be executed for the given case + """ + id, tp = get_id_tp(index) + prev_case_path = os.path.join(self.prev_stage_path, id, tp) + if os.path.exists(prev_case_path): + return len(os.listdir(prev_case_path)) > 0 + return False + + def execute(self, index: Union[str, int], report: pd.DataFrame) -> pd.DataFrame: + """Executes the NIfTI transformation stage on the given case + + Args: + index (Union[str, int]): case index, as used by the report + report (pd.DataFrame): DataFrame containing the current state of the preparation flow + + Returns: + pd.DataFrame: Updated report dataframe + """ + self.__prepare_exec() + self.__process_case(index) + report, success = self.__update_report(index, report) + self.prep.write() + self.__update_metadata() + + return report, success + + def __get_output_paths(self, index: Union[str, int]): + id, tp = get_id_tp(index) + fets_path = os.path.join(self.prep.final_output_dir, id, tp) + qc_path = os.path.join(self.prep.interim_output_dir, id, tp) + + return fets_path, qc_path + + def __prepare_exec(self): + # Reset the file contents for errors + open(self.prep.stderr_log, "w").close() + + self.prep.read() + + def __process_case(self, index: Union[str, int]): + id, tp = get_id_tp(index) + df = self.prep.subjects_df + row = df[(df["SubjectID"] == id) & (df["Timepoint"] == tp)].iloc[0] + self.prep.convert_to_dicom(hash(index), row, self.pbar) + + def __update_prev_stage_state(self, index: Union[str, int], report: pd.DataFrame): + prev_data_path = report.loc[index]["data_path"] + prev_data_path = unnormalize_path(prev_data_path, self.data_out) + shutil.rmtree(prev_data_path) + + def __undo_current_stage_changes(self, index: Union[str, int]): + fets_path, qc_path = self.__get_output_paths(index) + shutil.rmtree(fets_path, ignore_errors=True) + shutil.rmtree(qc_path, ignore_errors=True) + + def __update_report( + self, index: Union[str, int], report: pd.DataFrame + ) -> pd.DataFrame: + id, tp = get_id_tp(index) + failing = self.prep.failing_subjects + failing_subject = failing[ + (failing["SubjectID"] == id) & (failing["Timepoint"] == tp) + ] + if len(failing_subject): + self.__undo_current_stage_changes(index) + report = self.__report_failure(index, report) + success = False + else: + 
self.__update_prev_stage_state(index, report) + report = self.__report_success(index, report) + success = True + + return report, success + + def __update_metadata(self): + fets_path = os.path.join(self.out_path, "DataForFeTS") + for file in os.listdir(fets_path): + filepath = os.path.join(fets_path, file) + out_filepath = os.path.join(self.metadata_path, file) + if os.path.isfile(filepath) and filepath.endswith(".yaml"): + shutil.copyfile(filepath, out_filepath) + + def __report_success( + self, index: Union[str, int], report: pd.DataFrame + ) -> pd.DataFrame: + fets_path, qc_path = self.__get_output_paths(index) + report_data = { + "status": self.status_code, + "data_path": qc_path, + "labels_path": fets_path, + } + update_row_with_dict(report, report_data, index) + return report + + def __report_failure( + self, index: Union[str, int], report: pd.DataFrame + ) -> pd.DataFrame: + prev_data_path = report.loc[index]["data_path"] + + with open(self.prep.stderr_log, "r") as f: + msg = f.read() + + report_data = { + "status": -self.status_code, + "comment": msg, + "data_path": prev_data_path, + "labels_path": "", + } + update_row_with_dict(report, report_data, index) + return report diff --git a/mlcubes/data_preparation/project/stages/pipeline.py b/mlcubes/data_preparation/project/stages/pipeline.py new file mode 100644 index 00000000..99fa2935 --- /dev/null +++ b/mlcubes/data_preparation/project/stages/pipeline.py @@ -0,0 +1,300 @@ +from pandas import DataFrame +from typing import Union, List, Tuple +from tqdm import tqdm +import traceback +import yaml +import os + +from .dset_stage import DatasetStage +from .row_stage import RowStage +from .stage import Stage +from .utils import cleanup_storage +from .mlcube_constants import DONE_STAGE_STATUS + + +def normalize_report_paths(report: DataFrame) -> DataFrame: + """Ensures paths are normalized and converts them to relative paths for the local machine + + Args: + report (DataFrame): report to normalize + + Returns: + DataFrame: report with transformed paths + """ + pattern = "mlcube_io\d+" + report["data_path"] = report["data_path"].str.split(pattern).str[-1] + report["labels_path"] = report["labels_path"].str.split(pattern).str[-1] + return report + + +def write_report(report: DataFrame, filepath: str): + report = normalize_report_paths(report) + report_dict = report.to_dict() + with open(filepath, "w") as f: + yaml.dump(report_dict, f) + + +class Pipeline: + def __init__( + self, + init_stage: DatasetStage, + stages: List[Union[DatasetStage, RowStage]], + staging_folders: List[str], + trash_folders: List[str], + invalid_subjects_file: str + ): + self.init_stage = init_stage + self.stages = stages + self.staging_folders = staging_folders + self.trash_folders = trash_folders + self.invalid_subjects_file = invalid_subjects_file + + def __invalid_subjects(self) -> List[str]: + """Retrieve invalid subjects + + Returns: + List[str]: list of invalid subjects + """ + if not os.path.exists(self.invalid_subjects_file): + open(self.invalid_subjects_file, "a").close() + + with open(self.invalid_subjects_file, "r") as f: + invalid_subjects = set([line.strip() for line in f]) + + return invalid_subjects + + def __is_subject_done(self, subject: Union[str, int], report: DataFrame) -> bool: + """Determines if a subject is considered done + + Args: + subject (Union[str, int]): subject index + report (DataFrame): DataFrame containing the state of the processing + + Returns: + bool: wether the subject is done or not + """ + subject_status = report.loc[subject, 
"status"] + + return subject_status == DONE_STAGE_STATUS + + def __is_done(self, report: DataFrame) -> bool: + """Determines if the preparation is complete + + Args: + report (DataFrame): DataFrame containing the state of the processing + + Returns: + bool: Wether the preparation is complete + """ + return all(report["status"] == DONE_STAGE_STATUS) + + def __get_report_stage_to_run( + self, subject: Union[str, int], report: DataFrame + ) -> Union[DatasetStage, RowStage]: + """Retrieves the stage a subject is in indicated by the report + + Args: + subject (Union[str, int]): Subject index + report (DataFrame): Dataframe containing the state of the processing + + Returns: + Union[DatasetStage, RowStage]: Stage the current subject is in + """ + report_status_code = int(report.loc[subject, "status"]) + if report_status_code < 0: + # Error code, rerun the stage specified in the report + report_status_code = abs(report_status_code) + else: + # Success code, reported stage works so move to the next one + report_status_code += 1 + for stage in self.stages: + if stage.status_code == report_status_code: + return stage + + return None + + def determine_next_stage( + self, subject: Union[str, int], report + ) -> Tuple[List[Union[DatasetStage, RowStage]], bool]: + """Determines what stage to run + First priority goes to a stage if it is the only one that could run. (only one stage can run) + Second priority goes to what the report says should run next. (The report knows what stage can run) + Third priority goes to the first of all possible stages that could run. (Earliest of all possible stages) + + Args: + subject (Union[str, int]): Subject name (SubjectID, Timepoint) + report (pd.DataFrame): report dataframe + + Returns: + Tuple[List[Union[DatasetStage, RowStage]], bool]: Stage to run, and wether it is done or not + """ + could_run_stages = [] + for i, stage in enumerate(self.stages): + could_run = False + if isinstance(stage, RowStage): + could_run = stage.could_run(subject, report) + else: + could_run = stage.could_run(report) + + if could_run: + runnable_stage = self.stages[i] + could_run_stages.append(runnable_stage) + + # TODO: split into a function + if len(could_run_stages) == 1: + stage = could_run_stages[0] + is_last_subject = subject == report.index[-1] + if isinstance(stage, DatasetStage) and not is_last_subject: + # Only run dataset stages on the last subject, so all subjects can update + # their state if needed before proceeding + return None, False + return stage, False + + # Handle errors + # Either no stage can be executed (len(could_run_stages == 0)) + # or multiple stages can be executed (len(could_run_stages > 1)) + report_stage = self.__get_report_stage_to_run(subject, report) + + # TODO: split into a function + if len(could_run_stages) == 0: + # Either the case processing was on-going but it's state is broken + # or the next stage is a dataset stage, which means we're done with this one + # or the case is done and no stage can nor should run + # We can tell this by looking at the report + is_done = self.__is_subject_done(subject, report) + is_dset_stage = isinstance(report_stage, DatasetStage) + if is_done or is_dset_stage: + return None, True + else: + return None, False + # TODO: split into a function + else: + # Multiple stages could run. 
Remove ambiguity by + # syncing with the report + if report_stage in could_run_stages: + return report_stage, False + + return could_run_stages[0], False + + def run(self, report: DataFrame, report_path: str): + # cleanup the trash at the very beginning + cleanup_storage(self.trash_folders) + + # The init stage always has to be executed + report, _ = self.init_stage.execute(report) + write_report(report, report_path) + + invalid_subjects = self.__invalid_subjects() + + should_loop = True + should_stop = False + while should_loop: + + # Since we could have row and dataset stages interwoven, we want + # to make sure we continue processing subjects until nothing new has happened. + # This means we can resume a given subject and its row stages even after a dataset stage + prev_status = report["status"].copy() + subjects = list(report.index) + subjects_loop = tqdm(subjects) + + for subject in subjects_loop: + report, should_stop = self.process_subject( + subject, report, report_path, subjects_loop + ) + + if should_stop: + break + + # If a new invalid subject is identified, start over + new_invalid_subjects = self.__invalid_subjects() + if invalid_subjects != new_invalid_subjects: + invalid_subjects = new_invalid_subjects + # We're going to restart the subjects loop + break + + # Check for report differences. If there are, rerun the loop + should_loop = any(report["status"] != prev_status) and not should_stop + + if self.__is_done(report): + cleanup_folders = self.staging_folders + self.trash_folders + cleanup_storage(cleanup_folders) + + def process_subject( + self, subject: Union[int, str], report: DataFrame, report_path: str, pbar: tqdm + ): + should_stop = False + while True: + # Check if subject has been invalidated + invalid_subjects = self.__invalid_subjects() + if subject in invalid_subjects: + break + + # Filter out invalid subjects + working_report = report[~report.index.isin(invalid_subjects)].copy() + + stage, done = self.determine_next_stage(subject, working_report) + + if done: + break + + try: + working_report, successful = self.run_stage( + stage, subject, working_report, pbar + ) + except Exception: + # TODO: The superclass could be in charge of catching the error, reporting it and cleaning up + # and raise the exception again to be caught here + working_report = self.__report_unhandled_exception( + stage, subject, working_report + ) + print(traceback.format_exc()) + successful = False + + report.update(working_report) + write_report(report, report_path) + + if not successful: + # Send back a signal that a dset stage failed + if isinstance(stage, DatasetStage): + should_stop = True + break + + return report, should_stop + + def run_stage(self, stage, subject, report, pbar): + successful = False + if isinstance(stage, RowStage): + pbar.set_description(f"{subject} | {stage.name}") + report, successful = stage.execute(subject, report) + elif isinstance(stage, DatasetStage): + pbar.set_description(f"{stage.name}") + report, successful = stage.execute(report) + + + return report, successful + + def __report_unhandled_exception( + self, + stage: Stage, + subject: Union[int, str], + report: DataFrame, + ): + # Assign a special status code for unhandled errors, associated + # to the stage status code + status_code = -stage.status_code - 0.101 + name = f"{stage.name.upper().replace(' ', '_')}_UNHANDLED_ERROR" + comment = traceback.format_exc() + data_path = report.loc[subject]["data_path"] + labels_path = report.loc[subject]["labels_path"] + + body = { + "status": status_code, + 
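+            # the extra -0.101 offset distinguishes an unhandled error from a stage's own (handled) negative status code
+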
"status_name": name, + "comment": comment, + "data_path": data_path, + "labels_path": labels_path, + } + + report.loc[subject] = body + + return report diff --git a/mlcubes/data_preparation/project/stages/row_stage.py b/mlcubes/data_preparation/project/stages/row_stage.py new file mode 100644 index 00000000..70701beb --- /dev/null +++ b/mlcubes/data_preparation/project/stages/row_stage.py @@ -0,0 +1,34 @@ +from abc import ABC, abstractmethod +from typing import Union, Tuple +import pandas as pd + +from .stage import Stage + + +class RowStage(Stage, ABC): + @abstractmethod + def could_run(self, index: Union[str, int], report: pd.DataFrame) -> bool: + """Establishes if this step could be executed for the given case + + Args: + index (Union[str, int]): case index in the report + report (pd.DataFrame): Dataframe containing the current state of the preparation flow + + Returns: + bool: wether this stage could be executed + """ + + @abstractmethod + def execute( + self, index: Union[str, int], report: pd.DataFrame + ) -> Tuple[pd.DataFrame, bool]: + """Executes the stage on the given case + + Args: + index (Union[str, int]): case index in the report + report (pd.DataFrame): DataFrame containing the current state of the preparation flow + + Returns: + pd.DataFrame: Updated report dataframe + bool: Success status + """ diff --git a/mlcubes/data_preparation/project/stages/split.py b/mlcubes/data_preparation/project/stages/split.py new file mode 100644 index 00000000..7580b06b --- /dev/null +++ b/mlcubes/data_preparation/project/stages/split.py @@ -0,0 +1,101 @@ +import os +import yaml +import pandas as pd +from typing import List +import math + +from .dset_stage import DatasetStage +from .utils import get_id_tp, cleanup_storage +from .mlcube_constants import DONE_STAGE_STATUS + + +def row_to_path(row: pd.Series) -> str: + id = row["SubjectID"] + tp = row["Timepoint"] + return os.path.join(id, tp) + + +class SplitStage(DatasetStage): + def __init__( + self, + params: str, + data_path: str, + labels_path: str, + staging_folders: List[str], + ): + self.params = params + self.data_path = data_path + self.labels_path = labels_path + self.split_csv_path = os.path.join(data_path, "splits.csv") + self.train_csv_path = os.path.join(data_path, "train.csv") + self.val_csv_path = os.path.join(data_path, "val.csv") + self.staging_folders = staging_folders + + @property + def name(self) -> str: + return "Generate splits" + + @property + def status_code(self) -> int: + return DONE_STAGE_STATUS + + def could_run(self, report: pd.DataFrame) -> bool: + split_exists = os.path.exists(self.split_csv_path) + if split_exists: + # This stage already ran + return False + + for index in report.index: + id, tp = get_id_tp(index) + case_data_path = os.path.join(self.data_path, id, tp) + case_labels_path = os.path.join(self.labels_path, id, tp) + data_exists = os.path.exists(case_data_path) + labels_exist = os.path.exists(case_labels_path) + + if not data_exists or not labels_exist: + # Some subjects are not ready + return False + + return True + + def __report_success(self, report: pd.DataFrame) -> pd.DataFrame: + report["status"] = self.status_code + + return report + + def execute(self, report: pd.DataFrame) -> pd.DataFrame: + with open(self.params, "r") as f: + params = yaml.safe_load(f) + + seed = params["seed"] + train_pct = params["train_percent"] + + split_df = report.copy(deep=True) + split_df["SubjectID"] = split_df.index.str.split("|").str[0] + split_df["Timepoint"] = split_df.index.str.split("|").str[1] + split_df 
= split_df[["SubjectID", "Timepoint"]].reset_index(drop=True) + subjects = split_df["SubjectID"].drop_duplicates() + subjects = subjects.sample(frac=1, random_state=seed) + train_size = math.floor(len(subjects) * train_pct) + + train_subjects = subjects.iloc[:train_size] + val_subjects = subjects.iloc[train_size:] + + train_mask = split_df["SubjectID"].isin(train_subjects) + val_mask = split_df["SubjectID"].isin(val_subjects) + + split_df.loc[train_mask, "Split"] = "Train" + split_df.loc[val_mask, "Split"] = "Val" + + split_df.to_csv(self.split_csv_path, index=False) + + # Generate separate splits files with relative path + split_df["path"] = split_df.apply(row_to_path, axis=1) + + split_df.loc[train_mask].to_csv(self.train_csv_path, index=False) + split_df.loc[val_mask].to_csv(self.val_csv_path, index=False) + + report = self.__report_success(report) + cleanup_storage(self.staging_folders) + + return report, True diff --git a/mlcubes/data_preparation/project/stages/stage.py b/mlcubes/data_preparation/project/stages/stage.py new file mode 100644 index 00000000..ac453bd6 --- /dev/null +++ b/mlcubes/data_preparation/project/stages/stage.py @@ -0,0 +1,5 @@ +from abc import ABC + +class Stage(ABC): + name: str + status_code: int \ No newline at end of file diff --git a/mlcubes/data_preparation/project/stages/utils.py b/mlcubes/data_preparation/project/stages/utils.py new file mode 100644 index 00000000..1e2a4c60 --- /dev/null +++ b/mlcubes/data_preparation/project/stages/utils.py @@ -0,0 +1,150 @@ +import os +import shutil +from tqdm import tqdm +from functools import reduce +from pathlib import Path +import hashlib + +# Taken from https://code.activestate.com/recipes/577879-create-a-nested-dictionary-from-oswalk/ +def get_directory_structure(rootdir): + """ + Creates a nested dictionary that represents the folder structure of rootdir + """ + dir = {} + rootdir = rootdir.rstrip(os.sep) + start = rootdir.rfind(os.sep) + 1 + for path, dirs, files in os.walk(rootdir): + folders = path[start:].split(os.sep) + subdir = dict.fromkeys(files) + parent = reduce(dict.get, folders[:-1], dir) + parent[folders[-1]] = subdir + return dir + + +def has_prepared_folder_structure(data_path, labels_path) -> bool: + data_struct = list(get_directory_structure(data_path).values())[0] + labels_struct = list(get_directory_structure(labels_path).values())[0] + + expected_data_files = ["brain_t1c.nii.gz", "brain_t1n.nii.gz", "brain_t2f.nii.gz", "brain_t2w.nii.gz"] + expected_labels_files = ["final_seg.nii.gz"] + + if "splits.csv" not in data_struct: + return False + + for id in data_struct.keys(): + if data_struct[id] is None: + # This is a file, ignore + continue + for tp in data_struct[id].keys(): + expected_subject_data_files = set(["_".join([id, tp, file]) for file in expected_data_files]) + expected_subject_labels_files = set(["_".join([id, tp, file]) for file in expected_labels_files]) + + found_data_files = set(data_struct[id][tp].keys()) + found_labels_files = set(labels_struct[id][tp].keys()) + + data_files_diff = len(expected_subject_data_files - found_data_files) + labels_files_diff = len(expected_subject_labels_files - found_labels_files) + if data_files_diff or labels_files_diff: + return False + + # Passed all checks + return True + + +def normalize_path(path: str) -> str: + """Remove mlcube-specific components from the given path + + Args: + path (str): mlcube path + + Returns: + str: normalized path + """ + # for this specific problem, we know that all paths start with `/mlcube_io*` + # and that this 
pattern won't change, shrink or grow. We can therefore write a + # simple, specific solution + if path.startswith("/mlcube_io"): + return path[12:] + + # In case the path has already been normalized + return path + +def unnormalize_path(path: str, parent: str) -> str: + """Add back mlcube-specific components to the given path + + Args: + path (str): normalized path + + Returns: + str: mlcube-specific path + """ + if path.startswith(os.path.sep): + path = path[1:] + return os.path.join(parent, path) + +def update_row_with_dict(df, d, idx): + for key in d.keys(): + df.loc[idx, key] = d.get(key) + + +def get_id_tp(index: str): + return index.split("|") + + +def set_files_read_only(path): + for root, dirs, files in os.walk(path): + for file_name in files: + file_path = os.path.join(root, file_name) + os.chmod(file_path, 0o444) # Set read-only permission for files + + for dir_name in dirs: + dir_path = os.path.join(root, dir_name) + set_files_read_only( + dir_path + ) # Recursively call the function for subdirectories + + +def cleanup_storage(remove_folders): + for folder in remove_folders: + shutil.rmtree(folder, ignore_errors=True) + + +def copy_files(src_dir, dest_dir): + # Ensure the destination directory exists + os.makedirs(dest_dir, exist_ok=True) + + # Iterate through the files in the source directory + for filename in os.listdir(src_dir): + src_file = os.path.join(src_dir, filename) + dest_file = os.path.join(dest_dir, filename) + + # Check if the item is a file (not a directory) + if os.path.isfile(src_file): + shutil.copy2(src_file, dest_file) # Copy the file + + +# Taken from https://stackoverflow.com/questions/24937495/how-can-i-calculate-a-hash-for-a-filesystem-directory-using-python +def md5_update_from_dir(directory, hash): + assert Path(directory).is_dir() + for path in sorted(Path(directory).iterdir(), key=lambda p: str(p).lower()): + hash.update(path.name.encode()) + if path.is_file(): + with open(path, "rb") as f: + for chunk in iter(lambda: f.read(4096), b""): + hash.update(chunk) + elif path.is_dir(): + hash = md5_update_from_dir(path, hash) + return hash + + +def md5_dir(directory): + return md5_update_from_dir(directory, hashlib.md5()).hexdigest() + + +def md5_file(filepath): + return hashlib.md5(open(filepath,'rb').read()).hexdigest() + + +class MockTqdm(tqdm): + def __getattr__(self, attr): + return lambda *args, **kwargs: None diff --git a/mlcubes/data_preparation/project/statistics.py b/mlcubes/data_preparation/project/statistics.py new file mode 100644 index 00000000..d31ea02b --- /dev/null +++ b/mlcubes/data_preparation/project/statistics.py @@ -0,0 +1,53 @@ +import os +import yaml +import argparse +import pandas as pd + + +if __name__ == "__main__": + parser = argparse.ArgumentParser("MedPerf Statistics Example") + parser.add_argument( + "--data_path", + dest="data", + type=str, + help="directory containing the prepared data", + ) + parser.add_argument( + "--labels_path", + dest="labels", + ) + parser.add_argument( + "--out_file", dest="out_file", type=str, help="file to store statistics" + ) + parser.add_argument( + "--metadata_path", + dest="metadata_path", + type=str, + help="path to the local metadata folder", + ) + + args = parser.parse_args() + + splits_path = os.path.join(args.data, "splits.csv") + invalid_path = os.path.join(args.metadata_path, ".invalid.txt") + + invalid_subjects = [] + if os.path.exists(invalid_path): + with open(invalid_path, "r") as f: + invalid_subjects = f.readlines() + + splits_df = pd.read_csv(splits_path) + + num_train_subjects = 
len(splits_df[splits_df["Split"] == "Train"]) + num_val_subjects = len(splits_df[splits_df["Split"] == "Val"]) + + num_invalid_subjects = len(invalid_subjects) + + stats = { + "num_train_subjects": num_train_subjects, + "num_val_subjects": num_val_subjects, + "num_invalid_subjects": num_invalid_subjects + } + + with open(args.out_file, "w") as f: + yaml.dump(stats, f) diff --git a/scripts/linux-makeself b/scripts/linux-makeself index d1a95447..dc62fcc5 100644 --- a/scripts/linux-makeself +++ b/scripts/linux-makeself @@ -122,44 +122,54 @@ install_captk () { echo "Extracting binaries..." cd ${target_dir} ./FeTS/${ver}/fets --appimage-extract - ## openfl installation - cd ${target_dir}/squashfs-root/usr/bin/OpenFederatedLearning - doc_setup=https://fets-ai.github.io/Front-End/setup#set-up-the-environment - nnunet_link=https://upenn.box.com/shared/static/f7zt19d08c545qt3tcaeg7b37z6qafum.zip - max=8 - ver_max=`python3 -c 'import sys; print(sys.version_info.minor)'` - min=5 - ver_min=`python3 -c 'import sys; print(sys.version_info.micro)'` - if [ ("$ver_max" -gt "$max") || ("$ver_min" -lt "$min") ]; then - echo "Python version >3.8 detected, please change default version to either 3.6.5, 3.7 or 3.8" + # ## FeTS_Tool_Helper scripts installation + echo "Installing FeTS_Tool_Helper scripts..." + cd ${target_dir}/squashfs-root/usr/bin/ + python3 -m venv venv + if venv/bin/pip install -e .; then + echo "FeTS_Tool_Helper scripts installed successfully!" + else + echo "FeTS_Tool_Helper scripts installation failed! Please contact admin@fets.ai" exit 1 fi - - echo "Setting up the Python environment for OpenFederatedLearning..." - if make install_openfl ; then - ./venv/bin/pip install torch==1.7.1+cu92 torchvision==0.8.2+cu92 torchaudio==0.7.2 -f https://download.pytorch.org/whl/torch_stable.html - make install_fets - echo "Installing BrainMaGe..." - ./venv/bin/pip install ../BrainMaGe - - cd ../LabelFusion - echo "Setting up the Python environment for LabelFusion (this needs SimpleITK==2.0.2, but GaNDLF needs 1.2.4)..." - python3 -m venv venv - venv/bin/pip install -e . - - echo "Downloading weights for nnUNet..." - cd ${target_dir}/squashfs-root/usr/data/fets - wget ${nnunet_link} -O nnunet.zip - if unzip -qq nnunet.zip ; then - rm -rf nnunet.zip - fi - - cd .. - echo "Downloading FeTS Initiative final consensus model weights..." - wget https://upenn.box.com/shared/static/hhvn8nb9xtz6nxcilmdl8kbx9n1afkdu.zip -O ./fets_consensus_models.zip - if unzip -qq fets_consensus_models.zip ; then - rm -rf fets_consensus_models.zip - fi + # ## openfl installation + # cd ${target_dir}/squashfs-root/usr/bin/OpenFederatedLearning + # doc_setup=https://fets-ai.github.io/Front-End/setup#set-up-the-environment + # nnunet_link=https://upenn.box.com/shared/static/f7zt19d08c545qt3tcaeg7b37z6qafum.zip + # max=8 + # ver_max=`python3 -c 'import sys; print(sys.version_info.minor)'` + # min=5 + # ver_min=`python3 -c 'import sys; print(sys.version_info.micro)'` + # if [ ("$ver_max" -gt "$max") || ("$ver_min" -lt "$min") ]; then + # echo "Python version >3.8 detected, please change default version to either 3.6.5, 3.7 or 3.8" + # exit 1 + # fi + + # echo "Setting up the Python environment for OpenFederatedLearning..." + # if make install_openfl ; then + # ./venv/bin/pip install torch==1.7.1+cu92 torchvision==0.8.2+cu92 torchaudio==0.7.2 -f https://download.pytorch.org/whl/torch_stable.html + # make install_fets + # echo "Installing BrainMaGe..." 
+ # ./venv/bin/pip install ../BrainMaGe + + # cd ../LabelFusion + # echo "Setting up the Python environment for LabelFusion (this needs SimpleITK==2.0.2, but GaNDLF needs 1.2.4)..." + # python3 -m venv venv + # venv/bin/pip install -e . + + # echo "Downloading weights for nnUNet..." + # cd ${target_dir}/squashfs-root/usr/data/fets + # wget ${nnunet_link} -O nnunet.zip + # if unzip -qq nnunet.zip ; then + # rm -rf nnunet.zip + # fi + + # cd .. + # echo "Downloading FeTS Initiative final consensus model weights..." + # wget https://upenn.box.com/shared/static/hhvn8nb9xtz6nxcilmdl8kbx9n1afkdu.zip -O ./fets_consensus_models.zip + # if unzip -qq fets_consensus_models.zip ; then + # rm -rf fets_consensus_models.zip + # fi fi ## display messages echo "fets_root_dir :: \"${target_dir}/squashfs-root/usr\"" diff --git a/src/applications/BraTSPipeline.cxx b/src/applications/BraTSPipeline.cxx index ce9096a9..5a49fce3 100644 --- a/src/applications/BraTSPipeline.cxx +++ b/src/applications/BraTSPipeline.cxx @@ -22,8 +22,10 @@ int main(int argc, char** argv) parser.addRequiredParameter("t2", "t2Image", cbica::Parameter::STRING, "Input Image (DICOM or NIfTI)", "Input structural T2-weighted contrast image"); parser.addRequiredParameter("fl", "flImage", cbica::Parameter::STRING, "Input Image (DICOM or NIfTI)", "Input structural FLAIR contrast image"); parser.addRequiredParameter("o", "outputDir", cbica::Parameter::DIRECTORY, "Directory", "Output directory for final output"); - parser.addOptionalParameter("s", "skullStrip", cbica::Parameter::BOOLEAN, "0 or 1", "Flag whether to skull strip or not", "Defaults to 1", "This uses BrainMaGe [https://github.com/CBICA/BrainMaGe/] and", "falls back to DeepMedic [https://cbica.github.io/CaPTk/seg_DL.html]"); - parser.addOptionalParameter("b", "brainTumor", cbica::Parameter::BOOLEAN, "0 or 1", "Flag whether to segment brain tumors or not", "Defaults to 0", "This uses DeepMedic: https://cbica.github.io/CaPTk/seg_DL.html"); + parser.addOptionalParameter("s", "skullStrip", cbica::Parameter::BOOLEAN, "0 or 1", "Flag whether to skull strip", "present only for compatibility purposes"); + parser.addOptionalParameter("b", "brainTumor", cbica::Parameter::BOOLEAN, "0 or 1", "Flag for brain tumor segmentation", "present only for compatibility purposes"); + // parser.addOptionalParameter("s", "skullStrip", cbica::Parameter::BOOLEAN, "0 or 1", "Flag whether to skull strip or not", "Defaults to 0", "This uses BrainMaGe [https://github.com/CBICA/BrainMaGe/] and", "falls back to DeepMedic [https://cbica.github.io/CaPTk/seg_DL.html]"); + // parser.addOptionalParameter("b", "brainTumor", cbica::Parameter::BOOLEAN, "0 or 1", "Flag whether to segment brain tumors or not", "Defaults to 0", "This uses DeepMedic: https://cbica.github.io/CaPTk/seg_DL.html"); parser.addOptionalParameter("d", "debug", cbica::Parameter::BOOLEAN, "0 or 1", "Print debugging information", "Defaults to 1"); parser.addOptionalParameter("i", "interFiles", cbica::Parameter::BOOLEAN, "0 or 1", "Save intermediate files", "Defaults to 1"); @@ -36,7 +38,7 @@ int main(int argc, char** argv) std::string outputDir; - bool debug = true, intermediateFiles = true, skullStrip = true, brainTumor = false; + bool debug = true, intermediateFiles = true, skullStrip = false, brainTumor = false; parser.getParameterValue("t1c", inputFiles["T1CE"]); parser.getParameterValue("t1", inputFiles["T1"]); @@ -46,14 +48,14 @@ int main(int argc, char** argv) cbica::createDir(outputDir); - if (parser.isPresent("s")) - { - 
parser.getParameterValue("s", skullStrip); - } - if (parser.isPresent("b")) - { - parser.getParameterValue("b", brainTumor); - } + // if (parser.isPresent("s")) + // { + // parser.getParameterValue("s", skullStrip); + // } + // if (parser.isPresent("b")) + // { + // parser.getParameterValue("b", brainTumor); + // } if (parser.isPresent("d")) { parser.getParameterValue("d", debug); @@ -153,7 +155,11 @@ int main(int argc, char** argv) return EXIT_FAILURE; } - auto tempOutput = cbica::createTemporaryDirectory(); + auto tempOutput = cbica::getEnvironmentVariableValue("OUTPUT_DIR_FROM_MEDPERF"); + if (tempOutput == "") + { + tempOutput = cbica::createTemporaryDirectory(); + } //construct command std::string fullCommandToRun = cbica::normPath(m_exe) + " -o " + cbica::normPath(tempOutput) + " -z y \"" + cbica::normPath(dicomFolderPath) + "\""; diff --git a/src/applications/CMakeLists.txt b/src/applications/CMakeLists.txt index f1d1a71c..f2efd43c 100644 --- a/src/applications/CMakeLists.txt +++ b/src/applications/CMakeLists.txt @@ -54,287 +54,288 @@ ELSE() SET( PLATFORM_STRING "linux" ) ENDIF() -# SET( FILENAME_TO_EXTRACT "binaries_${PLATFORM_STRING}") -# # SET( DOWNLOAD_LINK "ftp://www.nitrc.org/home/groups/captk/downloads/precompiledApps/${PLATFORM_STRING}.zip" ) -# SET( DOWNLOAD_LINK "https://github.com/CBICA/CaPTk/raw/master/binaries/precompiledApps/${PLATFORM_STRING}.zip" ) -# SET( LFS_FILE_TO_CHECK "${PROJECT_SOURCE_DIR}/binaries/precompiledApps/${PLATFORM_STRING}.zip" ) +SET( FILENAME_TO_EXTRACT "binaries_${PLATFORM_STRING}") +# SET( DOWNLOAD_LINK "ftp://www.nitrc.org/home/groups/captk/downloads/precompiledApps/${PLATFORM_STRING}.zip" ) +SET( DOWNLOAD_LINK "https://github.com/CBICA/CaPTk/raw/master/binaries/precompiledApps/${PLATFORM_STRING}.zip" ) +SET( LFS_FILE_TO_CHECK "${PROJECT_SOURCE_DIR}/binaries/precompiledApps/${PLATFORM_STRING}.zip" ) -# SET( FILE_TO_EXTRACT ${PROJECT_BINARY_DIR}/${FILENAME_TO_EXTRACT}.zip) +SET( FILE_TO_EXTRACT ${PROJECT_BINARY_DIR}/${FILENAME_TO_EXTRACT}.zip) -# # # putting this condition so that the extraction doesn't happen every single time -# # IF( NOT EXISTS "${DOWNLOADED_APPS_DIR}" ) +# # putting this condition so that the extraction doesn't happen every single time +# IF( NOT EXISTS "${DOWNLOADED_APPS_DIR}" ) -# # IF( NOT EXISTS "${FILE_TO_EXTRACT}" ) +# IF( NOT EXISTS "${FILE_TO_EXTRACT}" ) -# # # copy from LFS folder -# # IF( EXISTS ${LFS_FILE_TO_CHECK} ) -# # CONFIGURE_FILE( ${LFS_FILE_TO_CHECK} ${FILE_TO_EXTRACT} ) -# # ENDIF() +# # copy from LFS folder +# IF( EXISTS ${LFS_FILE_TO_CHECK} ) +# CONFIGURE_FILE( ${LFS_FILE_TO_CHECK} ${FILE_TO_EXTRACT} ) +# ENDIF() -# # # do not re-download if the LFS fetch worked -# # IF(NOT EXISTS ${FILE_TO_EXTRACT}) -# # # download exe from url -# # MESSAGE( STATUS "Downloading pre-compiled external applications" ) -# # FILE(DOWNLOAD "${DOWNLOAD_LINK}" "${FILE_TO_EXTRACT}" TIMEOUT 1000000 STATUS STATUS_CODE SHOW_PROGRESS) -# # IF(NOT STATUS_CODE EQUAL 0) -# # MESSAGE(FATAL_ERROR "Failed to download Precompiled packages. Status=${STATUS_CODE}") -# # ENDIF() -# # ENDIF() -# # ENDIF() +# # do not re-download if the LFS fetch worked +# IF(NOT EXISTS ${FILE_TO_EXTRACT}) +# # download exe from url +# MESSAGE( STATUS "Downloading pre-compiled external applications" ) +# FILE(DOWNLOAD "${DOWNLOAD_LINK}" "${FILE_TO_EXTRACT}" TIMEOUT 1000000 STATUS STATUS_CODE SHOW_PROGRESS) +# IF(NOT STATUS_CODE EQUAL 0) +# MESSAGE(FATAL_ERROR "Failed to download Precompiled packages. 
Status=${STATUS_CODE}") +# ENDIF() +# ENDIF() +# ENDIF() -# # # FILE(MAKE_DIRECTORY ${DOWNLOADED_APPS_DIR}) +# # FILE(MAKE_DIRECTORY ${DOWNLOADED_APPS_DIR}) -# # MESSAGE( STATUS "Extracting pre-compiled external applications" ) -# # IF( EXISTS "${FILE_TO_EXTRACT}" ) +# MESSAGE( STATUS "Extracting pre-compiled external applications" ) +# IF( EXISTS "${FILE_TO_EXTRACT}" ) -# # EXECUTE_PROCESS( COMMAND ${CMAKE_COMMAND} -E tar xfz ${FILE_TO_EXTRACT} -# # WORKING_DIRECTORY ${DOWNLOADED_APPS_DIR} -# # RESULT_VARIABLE RESULT_CODE -# # ) +# EXECUTE_PROCESS( COMMAND ${CMAKE_COMMAND} -E tar xfz ${FILE_TO_EXTRACT} +# WORKING_DIRECTORY ${DOWNLOADED_APPS_DIR} +# RESULT_VARIABLE RESULT_CODE +# ) -# # IF(NOT RESULT_CODE EQUAL 0) -# # MESSAGE( WARNING "Extracting the pre-compiled applications failed" ) -# # ENDIF() +# IF(NOT RESULT_CODE EQUAL 0) +# MESSAGE( WARNING "Extracting the pre-compiled applications failed" ) +# ENDIF() -# # ENDIF() -# # ENDIF() +# ENDIF() +# ENDIF() -# SUBDIRLIST(EXTERNAL_APPS ${DOWNLOADED_APPS_DIR}/${FILENAME_TO_EXTRACT}/) +SUBDIRLIST(EXTERNAL_APPS ${DOWNLOADED_APPS_DIR}/${FILENAME_TO_EXTRACT}/) -# # copy the prepared app into the appropriate folder -# # FOREACH(external_app ${EXTERNAL_APPS}) +# copy the prepared app into the appropriate folder +# FOREACH(external_app ${EXTERNAL_APPS}) -# # IF( NOT EXISTS "${SOURCE_APPLICATIONS_PATH}/${external_app}" ) +# IF( NOT EXISTS "${SOURCE_APPLICATIONS_PATH}/${external_app}" ) -# # FILE(MAKE_DIRECTORY ${SOURCE_APPLICATIONS_PATH}/${external_app}) +# FILE(MAKE_DIRECTORY ${SOURCE_APPLICATIONS_PATH}/${external_app}) -# # EXECUTE_PROCESS( COMMAND ${CMAKE_COMMAND} -E copy_directory -# # ${DOWNLOADED_APPS_DIR}/${FILENAME_TO_EXTRACT}/${external_app} ${SOURCE_APPLICATIONS_PATH}/${external_app} -# # ) +# EXECUTE_PROCESS( COMMAND ${CMAKE_COMMAND} -E copy_directory +# ${DOWNLOADED_APPS_DIR}/${FILENAME_TO_EXTRACT}/${external_app} ${SOURCE_APPLICATIONS_PATH}/${external_app} +# ) -# # ENDIF() - -# # ENDFOREACH() - -# SUBDIRLIST(SUBDIRECTORIES ${SOURCE_APPLICATIONS_PATH}) -# INCLUDE(${CMAKE_ROOT}/Modules/ExternalProject.cmake) - -# SET(BUILD_DEEPMEDIC false CACHE BOOL "disable deepmedic") - -# IF (BUILD_DEEPMEDIC) - -# SET( CAPTK_APP_LIST_PY_GUI "${CAPTK_APP_LIST_PY_GUI} deepmedic" CACHE STRING "Available stand-alone apps" FORCE) -# ### this should be removed and put in with application decoupling -# ## this is a TEMPORARY work-around until https://github.com/CBICA/CaPTk/pull/1043 is merged -# SET( DOWNLOAD_LINK "ftp://www.nitrc.org/home/groups/captk/downloads/deepMedicInference_${PLATFORM_STRING}.zip" ) -# SET( FILENAME_TO_EXTRACT "deepMedicInference_${PLATFORM_STRING}") -# SET( FILE_TO_EXTRACT ${PROJECT_BINARY_DIR}/${FILENAME_TO_EXTRACT}.zip) - -# SET( DOWNLOADED_APPS_DIR ${PROJECT_BINARY_DIR}/deepMedicInference ) - -# IF( NOT EXISTS "${DOWNLOADED_APPS_DIR}" ) - -# IF( NOT EXISTS "${FILE_TO_EXTRACT}" ) - -# # do not re-download if the LFS fetch worked -# IF(NOT EXISTS ${FILE_TO_EXTRACT}) -# # download exe from url -# MESSAGE( STATUS "Downloading pre-compiled deepMedicInference" ) -# FILE(DOWNLOAD "${DOWNLOAD_LINK}" "${FILE_TO_EXTRACT}" TIMEOUT 1000000 STATUS STATUS_CODE SHOW_PROGRESS) -# IF(NOT STATUS_CODE EQUAL 0) -# MESSAGE(FATAL_ERROR "Failed to download Precompiled deepMedicInference. 
Status=${STATUS_CODE}") -# ENDIF() -# ENDIF() -# ENDIF() - -# FILE(MAKE_DIRECTORY ${DOWNLOADED_APPS_DIR}) - -# MESSAGE( STATUS "Extracting pre-compiled deepMedicInference" ) -# IF( EXISTS "${FILE_TO_EXTRACT}" ) - -# EXECUTE_PROCESS( COMMAND ${CMAKE_COMMAND} -E tar xfz ${FILE_TO_EXTRACT} -# WORKING_DIRECTORY ${DOWNLOADED_APPS_DIR} -# RESULT_VARIABLE RESULT_CODE -# ) - -# IF(NOT RESULT_CODE EQUAL 0) -# MESSAGE( WARNING "Extracting the pre-compiled deepMedicInference failed" ) -# ENDIF() - -# ENDIF() # ENDIF() -# FILE(GLOB DMInferenceEXE "${DOWNLOADED_APPS_DIR}/*") +# ENDFOREACH() -# IF(APPLE) -# INSTALL(FILES ${DMInferenceEXE} -# DESTINATION ${EXE_NAME}.app/Contents/Resources/bin -# PERMISSIONS OWNER_READ OWNER_WRITE OWNER_EXECUTE GROUP_READ GROUP_EXECUTE WORLD_READ WORLD_EXECUTE -# ) -# ELSE() -# INSTALL(FILES ${DMInferenceEXE} -# DESTINATION bin -# PERMISSIONS OWNER_READ OWNER_WRITE OWNER_EXECUTE GROUP_READ GROUP_EXECUTE WORLD_READ WORLD_EXECUTE -# ) -# ENDIF() -# ENDIF() -# ### this should be removed and put in with application decoupling +SUBDIRLIST(SUBDIRECTORIES ${SOURCE_APPLICATIONS_PATH}) +INCLUDE(${CMAKE_ROOT}/Modules/ExternalProject.cmake) + +IF (BUILD_DEEPMEDIC) + + SET( CAPTK_APP_LIST_PY_GUI "${CAPTK_APP_LIST_PY_GUI} deepmedic" CACHE STRING "Available stand-alone apps" FORCE) + ### this should be removed and put in with application decoupling + ## this is a TEMPORARY work-around until https://github.com/CBICA/CaPTk/pull/1043 is merged + #SET( DOWNLOAD_LINK "ftp://www.nitrc.org/home/groups/captk/downloads/deepMedicInference_${PLATFORM_STRING}.zip" ) + SET( DOWNLOAD_LINK "https://captk.projects.nitrc.org/deepMedicInference_${PLATFORM_STRING}.zip" ) + SET( FILENAME_TO_EXTRACT "deepMedicInference_${PLATFORM_STRING}") + SET( FILE_TO_EXTRACT ${PROJECT_BINARY_DIR}/${FILENAME_TO_EXTRACT}.zip) + + SET( DOWNLOADED_APPS_DIR ${PROJECT_BINARY_DIR}/deepMedicInference ) + + IF( NOT EXISTS "${DOWNLOADED_APPS_DIR}" ) + + IF( NOT EXISTS "${FILE_TO_EXTRACT}" ) + + # do not re-download if the LFS fetch worked + IF(NOT EXISTS ${FILE_TO_EXTRACT}) + # download exe from url + MESSAGE( STATUS "Downloading pre-compiled deepMedicInference" ) + FILE(DOWNLOAD "${DOWNLOAD_LINK}" "${FILE_TO_EXTRACT}" TIMEOUT 1000000 STATUS STATUS_CODE SHOW_PROGRESS) + IF(NOT STATUS_CODE EQUAL 0) + MESSAGE(FATAL_ERROR "Failed to download Precompiled deepMedicInference. 
Status=${STATUS_CODE}") + ENDIF() + ENDIF() + ENDIF() + + FILE(MAKE_DIRECTORY ${DOWNLOADED_APPS_DIR}) + + MESSAGE( STATUS "Extracting pre-compiled deepMedicInference" ) + IF( EXISTS "${FILE_TO_EXTRACT}" ) + + EXECUTE_PROCESS( COMMAND ${CMAKE_COMMAND} -E tar xfz ${FILE_TO_EXTRACT} + WORKING_DIRECTORY ${DOWNLOADED_APPS_DIR} + RESULT_VARIABLE RESULT_CODE + ) + + IF(NOT RESULT_CODE EQUAL 0) + MESSAGE( WARNING "Extracting the pre-compiled deepMedicInference failed" ) + ENDIF() + + ENDIF() + ENDIF() + + FILE(GLOB DMInferenceEXE "${DOWNLOADED_APPS_DIR}/*") + + IF(APPLE) + INSTALL(FILES ${DMInferenceEXE} + DESTINATION ${EXE_NAME}.app/Contents/Resources/bin + PERMISSIONS OWNER_READ OWNER_WRITE OWNER_EXECUTE GROUP_READ GROUP_EXECUTE WORLD_READ WORLD_EXECUTE + ) + ELSE() + INSTALL(FILES ${DMInferenceEXE} + DESTINATION bin + PERMISSIONS OWNER_READ OWNER_WRITE OWNER_EXECUTE GROUP_READ GROUP_EXECUTE WORLD_READ WORLD_EXECUTE + ) + ENDIF() +ENDIF() +### this should be removed and put in with application decoupling -# FOREACH(subdir ${SUBDIRECTORIES}) +FOREACH(subdir ${SUBDIRECTORIES}) -# SET(SUBDIRPATH "${SOURCE_APPLICATIONS_PATH}/${subdir}") + SET(SUBDIRPATH "${SOURCE_APPLICATIONS_PATH}/${subdir}") -# IF (BUILD_DEEPMEDIC) - -# SET( CAPTK_APP_LIST_PY_GUI "${CAPTK_APP_LIST_PY_GUI} deepmedic" CACHE STRING "Available stand-alone apps" FORCE) -# ### this should be removed and put in with application decoupling -# ## this is a TEMPORARY work-around until https://github.com/CBICA/CaPTk/pull/1043 is merged -# SET( DOWNLOAD_LINK "ftp://www.nitrc.org/home/groups/captk/downloads/deepMedicInference_${PLATFORM_STRING}.zip" ) -# SET( FILENAME_TO_EXTRACT "deepMedicInference_${PLATFORM_STRING}") -# SET( FILE_TO_EXTRACT ${PROJECT_BINARY_DIR}/${FILENAME_TO_EXTRACT}.zip) + IF (BUILD_DEEPMEDIC) + + SET( CAPTK_APP_LIST_PY_GUI "${CAPTK_APP_LIST_PY_GUI} deepmedic" CACHE STRING "Available stand-alone apps" FORCE) + ### this should be removed and put in with application decoupling + ## this is a TEMPORARY work-around until https://github.com/CBICA/CaPTk/pull/1043 is merged + #SET( DOWNLOAD_LINK "ftp://www.nitrc.org/home/groups/captk/downloads/deepMedicInference_${PLATFORM_STRING}.zip" ) + SET( DOWNLOAD_LINK "https://captk.projects.nitrc.org/deepMedicInference_${PLATFORM_STRING}.zip" ) + SET( FILENAME_TO_EXTRACT "deepMedicInference_${PLATFORM_STRING}") + SET( FILE_TO_EXTRACT ${PROJECT_BINARY_DIR}/${FILENAME_TO_EXTRACT}.zip) -# SET( DOWNLOADED_APPS_DIR ${PROJECT_BINARY_DIR}/deepMedicInference ) + SET( DOWNLOADED_APPS_DIR ${PROJECT_BINARY_DIR}/deepMedicInference ) -# IF( NOT EXISTS "${DOWNLOADED_APPS_DIR}" ) + IF( NOT EXISTS "${DOWNLOADED_APPS_DIR}" ) -# IF( NOT EXISTS "${FILE_TO_EXTRACT}" ) + IF( NOT EXISTS "${FILE_TO_EXTRACT}" ) -# # do not re-download if the LFS fetch worked -# IF(NOT EXISTS ${FILE_TO_EXTRACT}) -# # download exe from url -# MESSAGE( STATUS "Downloading pre-compiled deepMedicInference" ) -# FILE(DOWNLOAD "${DOWNLOAD_LINK}" "${FILE_TO_EXTRACT}" TIMEOUT 1000000 STATUS STATUS_CODE SHOW_PROGRESS) -# IF(NOT STATUS_CODE EQUAL 0) -# MESSAGE(FATAL_ERROR "Failed to download Precompiled deepMedicInference. 
Status=${STATUS_CODE}") -# ENDIF() -# ENDIF() -# ENDIF() + # do not re-download if the LFS fetch worked + IF(NOT EXISTS ${FILE_TO_EXTRACT}) + # download exe from url + MESSAGE( STATUS "Downloading pre-compiled deepMedicInference" ) + FILE(DOWNLOAD "${DOWNLOAD_LINK}" "${FILE_TO_EXTRACT}" TIMEOUT 1000000 STATUS STATUS_CODE SHOW_PROGRESS) + IF(NOT STATUS_CODE EQUAL 0) + MESSAGE(FATAL_ERROR "Failed to download Precompiled deepMedicInference. Status=${STATUS_CODE}") + ENDIF() + ENDIF() + ENDIF() -# FILE(MAKE_DIRECTORY ${DOWNLOADED_APPS_DIR}) + FILE(MAKE_DIRECTORY ${DOWNLOADED_APPS_DIR}) -# MESSAGE( STATUS "Extracting pre-compiled deepMedicInference" ) -# IF( EXISTS "${FILE_TO_EXTRACT}" ) + MESSAGE( STATUS "Extracting pre-compiled deepMedicInference" ) + IF( EXISTS "${FILE_TO_EXTRACT}" ) -# EXECUTE_PROCESS( COMMAND ${CMAKE_COMMAND} -E tar xfz ${FILE_TO_EXTRACT} -# WORKING_DIRECTORY ${DOWNLOADED_APPS_DIR} -# RESULT_VARIABLE RESULT_CODE -# ) + EXECUTE_PROCESS( COMMAND ${CMAKE_COMMAND} -E tar xfz ${FILE_TO_EXTRACT} + WORKING_DIRECTORY ${DOWNLOADED_APPS_DIR} + RESULT_VARIABLE RESULT_CODE + ) -# IF(NOT RESULT_CODE EQUAL 0) -# MESSAGE( WARNING "Extracting the pre-compiled deepMedicInference failed" ) -# ENDIF() + IF(NOT RESULT_CODE EQUAL 0) + MESSAGE( WARNING "Extracting the pre-compiled deepMedicInference failed" ) + ENDIF() -# ENDIF() -# ENDIF() + ENDIF() + ENDIF() -# FILE(GLOB DMInferenceEXE "${DOWNLOADED_APPS_DIR}/*") + FILE(GLOB DMInferenceEXE "${DOWNLOADED_APPS_DIR}/*") -# IF(APPLE) -# INSTALL(FILES ${DMInferenceEXE} -# DESTINATION ${EXE_NAME}.app/Contents/Resources/bin -# PERMISSIONS OWNER_READ OWNER_WRITE OWNER_EXECUTE GROUP_READ GROUP_EXECUTE WORLD_READ WORLD_EXECUTE -# ) -# ELSE() -# INSTALL(FILES ${DMInferenceEXE} -# DESTINATION bin -# PERMISSIONS OWNER_READ OWNER_WRITE OWNER_EXECUTE GROUP_READ GROUP_EXECUTE WORLD_READ WORLD_EXECUTE -# ) -# ENDIF() -# ### this should be removed and put in with application decoupling - -# # IF(WIN32) -# # INSTALL(FILES ${SUBDIRPATH}/deepMedicRun.exe -# # DESTINATION bin -# # PERMISSIONS OWNER_READ OWNER_WRITE OWNER_EXECUTE GROUP_READ GROUP_EXECUTE WORLD_READ WORLD_EXECUTE -# # ) -# # ELSEIF(APPLE) -# # INSTALL(FILES ${SUBDIRPATH}/deepMedicRun -# # DESTINATION ${EXE_NAME}.app/Contents/Resources/bin -# # PERMISSIONS OWNER_READ OWNER_WRITE OWNER_EXECUTE GROUP_READ GROUP_EXECUTE WORLD_READ WORLD_EXECUTE -# # ) -# # ELSE() -# # INSTALL(FILES ${SUBDIRPATH}/deepMedicRun -# # DESTINATION bin -# # PERMISSIONS OWNER_READ OWNER_WRITE OWNER_EXECUTE GROUP_READ GROUP_EXECUTE WORLD_READ WORLD_EXECUTE -# # ) -# # ENDIF() - -# ENDIF() - -# ENDFOREACH() - -# #dcm2niix: Start -# #downloads to: [captk bin folder] -# #extracts to: [captk bin folder]/dcm2niix -# #copies to: [captk src folder]/src/applications/individualsApps/dcm2niix -# #installs to: [bin folder] - -# #make dir -# IF( NOT EXISTS "${SOURCE_APPLICATIONS_PATH}/dcm2niix" ) -# FILE(MAKE_DIRECTORY ${SOURCE_APPLICATIONS_PATH}/dcm2niix) -# ENDIF() - -# #specify download link -# SET( DOWNLOAD_LINK "ftp://www.nitrc.org/home/groups/captk/downloads/precompiledApps/dcm2niix_${PLATFORM_STRING}.zip" ) -# SET( FILENAME_TO_EXTRACT "dcm2niix_${PLATFORM_STRING}") -# SET( FILE_TO_EXTRACT ${PROJECT_BINARY_DIR}/${FILENAME_TO_EXTRACT}.zip) - -# SET( DOWNLOADED_APPS_DIR ${PROJECT_BINARY_DIR}/dcm2niix ) - -# IF( NOT EXISTS "${DOWNLOADED_APPS_DIR}" ) - -# IF( NOT EXISTS "${FILE_TO_EXTRACT}" ) - -# # do not re-download if file exists -# IF(NOT EXISTS ${FILE_TO_EXTRACT}) -# #Download from url -# MESSAGE( STATUS "Downloading pre-compiled 
dcm2niix" ) -# FILE(DOWNLOAD "${DOWNLOAD_LINK}" "${FILE_TO_EXTRACT}" TIMEOUT 1000000 STATUS STATUS_CODE SHOW_PROGRESS) -# IF(NOT STATUS_CODE EQUAL 0) -# MESSAGE(FATAL_ERROR "Failed to download Precompiled dcm2niix. Status=${STATUS_CODE}") -# ENDIF() -# ENDIF() -# ENDIF() - -# FILE(MAKE_DIRECTORY ${DOWNLOADED_APPS_DIR}) + IF(APPLE) + INSTALL(FILES ${DMInferenceEXE} + DESTINATION ${EXE_NAME}.app/Contents/Resources/bin + PERMISSIONS OWNER_READ OWNER_WRITE OWNER_EXECUTE GROUP_READ GROUP_EXECUTE WORLD_READ WORLD_EXECUTE + ) + ELSE() + INSTALL(FILES ${DMInferenceEXE} + DESTINATION bin + PERMISSIONS OWNER_READ OWNER_WRITE OWNER_EXECUTE GROUP_READ GROUP_EXECUTE WORLD_READ WORLD_EXECUTE + ) + ENDIF() + ### this should be removed and put in with application decoupling + + # IF(WIN32) + # INSTALL(FILES ${SUBDIRPATH}/deepMedicRun.exe + # DESTINATION bin + # PERMISSIONS OWNER_READ OWNER_WRITE OWNER_EXECUTE GROUP_READ GROUP_EXECUTE WORLD_READ WORLD_EXECUTE + # ) + # ELSEIF(APPLE) + # INSTALL(FILES ${SUBDIRPATH}/deepMedicRun + # DESTINATION ${EXE_NAME}.app/Contents/Resources/bin + # PERMISSIONS OWNER_READ OWNER_WRITE OWNER_EXECUTE GROUP_READ GROUP_EXECUTE WORLD_READ WORLD_EXECUTE + # ) + # ELSE() + # INSTALL(FILES ${SUBDIRPATH}/deepMedicRun + # DESTINATION bin + # PERMISSIONS OWNER_READ OWNER_WRITE OWNER_EXECUTE GROUP_READ GROUP_EXECUTE WORLD_READ WORLD_EXECUTE + # ) + # ENDIF() + + ENDIF() + +ENDFOREACH() + +#dcm2niix: Start +#downloads to: [captk bin folder] +#extracts to: [captk bin folder]/dcm2niix +#copies to: [captk src folder]/src/applications/individualsApps/dcm2niix +#installs to: [bin folder] + +#make dir +IF( NOT EXISTS "${SOURCE_APPLICATIONS_PATH}/dcm2niix" ) + FILE(MAKE_DIRECTORY ${SOURCE_APPLICATIONS_PATH}/dcm2niix) +ENDIF() -# #Extract -# MESSAGE( STATUS "Extracting pre-compiled dcm2niix" ) -# IF( EXISTS "${FILE_TO_EXTRACT}" ) -# EXECUTE_PROCESS( COMMAND ${CMAKE_COMMAND} -E tar xfz ${FILE_TO_EXTRACT} -# WORKING_DIRECTORY ${PROJECT_BINARY_DIR}/dcm2niix -# RESULT_VARIABLE RESULT_CODE) - -# IF(NOT RESULT_CODE EQUAL 0) #if extraction fails -# MESSAGE( WARNING "Extracting the pre-compiled dcm2niix failed" ) -# ELSE() # if extraction succeeds -# MESSAGE( STATUS "Extraction succeeded. Now copying to ${SOURCE_APPLICATIONS_PATH}/dcm2niix") -# FILE(GLOB allFiles ${PROJECT_BINARY_DIR}/dcm2niix/dcm2niix_${PLATFORM_STRING}/* ) -# #Copy -# FILE( COPY ${allFiles} -# DESTINATION ${SOURCE_APPLICATIONS_PATH}/dcm2niix ) -# ENDIF() -# ENDIF() -# ENDIF() +#specify download link +#SET( DOWNLOAD_LINK "ftp://www.nitrc.org/home/groups/captk/downloads/precompiledApps/dcm2niix_${PLATFORM_STRING}.zip" ) +SET( DOWNLOAD_LINK "https://captk.projects.nitrc.org/dcm2niix_${PLATFORM_STRING}.zip" ) +SET( FILENAME_TO_EXTRACT "dcm2niix_${PLATFORM_STRING}") +SET( FILE_TO_EXTRACT ${PROJECT_BINARY_DIR}/${FILENAME_TO_EXTRACT}.zip) + +SET( DOWNLOADED_APPS_DIR ${PROJECT_BINARY_DIR}/dcm2niix ) + +IF( NOT EXISTS "${DOWNLOADED_APPS_DIR}" ) + + IF( NOT EXISTS "${FILE_TO_EXTRACT}" ) + + # do not re-download if file exists + IF(NOT EXISTS ${FILE_TO_EXTRACT}) +#Download from url + MESSAGE( STATUS "Downloading pre-compiled dcm2niix" ) + FILE(DOWNLOAD "${DOWNLOAD_LINK}" "${FILE_TO_EXTRACT}" TIMEOUT 1000000 STATUS STATUS_CODE SHOW_PROGRESS) + IF(NOT STATUS_CODE EQUAL 0) + MESSAGE(FATAL_ERROR "Failed to download Precompiled dcm2niix. 
Status=${STATUS_CODE}") + ENDIF() + ENDIF() + ENDIF() + + FILE(MAKE_DIRECTORY ${DOWNLOADED_APPS_DIR}) + +#Extract + MESSAGE( STATUS "Extracting pre-compiled dcm2niix" ) + IF( EXISTS "${FILE_TO_EXTRACT}" ) + EXECUTE_PROCESS( COMMAND ${CMAKE_COMMAND} -E tar xfz ${FILE_TO_EXTRACT} + WORKING_DIRECTORY ${PROJECT_BINARY_DIR}/dcm2niix + RESULT_VARIABLE RESULT_CODE) + + IF(NOT RESULT_CODE EQUAL 0) #if extraction fails + MESSAGE( WARNING "Extracting the pre-compiled dcm2niix failed" ) + ELSE() # if extraction succeeds + MESSAGE( STATUS "Extraction succeeded. Now copying to ${SOURCE_APPLICATIONS_PATH}/dcm2niix") + FILE(GLOB allFiles ${PROJECT_BINARY_DIR}/dcm2niix/dcm2niix_${PLATFORM_STRING}/* ) +#Copy + FILE( COPY ${allFiles} + DESTINATION ${SOURCE_APPLICATIONS_PATH}/dcm2niix ) + ENDIF() + ENDIF() +ENDIF() -# #Install -# SET( DCM2NIIX_EXE "" ) -# IF (WIN32) -# FILE(GLOB allFiles ${SOURCE_APPLICATIONS_PATH}/dcm2niix/*.*) -# SET( DCM2NIIX_EXE ${allFiles} ) -# ELSE () -# SET( DCM2NIIX_EXE "${SOURCE_APPLICATIONS_PATH}/dcm2niix/dcm2niix" ) -# ENDIF() +#Install +SET( DCM2NIIX_EXE "" ) +IF (WIN32) + FILE(GLOB allFiles ${SOURCE_APPLICATIONS_PATH}/dcm2niix/*.*) + SET( DCM2NIIX_EXE ${allFiles} ) +ELSE () + SET( DCM2NIIX_EXE "${SOURCE_APPLICATIONS_PATH}/dcm2niix/dcm2niix" ) +ENDIF() -# IF (APPLE) -# SET(CAPTK_MACOSX_BUNDLE_FILES ${CAPTK_MACOSX_BUNDLE_FILES} ${DCM2NIIX_EXE}) -# ELSE() -# INSTALL(FILES ${DCM2NIIX_EXE} -# DESTINATION bin -# PERMISSIONS OWNER_READ OWNER_WRITE OWNER_EXECUTE GROUP_READ GROUP_EXECUTE WORLD_READ WORLD_EXECUTE -# ) -# ENDIF() -# #dcm2niix: End +IF (APPLE) + SET(CAPTK_MACOSX_BUNDLE_FILES ${CAPTK_MACOSX_BUNDLE_FILES} ${DCM2NIIX_EXE}) +ELSE() + INSTALL(FILES ${DCM2NIIX_EXE} + DESTINATION bin + PERMISSIONS OWNER_READ OWNER_WRITE OWNER_EXECUTE GROUP_READ GROUP_EXECUTE WORLD_READ WORLD_EXECUTE + ) +ENDIF() +#dcm2niix: End SET( CAPTK_APP_LIST ${CAPTK_APP_LIST} CACHE STRING "Available Code-level Applications" FORCE) SET( APPLICATION_SOURCES "${SOURCES}" CACHE STRING "Code-level Application sources" FORCE) diff --git a/src/applications/CreateCSVForDICOMs.py b/src/applications/CreateCSVForDICOMs.py new file mode 100644 index 00000000..aaf48a42 --- /dev/null +++ b/src/applications/CreateCSVForDICOMs.py @@ -0,0 +1,218 @@ +import os, argparse, sys, platform, posixpath, re +from pathlib import Path +from datetime import date +from tqdm import tqdm +import pandas as pd +import SimpleITK as sitk + +from .constants import MODALITY_ID_DICT + + +def verify_dicom_folder(dicom_folder: str) -> (bool, str): + """ + This function verifies that the folder is a valid DICOM folder. In the case of NIfTI file input, it will just verify if a 3D NIfTI is being passed in. + + Args: + dicom_folder (str): The path to the DICOM folder or NIfTI file. + + Returns: + bool: True if the folder is a valid DICOM folder or a 3D NIfTI file, False otherwise. + str: The path to the first DICOM file in the folder if the folder is a valid DICOM folder, the NIfTI file itself otherwise. 
+ """ + + if os.path.isdir(dicom_folder): + series_IDs = sitk.ImageSeriesReader.GetGDCMSeriesIDs(dicom_folder) + if not series_IDs: + return False, None + + if len(series_IDs) > 1: + return False, None + + series_file_names = sitk.ImageSeriesReader.GetGDCMSeriesFileNames( + dicom_folder, series_IDs[0] + ) + series_reader = sitk.ImageSeriesReader() + series_reader.SetFileNames(series_file_names) + series_reader.MetaDataDictionaryArrayUpdateOn() + series_reader.LoadPrivateTagsOn() + image = series_reader.Execute() + else: + image = sitk.ReadImage(dicom_folder) + series_file_names = [dicom_folder] + + if image.GetDimension() != 3: + return False, None + + return True, series_file_names[0] + + +def setup_argparser(): + copyrightMessage = ( + "Contact: admin@fets.ai\n\n" + + "This program is NOT FDA/CE approved and NOT intended for clinical use.\nCopyright (c) " + + str(date.today().year) + + " University of Pennsylvania. All rights reserved." + ) + parser = argparse.ArgumentParser( + prog="CreateCSVForDICOMS", + formatter_class=argparse.RawTextHelpFormatter, + description="This application creates the CSV for the DICOM folder structure.\n\n" + + copyrightMessage, + ) + parser.add_argument( + "-inputDir", + type=str, + help="The absolute path to the input directory", + required=True, + ) + parser.add_argument( + "-outputCSV", + type=str, + help="The output csv file name", + required=True, + ) + + return parser.parse_args() + + +class CSVCreator: + def __init__(self, inputDir: str, outputCSV: str): + self.inputDir = inputDir + self.outputCSV = outputCSV + self.subject_timepoint_missing_modalities = [] + self.subject_timepoint_extra_modalities = [] + self.output_df_for_csv = pd.DataFrame( + columns=["SubjectID", "Timepoint", "T1", "T1GD", "T2", "FLAIR"] + ) + + def process_data(self): + for subject in tqdm(os.listdir(self.inputDir)): + self.process_row(subject) + + def process_row(self, subject): + inputDir = posixpath.normpath(self.inputDir) + current_subject_dir = posixpath.join(inputDir, subject) + + if not os.path.isdir(current_subject_dir): + return + + for timepoint in os.listdir(current_subject_dir): + self.process_timepoint(timepoint, subject, current_subject_dir) + + def process_timepoint(self, timepoint, subject, subject_dir): + timepoint_dir = posixpath.join(subject_dir, timepoint) + if not os.path.isdir(timepoint_dir): + return + + modality_folders = os.listdir(timepoint_dir) + # check if there are missing modalities + if len(modality_folders) < 4: + self.subject_timepoint_missing_modalities.append(subject + "_" + timepoint) + return + # check if there are extra modalities + if len(modality_folders) > 4: + self.subject_timepoint_extra_modalities.append(subject + "_" + timepoint) + return + + # goldilocks zone + detected_modalities = { + "T1": None, + "T1GD": None, + "T2": None, + "FLAIR": None, + } + for modality in modality_folders: + modality_path = posixpath.join(timepoint_dir, modality) + modality_lower = modality.lower() + modality_norm = re.sub('\.nii\.gz', '', modality_lower) + for modality_to_check in MODALITY_ID_DICT: + if detected_modalities[modality_to_check] is not None: + continue + + for modality_id in MODALITY_ID_DICT[modality_to_check]: + if modality_id != modality_norm: + continue + + valid_dicom, first_dicom_file = verify_dicom_folder(modality_path) + if valid_dicom: + detected_modalities[modality_to_check] = first_dicom_file + break + else: + self.subject_timepoint_missing_modalities.append( + subject + "_" + timepoint + "_" + modality + ) + + # check if any 
modalities are missing + modalities_missing = False + for modality in detected_modalities: + if detected_modalities[modality] is None: + modalities_missing = True + self.subject_timepoint_missing_modalities.append( + subject + "_" + timepoint + "_" + modality + ) + + if modalities_missing: + return + + # if no modalities are missing, then add to the output csv + dict_to_append = { + "SubjectID": subject, + "Timepoint": timepoint, + "T1": detected_modalities["T1"], + "T1GD": detected_modalities["T1GD"], + "T2": detected_modalities["T2"], + "FLAIR": detected_modalities["FLAIR"], + } + self.output_df_for_csv = pd.concat( + [ + self.output_df_for_csv, + pd.DataFrame( + [dict_to_append], + columns=[ + "SubjectID", + "Timepoint", + "T1", + "T1GD", + "T2", + "FLAIR", + ], + ), + ], + ) + + def write(self): + if self.output_df_for_csv.shape[0] > 0: + if not (self.outputCSV.endswith(".csv")): + self.outputCSV += ".csv" + self.output_df_for_csv.to_csv(self.outputCSV, index=False) + + +def main(inputDir: str, outputCSV: str): + inputDir = str(Path(inputDir).resolve()) + csv_creator = CSVCreator(inputDir, outputCSV) + csv_creator.process_data() + csv_creator.write() + + # print out the missing modalities + missing = csv_creator.subject_timepoint_missing_modalities + extra = csv_creator.subject_timepoint_extra_modalities + if len(missing) > 0: + print( + "WARNING: The following subject timepoints are missing modalities: ", + missing, + ) + if len(extra) > 0: + print( + "WARNING: The following subject timepoints have extra modalities: ", + extra, + ) + + print("Done!") + + +if __name__ == "__main__": + args = setup_argparser() + if platform.system().lower() == "darwin": + sys.exit("macOS is not supported") + else: + main(args.inputDir, args.outputCSV) diff --git a/src/applications/FeTS_CLI_Segment.cxx b/src/applications/FeTS_CLI_Segment.cxx deleted file mode 100644 index 0a76be49..00000000 --- a/src/applications/FeTS_CLI_Segment.cxx +++ /dev/null @@ -1,612 +0,0 @@ -#include - -#include "cbicaCmdParser.h" -#include "cbicaLogging.h" -#include "cbicaITKSafeImageIO.h" -#include "cbicaUtilities.h" -#include "cbicaITKUtilities.h" -#include "CaPTkGUIUtils.h" - -int runCollaboratorTraining(const std::string &fullCommandToRunWithArgs) -{ - auto returnCode = std::system(fullCommandToRunWithArgs.c_str()); - if (returnCode != 0) - { - if (returnCode == 154) - { - std::cout << "Special case, where the collaborator failing is expected, so automatically restarting.\n"; - return runCollaboratorTraining(fullCommandToRunWithArgs); - } - } -} - -int main(int argc, char** argv) -{ - cbica::CmdParser parser(argc, argv, "FeTS_CLI_Segment"); - - auto hardcodedNativeModelWeightPath = getCaPTkDataDir() + "/fets"; - auto allArchs = cbica::subdirectoriesInDirectory(hardcodedNativeModelWeightPath); - std::string allArchsString; - for (size_t i = 0; i < allArchs.size(); i++) - { - allArchsString += allArchs[i] + ","; - } - allArchsString.pop_back(); - allArchsString += ",fets_singlet,fets_triplet"; - - std::string dataDir, modelName, loggingDir, colName, archs = "fets_triplet", fusionMethod = "STAPLE", hardcodedPlanName = "fets_phase2_2"; - - parser.addRequiredParameter("d", "dataDir", cbica::Parameter::DIRECTORY, "Dir with Read/Write access", "Input data directory"); - parser.addOptionalParameter("L", "LoggingDir", cbica::Parameter::DIRECTORY, "Dir with write access", "Location of logging directory"); - parser.addOptionalParameter("a", "archs", cbica::Parameter::STRING, allArchsString, "The architecture(s) to infer on", "Only a 
single architecture is supported for training", "Comma-separated values for multiple options", "Defaults to: " + allArchsString); - parser.addOptionalParameter("lF", "labelFuse", cbica::Parameter::STRING, "STAPLE,ITKVoting,SIMPLE,MajorityVoting", "The label fusion strategy to follow for multi-arch inference", "Comma-separated values for multiple options", "Defaults to: " + fusionMethod); - parser.addOptionalParameter("g", "gpu", cbica::Parameter::BOOLEAN, "0-1", "Whether to run the process on GPU or not", "Defaults to '0'"); - // parser.addOptionalParameter("vp", "valPatch", cbica::Parameter::BOOLEAN, "0-1", "Whether to perform per-patch validation or not", "Used for training, defaults to '0'"); - - parser.addApplicationDescription("This is the CLI interface for FeTS"); - parser.addExampleUsage("-d /path/DataForFeTS -a deepMedic,nnUNet -lF STAPLE,ITKVoting,SIMPLE -g 1", "This command performs inference using deepMedic,nnUNet using multiple fusion strategies on GPU and saves in data directory"); - - bool gpuRequested = false, trainingRequested = false, patchValidation = true; - - parser.getParameterValue("d", dataDir); - - if (parser.isPresent("L")) - { - parser.getParameterValue("L", loggingDir); - } - else - { - loggingDir = dataDir + "/logs"; - std::cout << "Using the following directory as logging directory: " << loggingDir << "\n"; - cbica::createDir(loggingDir); - } - - if (trainingRequested) - { - if (parser.isPresent("c")) - { - parser.getParameterValue("c", colName); - } - else - { - std::cerr << "Collaborator name is required to beging training; please specify this using '-c'.\n"; - return EXIT_FAILURE; - } - // if (parser.isPresent("vp")) - // { - // parser.getParameterValue("vp", patchValidation); - // } - } - else - { - if (parser.isPresent("a")) - { - parser.getParameterValue("a", archs); - } - else - { - std::cerr << "Please specify at least 2 architectures on which to perform inference.\n"; - } - if (parser.isPresent("lF")) - { - parser.getParameterValue("lF", fusionMethod); - } - } - if (parser.isPresent("g")) - { - parser.getParameterValue("g", gpuRequested); - } - - std::string device_arg = " -md ", device_arg_for_second_script = " --device "; - if (gpuRequested) - { - device_arg += "cuda"; - device_arg_for_second_script += "cuda"; - } - else - { - device_arg += "cpu"; - device_arg_for_second_script += "cpu"; - } - - - // convert everything to lower-case for easier comparison - std::transform(archs.begin(), archs.end(), archs.begin(), ::tolower); - std::transform(fusionMethod.begin(), fusionMethod.end(), fusionMethod.begin(), ::tolower); - - auto fetsApplicationPath = cbica::getExecutablePath(); - auto deepMedicExe = getApplicationPath("DeepMedic"); - - auto archs_split = cbica::stringSplit(archs, ","); - auto fusion_split = cbica::stringSplit(fusionMethod, ","); - - auto subjectDirs = cbica::subdirectoriesInDirectory(dataDir); - - if (trainingRequested && (archs_split.size() > 1)) - { - std::cerr << "Training cannot be currently be performed on more than 1 architecture.\n"; - return EXIT_FAILURE; - } - - std::string - hardcodedOpenFLPath = fetsApplicationPath + "/OpenFederatedLearning/", - hardcodedOpenFLPlanPath = hardcodedOpenFLPath + "bin/federations/plans/fets_phase2_2.yaml", - hardcodedLabelFusionPath = fetsApplicationPath + "/LabelFusion/fusion_run", - hardcodedModelWeightPath = hardcodedOpenFLPath + "/bin/federations/weights/", // start with the common location - hardcodedPythonPath = hardcodedOpenFLPath + "/venv/bin/python", // this needs to change for Windows 
(wonder what happens for macOS?) - hardcodedPythonPath_fusion = fetsApplicationPath + "/LabelFusion/venv/bin/python", // this needs to change for Windows (wonder what happens for macOS?) - scriptToCall = hardcodedOpenFLPath + "/submodules/fets_ai/Algorithms/fets/bin/brainmage_validation_outputs_to_disk_newer.py"; // the script that does the inference and scoring - auto fets_dataDir = getCaPTkDataDir(); - auto hardcodedFinalModelsWeightsPath = fets_dataDir + "/fets_consensus"; - auto hardcodedFinalModelsSeriesWeightsPath = fets_dataDir + "/fets_consensus_models/"; -#if WIN32 - hardcodedPythonPath = hardcodedOpenFLPath + "/venv/python.exe"; - hardcodedPythonPath_fusion = fetsApplicationPath + "/LabelFusion/venv/python.exe"; -#endif - - auto pythonEnvironmentFound = false; - if (cbica::isFile(hardcodedPythonPath)) - { - pythonEnvironmentFound = true; - } - - if (!trainingRequested) - { - std::string subjectsWithMissingModalities, subjectsWithErrors; // string to store error cases - - std::cout << "Starting subject directory iteration...\n"; - for (size_t s = 0; s < subjectDirs.size(); s++) // iterate through all subjects - { - if (subjectDirs[s].find("logs") == std::string::npos) - { - auto currentSubjectIsProblematic = false; - std::string file_t1gd, file_t1, file_t2, file_flair; - - auto filesInDir = cbica::filesInDirectory(dataDir + "/" + subjectDirs[s]); // get files in current subject directory - // iterate through all files and pick up individual modalities - for (size_t f = 0; f < filesInDir.size(); f++) - { - if (file_t1gd.empty()) - { - if ((filesInDir[f].find("_t1ce.nii.gz") != std::string::npos) || (filesInDir[f].find("_t1gd.nii.gz") != std::string::npos)) - { - file_t1gd = filesInDir[f]; - } - } - if (file_t1.empty()) - { - if (filesInDir[f].find("_t1.nii.gz") != std::string::npos) - { - file_t1 = filesInDir[f]; - } - } - if (file_t2.empty()) - { - if (filesInDir[f].find("_t2.nii.gz") != std::string::npos) - { - file_t2 = filesInDir[f]; - } - } - if (file_flair.empty()) - { - if ((filesInDir[f].find("_flair.nii.gz") != std::string::npos) || (filesInDir[f].find("_fl.nii.gz") != std::string::npos)) - { - file_flair = filesInDir[f]; - } - } - } - - // ensure problematic cases are detected - if (file_t1gd.empty()) - { - subjectsWithMissingModalities += subjectDirs[s] + ",t1ce\n"; - currentSubjectIsProblematic = true; - } - if (file_t1.empty()) - { - subjectsWithMissingModalities += subjectDirs[s] + ",t1\n"; - currentSubjectIsProblematic = true; - } - if (file_t2.empty()) - { - subjectsWithMissingModalities += subjectDirs[s] + ",t2\n"; - currentSubjectIsProblematic = true; - } - if (file_flair.empty()) - { - subjectsWithMissingModalities += subjectDirs[s] + ",flair\n"; - currentSubjectIsProblematic = true; - } - - if (!currentSubjectIsProblematic) // proceed only if all modalities for the current subject are present - { - std::cout << "= Starting inference for subject: " << subjectDirs[s] << "\n"; - auto currentSubjectOutputDir = dataDir + "/" + subjectDirs[s] + "/SegmentationsForQC/"; - cbica::createDir(currentSubjectOutputDir); - for (size_t a = 0; a < archs_split.size(); a++) // iterate through all requested architectures - { - if (archs_split[a] == "deepmedic") // special case - { - std::cout << "== Starting inference using DeepMedic...\n"; - auto brainMaskFile = dataDir + "/" + subjectDirs[s] + "/" + subjectDirs[s] + "_deepmedic_seg.nii.gz"; - auto fileToCheck_2 = currentSubjectOutputDir + subjectDirs[s] + "_deepmedic_seg.nii.gz"; - if (!(cbica::isFile(brainMaskFile) || 
cbica::isFile(fileToCheck_2))) - { - auto dm_tempOut = dataDir + "/" + subjectDirs[s] + "/dmOut/mask.nii.gz"; - auto fullCommand = deepMedicExe + " -md " + hardcodedNativeModelWeightPath + "/deepMedic/saved_models/brainTumorSegmentation/ " + - "-i " + file_t1 + "," + - file_t1gd + "," + - file_t2 + "," + - file_flair + " -o " + - dm_tempOut; - - if (std::system(fullCommand.c_str()) != 0) - { - std::cerr << "=== Couldn't complete the inference for deepmedic for subject " << subjectDirs[s] << ".\n"; - subjectsWithErrors += subjectDirs[s] + ",inference,deepmedic\n"; - } - else - { - cbica::copyFile(dm_tempOut, brainMaskFile); - } - } - } // deepmedic check - else - { - auto fullCommandToRun = hardcodedPythonPath + " " + hardcodedOpenFLPath + "/bin/run_inference_from_flplan.py"; - auto args = " -d " + dataDir + device_arg + " -ld " + loggingDir + " -ip " + subjectDirs[s]; - if (pythonEnvironmentFound) - { - std::string command_to_run; - - auto current_temp_output = cbica::createTmpDir(); - auto current_subject_temp_output = current_temp_output + "/" + subjectDirs[s]; - cbica::createDir(current_subject_temp_output); - // std::string file_t1gd, file_t1, file_t2, file_flair; - auto file_t1gd_temp = current_subject_temp_output + "/_t1gd.nii.gz", - file_t1_temp = current_subject_temp_output + "/_t1.nii.gz", - file_t2_temp = current_subject_temp_output + "/_t2.nii.gz", - file_flair_temp = current_subject_temp_output + "/_flair.nii.gz"; - cbica::copyFile(file_t1gd, file_t1gd_temp); - cbica::copyFile(file_t1, file_t1_temp); - cbica::copyFile(file_t2, file_t2_temp); - cbica::copyFile(file_flair, file_flair_temp); - // check for all other models here - - // check between different architectures - if (archs_split[a] == "fets_singlet") - { - std::cout << "== Starting inference using FeTS Singlet Consensus model...\n"; - auto fileNameToCheck = "fets_singlet.nii.gz"; - auto current_output_file_to_check = currentSubjectOutputDir + fileNameToCheck; - - if (!cbica::isFile(current_output_file_to_check)) - { - auto current_outputDir = currentSubjectOutputDir + "/fets_singlet"; - cbica::createDir(current_outputDir); - command_to_run = hardcodedPythonPath + " " + scriptToCall - // et, tc, wt - + " -ET " + hardcodedFinalModelsSeriesWeightsPath + "52" - + " -TC " + hardcodedFinalModelsSeriesWeightsPath + "52" - + " -WT " + hardcodedFinalModelsSeriesWeightsPath + "52" - + " -pp " + hardcodedOpenFLPlanPath + " -mot _fets_singlet" + " -op " + current_outputDir + device_arg_for_second_script + " -dp " + current_temp_output; - if (std::system(command_to_run.c_str()) != 0) - { - std::cerr << "WARNING: The singlet model '52' did not run, please contact admin@fets.ai with this error.\n\n"; - } - else - { - auto current_output_file = current_outputDir + "/" + subjectDirs[s] + "/" + subjectDirs[s] + "_fets_singlet_seg.nii.gz"; - if (cbica::isFile(current_output_file)) - { - cbica::copyFile(current_output_file, current_output_file_to_check); - } - } - } // end of if file exists - } // end of fets_singlet check - else if (archs_split[a] == "fets_triplet") - { - std::cout << "== Starting inference using FeTS Triplet Consensus model...\n"; - - auto fileNameToCheck = "fets_triplet.nii.gz"; - auto current_output_file_to_check = currentSubjectOutputDir + fileNameToCheck; - - if (!cbica::isFile(current_output_file_to_check)) - { - auto current_outputDir = currentSubjectOutputDir + "/fets_triplet"; - cbica::createDir(current_outputDir); - command_to_run = hardcodedPythonPath + " " + scriptToCall - // et, tc, wt - + " -ET " + 
hardcodedFinalModelsSeriesWeightsPath + "69" - + " -TC " + hardcodedFinalModelsSeriesWeightsPath + "72" - + " -WT " + hardcodedFinalModelsSeriesWeightsPath + "52" - + " -pp " + hardcodedOpenFLPlanPath + " -mot _fets_triplet" + " -op " + current_outputDir + device_arg_for_second_script + " -dp " + current_temp_output; - if (std::system(command_to_run.c_str()) != 0) - { - std::cerr << "WARNING: The triplet model '[69,72,52]' did not run, please contact admin@fets.ai with this error.\n\n"; - } - else - { - auto current_output_file = current_outputDir + "/" + subjectDirs[s] + "/" + subjectDirs[s] + "_fets_triplet_seg.nii.gz"; - if (cbica::isFile(current_output_file)) - { - cbica::copyFile(current_output_file, current_output_file_to_check); - } - } - } // end of if file exists - } // end of fets_triplet check - else - { - std::string hardcodedPlanName; - if (archs_split[a].find("nnunet") != std::string::npos) - { - hardcodedPlanName = "nnunet"; - std::cout << "== Starting inference using nnUNet...\n"; - } - else if (archs_split[a].find("deepscan") != std::string::npos) - { - hardcodedPlanName = "deepscan"; - std::cout << "== Starting inference using DeepScan...\n"; - } - if (!hardcodedPlanName.empty()) - { - auto fileNameToCheck = subjectDirs[s] + "_" + hardcodedPlanName + "_seg.nii.gz"; - auto fileToCheck_1 = dataDir + "/" + subjectDirs[s] + "/" + fileNameToCheck; - auto fileToCheck_2 = currentSubjectOutputDir + fileNameToCheck; - if (!(cbica::isFile(fileToCheck_1) || cbica::isFile(fileToCheck_2))) // don't run if file is present - { - // structure according to what is needed - might need to create a function that can call run_inference_from_flplan for different hardcodedModelName - auto args_to_run = args + " -nmwf " + hardcodedNativeModelWeightPath + "/" + hardcodedPlanName // - + " -p " + hardcodedPlanName + "_inference.yaml" - + " -pwai"; - - /// remove before final packaging - std::cerr << "=== \n=== Command to run: \n" << fullCommandToRun + " " + args_to_run << "\n===\n"; - - if (std::system((fullCommandToRun + " " + args_to_run).c_str()) != 0) - { - std::cerr << "=== Couldn't complete the inference for " << archs_split[a] << " for subject " << subjectDirs[s] << ".\n"; - subjectsWithErrors += subjectDirs[s] + ",inference," + archs_split[a] + "\n"; - } - } // end of previous run file check - } // end of hardcodedPlanName check - } // end of non-3dresunet check - } // end of python check - } // end of non-DM archs check - } // end of archs_split - - /// fusion - if (pythonEnvironmentFound && archs_split.size() > 1) - { - if (cbica::isFile(hardcodedLabelFusionPath)) - { - std::cout << "== Starting label fusion...\n"; - auto filesInSubjectDir = cbica::filesInDirectory(dataDir + "/" + subjectDirs[s]); - auto labelFusion_command = hardcodedPythonPath_fusion + " " + hardcodedLabelFusionPath + " "; - std::string filesForFusion; - auto dm_folder = dataDir + "/" + subjectDirs[s] + "/dmOut"; - if (cbica::isDir(dm_folder)) - { - cbica::copyDir(dm_folder, currentSubjectOutputDir); - cbica::removeDirectoryRecursively(dm_folder, true); - } - - for (size_t f = 0; f < filesInSubjectDir.size(); f++) - { - if (filesInSubjectDir[f].find("_seg.nii.gz") != std::string::npos) // find all files that have "_seg.nii.gz" in file name - { - if (filesInSubjectDir[f].find("final") == std::string::npos) // only do fusion for the files where "final" is not present - { - auto fileToCopy = currentSubjectOutputDir + cbica::getFilenameBase(filesInSubjectDir[f]) + ".nii.gz"; - cbica::copyFile(filesInSubjectDir[f], 
fileToCopy); - filesForFusion += fileToCopy + ","; - std::remove(filesInSubjectDir[f].c_str()); - } - } - } // files loop in subject directory - filesInSubjectDir = cbica::filesInDirectory(currentSubjectOutputDir); - for (size_t f = 0; f < filesInSubjectDir.size(); f++) - { - auto fileToCopy = currentSubjectOutputDir + cbica::getFilenameBase(filesInSubjectDir[f]) + ".nii.gz"; - if (filesInSubjectDir[f].find("fused") == std::string::npos) // only consider those files for fusion that are arch outputs - { - filesForFusion += fileToCopy + ","; - } - } // files loop in subject directory - - if (!filesForFusion.empty()) - { - filesForFusion.pop_back(); // remove last "," - } - - for (size_t f = 0; f < fusion_split.size(); f++) - { - auto final_fused_file = currentSubjectOutputDir + "/" + subjectDirs[s] + "_fused_" + fusion_split[f] + "_seg.nii.gz"; - auto full_fusion_command = labelFusion_command + "-inputs " + filesForFusion + " -classes 0,1,2,4 " // this needs to change after different segmentation algorithms are put in place - + " -method " + fusion_split[f] + " -output " + final_fused_file; - if (std::system(full_fusion_command.c_str()) != 0) - { - std::cerr << "=== Something went wrong with fusion for subject '" << subjectDirs[s] << "' using fusion method '" << fusion_split[f] << "'\n"; - subjectsWithErrors += subjectDirs[s] + ",fusion," + fusion_split[f] + "\n"; - } - } - } // end of label fusion script check - } // end of python check - } // end of currentSubjectIsProblematic - } // end of logs check - } // end of subjectDirs - - // provide error message - if (!subjectsWithMissingModalities.empty()) - { - std::cerr << "\nThe following subjects did not have all the 4 structural modalities to proceed with preprocessing:\nSubjectID,Modality\n" << subjectsWithMissingModalities; - } - if (!subjectsWithErrors.empty()) - { - std::cerr << "\nThe following subjects were problematic:\nSubjectID,Application,Algorithm\n" << subjectsWithErrors; - } - } // end of trainingRequested check - else // for training - { - /// start validation of nnunet/deepscan/deepmedic on all validation cases - auto split_info_val = dataDir + "/split_info/fets_phase2_split_1/val.csv", // revisit in case we change split in the future - validation_to_send = dataDir + "/validation.yaml", - validation_internal = dataDir + "/validation_internal.yaml"; - - if (!cbica::isFile(hardcodedPythonPath)) - { - std::cerr << "The python virtual environment was not found, please refer to documentation to initialize it.\n"; - return EXIT_FAILURE; - } - - if (!cbica::fileExists(split_info_val)) - { - auto full_plan_path = hardcodedOpenFLPath + "/bin/federations/plans/" + hardcodedPlanName + ".yaml"; - auto command_to_run = hardcodedPythonPath + " " + hardcodedOpenFLPath + "submodules/fets_ai/Algorithms/fets/bin/initialize_split_info.py -pp " + full_plan_path + " -dp " + dataDir; - if (std::system(command_to_run.c_str()) != 0) - { - std::cerr << "Initialize split did not work, continuing with validation.\n"; - } - } - - if (cbica::fileExists(split_info_val)) - { - std::ifstream file(split_info_val.c_str()); - bool firstRow = true; - int row_index = -1; - auto regions_of_interest = { "WT", "TC", "ET" }, - measures_of_interest = { "Dice", "Hausdorff95", "Sensitivity", "Specificity" }; - - auto yaml_config_to_send = YAML::Node(); - auto yaml_config_internal = YAML::Node(); - - if (cbica::isFile(validation_internal)) // load previous internal validation file - { - yaml_config_internal = YAML::LoadFile(validation_internal); - } - - while (file) - { 
- std::string line; - std::getline(file, line); - // fix line ending problems - std::remove_copy(line.begin(), line.end(), line.begin(), '\r'); - std::stringstream lineStream(line); - std::vector row; - std::string cell; - while (getline(lineStream, cell, ',')) - { - if (row_index > -1) - { - auto subject_id = cell; - auto subject_index_str = std::to_string(row_index); - - bool previous_validation_file_is_okay = true; - - if (yaml_config_internal[subject_id]) // check if subject is present in internal validation file - { - yaml_config_to_send[subject_index_str] = yaml_config_internal[subject_id]; // if present, take all stats from there - auto to_check = yaml_config_internal[subject_id]["WT"]; - if (!yaml_config_internal[subject_id]["WT"]["Sensitivity"]) // check if sensitivity is present for subject - { - previous_validation_file_is_okay = false; - } - } - else - { - previous_validation_file_is_okay = false; - } - - if (!previous_validation_file_is_okay) - { - auto current_subject_folder = dataDir + "/" + subject_id; - auto final_seg = current_subject_folder + "/" + subject_id + "_final_seg.nii.gz"; - std::map< std::string, std::string > archs_to_check; - archs_to_check["deepmedic"] = current_subject_folder + "/SegmentationsForQC/" + subject_id + "_deepmedic_seg.nii.gz"; - archs_to_check["nnunet"] = current_subject_folder + "/SegmentationsForQC/" + subject_id + "_nnunet_seg.nii.gz"; - archs_to_check["deepscan"] = current_subject_folder + "/SegmentationsForQC/" + subject_id + "_deepscan_seg.nii.gz"; - if (!cbica::isFile(final_seg)) - { - std::cerr << "The subject '" << subject_id << "' does not have a final_seg file present.\n"; - } - else - { - using DefaultImageType = itk::Image< unsigned int, 3 >; - auto final_seg_image = cbica::ReadImage< DefaultImageType >(final_seg); - for (auto& current_arch : archs_to_check) - { - if (cbica::isFile(current_arch.second)) - { - auto image_to_check = cbica::ReadImage< DefaultImageType >(current_arch.second); - - auto stats = cbica::GetBraTSLabelStatistics< DefaultImageType >(final_seg_image, image_to_check); - - for (auto& region : regions_of_interest) - { - for (auto& measure : measures_of_interest) - { - yaml_config_to_send[subject_index_str][current_arch.first][region][measure] = stats[region][measure]; - yaml_config_internal[subject_id][current_arch.first][region][measure] = stats[region][measure]; - } // end measure loop - } // end region loop - } // end file-check loop - } // end arch-loop - } // end final_seg check - } // end internal validation check loop - } // end header check if-loop - row_index++; // increment subject id counter - } // end csv-read while loop - } - std::ofstream fout_int(validation_internal); - fout_int << yaml_config_internal; // dump it back into the file - fout_int.close(); - - std::ofstream fout(validation_to_send); - fout << yaml_config_to_send; // dump it back into the file - fout.close(); - } - - std::string args = " -d " + dataDir + " -ld " + loggingDir + " -col " + colName + device_arg, - hardcodedModelName; - - if (!patchValidation) - { - args += " -vwop"; - } - - { - std::cout << "Starting model validation of 3DResUNet trained on BraTS20 training data...\n"; - - // brats20 model validation - std::string fullCommandToRun = hardcodedPythonPath + " " + fetsApplicationPath; - fullCommandToRun += "/OpenFederatedLearning/bin/run_fets_validation.py"; - - auto temp_args = args + " -p fets_phase1_validate_full_brats_trained_model_1.yaml"; - - if (std::system((fullCommandToRun + " " + temp_args).c_str()) != 0) - { - 
std::cerr << "Couldn't complete the BraTS20 model validation task, please email admin@fets.ai\n"; - } - } - - std::string fullCommandToRun = hardcodedPythonPath + " " + fetsApplicationPath; - fullCommandToRun += "/OpenFederatedLearning/bin/run_collaborator_from_flplan.py"; - - auto temp_args = args + " -p " + hardcodedPlanName + ".yaml" + " -bsuf " + validation_to_send + " -nlo"; - - std::cout << "Starting training...\n"; - - if (runCollaboratorTraining(fullCommandToRun + " " + temp_args) != 0) - { - std::cerr << "Couldn't complete the training task, please email admin@fets.ai\n"; - return EXIT_FAILURE; - } - - } // end of trainingRequested check - - std::cout << "Finished.\n"; - - return EXIT_SUCCESS; -} - - diff --git a/src/applications/FeTS_CLI_Train.cxx b/src/applications/FeTS_CLI_Train.cxx deleted file mode 100644 index 6dbc9d8a..00000000 --- a/src/applications/FeTS_CLI_Train.cxx +++ /dev/null @@ -1,533 +0,0 @@ -#include - -#include "cbicaCmdParser.h" -#include "cbicaLogging.h" -#include "cbicaITKSafeImageIO.h" -#include "cbicaUtilities.h" -#include "cbicaITKUtilities.h" -#include "CaPTkGUIUtils.h" - -int runCollaboratorTraining(const std::string &fullCommandToRunWithArgs) -{ - auto returnCode = std::system(fullCommandToRunWithArgs.c_str()); - if (returnCode != 0) - { - if (returnCode == 154) - { - std::cout << "Special case, where the collaborator failing is expected, so automatically restarting.\n"; - return runCollaboratorTraining(fullCommandToRunWithArgs); - } - } -} - -int main(int argc, char** argv) -{ - cbica::CmdParser parser(argc, argv, "FeTS_CLI_Train"); - - auto hardcodedNativeModelWeightPath = getCaPTkDataDir() + "/fets"; - auto allArchs = cbica::subdirectoriesInDirectory(hardcodedNativeModelWeightPath); - std::string allArchsString; - for (size_t i = 0; i < allArchs.size(); i++) - { - allArchsString += allArchs[i] + ","; - } - allArchsString.pop_back(); - - std::string dataDir, modelName, loggingDir, colName, archs = "3dresunet", fusionMethod = "STAPLE", hardcodedPlanName = "fets_phase2_2"; - - parser.addRequiredParameter("d", "dataDir", cbica::Parameter::DIRECTORY, "Dir with Read/Write access", "Input data directory"); - parser.addRequiredParameter("t", "training", cbica::Parameter::BOOLEAN, "0 or 1", "Whether performing training or inference", "1==Train and 0==Inference"); - parser.addOptionalParameter("tp", "trainPlan", cbica::Parameter::BOOLEAN, "YAML file", "Training plan", "Defaults to '" + hardcodedPlanName + "'"); - parser.addOptionalParameter("L", "LoggingDir", cbica::Parameter::DIRECTORY, "Dir with write access", "Location of logging directory"); - parser.addOptionalParameter("g", "gpu", cbica::Parameter::BOOLEAN, "0-1", "Whether to run the process on GPU or not", "Defaults to '0'"); - parser.addOptionalParameter("c", "colName", cbica::Parameter::STRING, "", "Common name of collaborator", "Required for training"); - // parser.addOptionalParameter("vp", "valPatch", cbica::Parameter::BOOLEAN, "0-1", "Whether to perform per-patch validation or not", "Used for training, defaults to '0'"); - - parser.addApplicationDescription("This is the CLI interface for FeTS"); - parser.addExampleUsage("-d /path/DataForFeTS -a deepMedic,nnUNet -lF STAPLE,ITKVoting,SIMPLE -g 1 -t 0", "This command performs inference using deepMedic,nnUNet using multiple fusion strategies on GPU and saves in data directory"); - parser.addExampleUsage("-d /path/DataForFeTS -t 1 -g 1 -c upenn", "This command starts training performs inference using deepMedic,nnUNet using multiple fusion strategies on 
GPU and saves in data directory"); - - bool gpuRequested = false, trainingRequested = true, patchValidation = true; - - parser.getParameterValue("d", dataDir); - - if (parser.isPresent("L")) - { - parser.getParameterValue("L", loggingDir); - } - else - { - loggingDir = dataDir + "/logs"; - std::cout << "Using the following directory as logging directory: " << loggingDir << "\n"; - cbica::createDir(loggingDir); - } - - if (parser.isPresent("c")) - { - parser.getParameterValue("c", colName); - } - else - { - std::cerr << "Collaborator name is required to beging training; please specify this using '-c'.\n"; - return EXIT_FAILURE; - } - - std::string device_arg = " -md "; - if (gpuRequested) - { - device_arg += "cuda"; - } - else - { - device_arg += "cpu"; - } - - - // convert everything to lower-case for easier comparison - std::transform(archs.begin(), archs.end(), archs.begin(), ::tolower); - std::transform(fusionMethod.begin(), fusionMethod.end(), fusionMethod.begin(), ::tolower); - - auto fetsApplicationPath = cbica::getExecutablePath(); - auto deepMedicExe = getApplicationPath("DeepMedic"); - - auto archs_split = cbica::stringSplit(archs, ","); - auto fusion_split = cbica::stringSplit(fusionMethod, ","); - - auto subjectDirs = cbica::subdirectoriesInDirectory(dataDir); - - std::string - hardcodedOpenFLPath = fetsApplicationPath + "/OpenFederatedLearning/", - hardcodedLabelFusionPath = fetsApplicationPath + "/LabelFusion/fusion_run", - hardcodedModelWeightPath = hardcodedOpenFLPath + "/bin/federations/weights/", // start with the common location - //hardcodedNativeModelWeightPath = hardcodedOpenFLPath + "/bin/federations/weights/native/", // the native weights are going in fets_data_dir/fets - hardcodedPythonPath = hardcodedOpenFLPath + "/venv/bin/python", // this needs to change for Windows (wonder what happens for macOS?) - hardcodedPythonPath_fusion = fetsApplicationPath + "/LabelFusion/venv/bin/python"; // this needs to change for Windows (wonder what happens for macOS?) 
-#if WIN32 - hardcodedPythonPath = hardcodedOpenFLPath + "/venv/python.exe"; -#endif - - auto pythonEnvironmentFound = false; - if (cbica::isFile(hardcodedPythonPath)) - { - pythonEnvironmentFound = true; - } - - { - std::string subjectsWithMissingModalities, subjectsWithErrors; // string to store error cases - - std::cout << "Starting subject directory iteration...\n"; - for (size_t s = 0; s < subjectDirs.size(); s++) // iterate through all subjects - { - if (subjectDirs[s].find("logs") == std::string::npos) - { - auto currentSubjectIsProblematic = false; - std::string file_t1gd, file_t1, file_t2, file_flair; - - auto filesInDir = cbica::filesInDirectory(dataDir + "/" + subjectDirs[s]); // get files in current subject directory - // iterate through all files and pick up individual modalities - for (size_t f = 0; f < filesInDir.size(); f++) - { - if (file_t1gd.empty()) - { - if ((filesInDir[f].find("_t1ce.nii.gz") != std::string::npos) || (filesInDir[f].find("_t1gd.nii.gz") != std::string::npos)) - { - file_t1gd = filesInDir[f]; - } - } - if (file_t1.empty()) - { - if (filesInDir[f].find("_t1.nii.gz") != std::string::npos) - { - file_t1 = filesInDir[f]; - } - } - if (file_t2.empty()) - { - if (filesInDir[f].find("_t2.nii.gz") != std::string::npos) - { - file_t2 = filesInDir[f]; - } - } - if (file_flair.empty()) - { - if ((filesInDir[f].find("_flair.nii.gz") != std::string::npos) || (filesInDir[f].find("_fl.nii.gz") != std::string::npos)) - { - file_flair = filesInDir[f]; - } - } - } - - // ensure problematic cases are detected - if (file_t1gd.empty()) - { - subjectsWithMissingModalities += subjectDirs[s] + ",t1ce\n"; - currentSubjectIsProblematic = true; - } - if (file_t1.empty()) - { - subjectsWithMissingModalities += subjectDirs[s] + ",t1\n"; - currentSubjectIsProblematic = true; - } - if (file_t2.empty()) - { - subjectsWithMissingModalities += subjectDirs[s] + ",t2\n"; - currentSubjectIsProblematic = true; - } - if (file_flair.empty()) - { - subjectsWithMissingModalities += subjectDirs[s] + ",flair\n"; - currentSubjectIsProblematic = true; - } - - if (!currentSubjectIsProblematic) // proceed only if all modalities for the current subject are present - { - std::cout << "= Starting inference for subject: " << subjectDirs[s] << "\n"; - auto currentSubjectOutputDir = dataDir + "/" + subjectDirs[s] + "/SegmentationsForQC/"; - cbica::createDir(currentSubjectOutputDir); - for (size_t a = 0; a < archs_split.size(); a++) // iterate through all requested architectures - { - if (archs_split[a] == "deepmedic") // special case - { - std::cout << "== Starting inference using DeepMedic...\n"; - auto brainMaskFile = dataDir + "/" + subjectDirs[s] + "/" + subjectDirs[s] + "_deepmedic_seg.nii.gz"; - auto fileToCheck_2 = currentSubjectOutputDir + subjectDirs[s] + "_deepmedic_seg.nii.gz"; - if (!(cbica::isFile(brainMaskFile) || cbica::isFile(fileToCheck_2))) - { - auto dm_tempOut = dataDir + "/" + subjectDirs[s] + "/dmOut/mask.nii.gz"; - auto fullCommand = deepMedicExe + " -md " + hardcodedNativeModelWeightPath + "/deepMedic/saved_models/brainTumorSegmentation/ " + - "-i " + file_t1 + "," + - file_t1gd + "," + - file_t2 + "," + - file_flair + " -o " + - dm_tempOut; - - if (std::system(fullCommand.c_str()) != 0) - { - std::cerr << "=== Couldn't complete the inference for deepmedic for subject " << subjectDirs[s] << ".\n"; - subjectsWithErrors += subjectDirs[s] + ",inference,deepmedic\n"; - } - else - { - cbica::copyFile(dm_tempOut, brainMaskFile); - } - } - } // deepmedic check - else - { - auto 
fullCommandToRun = hardcodedPythonPath + " " + hardcodedOpenFLPath + "/bin/run_inference_from_flplan.py"; - auto args = " -d " + dataDir + device_arg + " -ld " + loggingDir + " -ip " + subjectDirs[s]; - if (pythonEnvironmentFound) - { - // check for all other models written in pytorch here - - // check between different architectures - if (archs_split[a] == "3dunet") - { - // this is currently not defined - } - else if (archs_split[a] == "3dresunet") - { - std::cout << "3DResUNet inference is disabled for this release.\n"; - //auto fileNameToCheck = subjectDirs[s] + "_resunet_seg.nii.gz"; - //auto fileToCheck_1 = dataDir + "/" + subjectDirs[s] + "/" + fileNameToCheck; - //auto fileToCheck_2 = currentSubjectOutputDir + fileNameToCheck; - //if (!(cbica::isFile(fileToCheck_1) || cbica::isFile(fileToCheck_2))) // don't run if file is present - //{ - // std::cout << "== Starting inference using 3DResUNet...\n"; - // hardcodedPlanName = "pt_3dresunet_brainmagebrats"; - // auto hardcodedModelName = hardcodedPlanName + "_best.pbuf"; - // if (!cbica::isFile((hardcodedModelWeightPath + "/" + hardcodedModelName))) // in case the "best" model is not present, use the "init" model that is distributed with FeTS installation - // { - // hardcodedModelName = hardcodedPlanName + "_init.pbuf"; - // if (!cbica::isFile((hardcodedModelWeightPath + "/" + hardcodedModelName))) - // { - // std::cerr << "=== A compatible model weight file for the architecture '" << archs_split[a] << "' was not found. Please contact admin@fets.ai for help.\n"; - // } - // } - - // auto args_to_run = args + " -mwf " + hardcodedModelName - // + " -p " + hardcodedPlanName + ".yaml"; - // //<< "-mwf" << hardcodedModelWeightPath // todo: doing customized solution above - change after model weights are using full paths for all - - // if (std::system((fullCommandToRun + " " + args_to_run).c_str()) != 0) - // { - // std::cerr << "=== Couldn't complete the inference for 3dresunet for subject " << subjectDirs[s] << ".\n"; - // subjectsWithErrors += subjectDirs[s] + ",inference,3dresunet\n"; - // } - //} // end of previous run file check - } // end of 3dresunet check - else - { - std::string hardcodedPlanName; - if (archs_split[a].find("nnunet") != std::string::npos) - { - hardcodedPlanName = "nnunet"; - std::cout << "== Starting inference using nnUNet...\n"; - } - else if (archs_split[a].find("deepscan") != std::string::npos) - { - hardcodedPlanName = "deepscan"; - std::cout << "== Starting inference using DeepScan...\n"; - } - if (!hardcodedPlanName.empty()) - { - auto fileNameToCheck = subjectDirs[s] + "_" + hardcodedPlanName + "_seg.nii.gz"; - auto fileToCheck_1 = dataDir + "/" + subjectDirs[s] + "/" + fileNameToCheck; - auto fileToCheck_2 = currentSubjectOutputDir + fileNameToCheck; - if (!(cbica::isFile(fileToCheck_1) || cbica::isFile(fileToCheck_2))) // don't run if file is present - { - // structure according to what is needed - might need to create a function that can call run_inference_from_flplan for different hardcodedModelName - auto args_to_run = args + " -nmwf " + hardcodedNativeModelWeightPath + "/" + hardcodedPlanName // - + " -p " + hardcodedPlanName + "_inference.yaml" - + " -pwai"; - - /// remove before final packaging - std::cerr << "=== \n=== Command to run: \n" << fullCommandToRun + " " + args_to_run << "\n===\n"; - - if (std::system((fullCommandToRun + " " + args_to_run).c_str()) != 0) - { - std::cerr << "=== Couldn't complete the inference for " << archs_split[a] << " for subject " << subjectDirs[s] << ".\n"; - 
subjectsWithErrors += subjectDirs[s] + ",inference," + archs_split[a] + "\n"; - } - } // end of previous run file check - } // end of hardcodedPlanName check - } // end of non-3dresunet check - } // end of python check - } // end of non-DM archs check - } // end of archs_split - - /// fusion - if (pythonEnvironmentFound) - { - if (cbica::isFile(hardcodedLabelFusionPath)) - { - std::cout << "== Starting label fusion...\n"; - auto filesInSubjectDir = cbica::filesInDirectory(dataDir + "/" + subjectDirs[s]); - auto labelFusion_command = hardcodedPythonPath_fusion + " " + hardcodedLabelFusionPath + " "; - std::string filesForFusion; - auto dm_folder = dataDir + "/" + subjectDirs[s] + "/dmOut"; - if (cbica::isDir(dm_folder)) - { - cbica::copyDir(dm_folder, currentSubjectOutputDir); - cbica::removeDirectoryRecursively(dm_folder, true); - } - - for (size_t f = 0; f < filesInSubjectDir.size(); f++) - { - if (filesInSubjectDir[f].find("_seg.nii.gz") != std::string::npos) // find all files that have "_seg.nii.gz" in file name - { - if (filesInSubjectDir[f].find("final") == std::string::npos) // only do fusion for the files where "final" is not present - { - auto fileToCopy = currentSubjectOutputDir + cbica::getFilenameBase(filesInSubjectDir[f]) + ".nii.gz"; - cbica::copyFile(filesInSubjectDir[f], fileToCopy); - filesForFusion += fileToCopy + ","; - std::remove(filesInSubjectDir[f].c_str()); - } - } - } // files loop in subject directory - filesInSubjectDir = cbica::filesInDirectory(currentSubjectOutputDir); - for (size_t f = 0; f < filesInSubjectDir.size(); f++) - { - auto fileToCopy = currentSubjectOutputDir + cbica::getFilenameBase(filesInSubjectDir[f]) + ".nii.gz"; - if (filesInSubjectDir[f].find("fused") == std::string::npos) // only consider those files for fusion that are arch outputs - { - filesForFusion += fileToCopy + ","; - } - } // files loop in subject directory - - if (!filesForFusion.empty()) - { - filesForFusion.pop_back(); // remove last "," - } - - for (size_t f = 0; f < fusion_split.size(); f++) - { - auto final_fused_file = currentSubjectOutputDir + "/" + subjectDirs[s] + "_fused_" + fusion_split[f] + "_seg.nii.gz"; - auto full_fusion_command = labelFusion_command + "-inputs " + filesForFusion + " -classes 0,1,2,4 " // this needs to change after different segmentation algorithms are put in place - + " -method " + fusion_split[f] + " -output " + final_fused_file; - if (std::system(full_fusion_command.c_str()) != 0) - { - std::cerr << "=== Something went wrong with fusion for subject '" << subjectDirs[s] << "' using fusion method '" << fusion_split[f] << "'\n"; - subjectsWithErrors += subjectDirs[s] + ",fusion," + fusion_split[f] + "\n"; - } - } - } // end of label fusion script check - } // end of python check - } // end of currentSubjectIsProblematic - } // end of logs check - } // end of subjectDirs - - // provide error message - if (!subjectsWithMissingModalities.empty()) - { - std::cerr << "\nThe following subjects did not have all the 4 structural modalities to proceed with preprocessing:\nSubjectID,Modality\n" << subjectsWithMissingModalities; - } - if (!subjectsWithErrors.empty()) - { - std::cerr << "\nThe following subjects were problematic:\nSubjectID,Application,Algorithm\n" << subjectsWithErrors; - } - } // end of trainingRequested check - // for training - { - /// start validation of nnunet/deepscan/deepmedic on all validation cases - auto split_info_val = dataDir + "/split_info/fets_phase2_split_1/val.csv", // revisit in case we change split in the future - 
validation_to_send = dataDir + "/validation.yaml", - validation_internal = dataDir + "/validation_internal.yaml"; - - if (!cbica::isFile(hardcodedPythonPath)) - { - std::cerr << "The python virtual environment was not found, please refer to documentation to initialize it.\n"; - return EXIT_FAILURE; - } - - if (!cbica::fileExists(split_info_val)) - { - auto full_plan_path = hardcodedOpenFLPath + "/bin/federations/plans/" + hardcodedPlanName + ".yaml"; - auto command_to_run = hardcodedPythonPath + " " + hardcodedOpenFLPath + "submodules/fets_ai/Algorithms/fets/bin/initialize_split_info.py -pp " + full_plan_path + " -dp " + dataDir; - if (std::system(command_to_run.c_str()) != 0) - { - std::cerr << "Initialize split did not work, continuing with validation.\n"; - } - } - - if (cbica::fileExists(split_info_val)) - { - std::ifstream file(split_info_val.c_str()); - bool firstRow = true; - int row_index = -1; - auto regions_of_interest = { "WT", "TC", "ET" }, - measures_of_interest = { "Dice", "Hausdorff95", "Sensitivity", "Specificity" }; - - auto yaml_config_to_send = YAML::Node(); - auto yaml_config_internal = YAML::Node(); - - if (cbica::isFile(validation_internal)) // load previous internal validation file - { - yaml_config_internal = YAML::LoadFile(validation_internal); - } - - while (file) - { - std::string line; - std::getline(file, line); - // fix line ending problems - std::remove_copy(line.begin(), line.end(), line.begin(), '\r'); - std::stringstream lineStream(line); - std::vector row; - std::string cell; - while (getline(lineStream, cell, ',')) - { - if (row_index > -1) - { - auto subject_id = cell; - auto subject_index_str = std::to_string(row_index); - - bool previous_validation_file_is_okay = true; - - if (yaml_config_internal[subject_id]) // check if subject is present in internal validation file - { - yaml_config_to_send[subject_index_str] = yaml_config_internal[subject_id]; // if present, take all stats from there - auto to_check = yaml_config_internal[subject_id]["WT"]; - if (!yaml_config_internal[subject_id]["WT"]["Sensitivity"]) // check if sensitivity is present for subject - { - previous_validation_file_is_okay = false; - } - } - else - { - previous_validation_file_is_okay = false; - } - - if (!previous_validation_file_is_okay) - { - auto current_subject_folder = dataDir + "/" + subject_id; - auto final_seg = current_subject_folder + "/" + subject_id + "_final_seg.nii.gz"; - std::map< std::string, std::string > archs_to_check; - archs_to_check["deepmedic"] = current_subject_folder + "/SegmentationsForQC/" + subject_id + "_deepmedic_seg.nii.gz"; - archs_to_check["nnunet"] = current_subject_folder + "/SegmentationsForQC/" + subject_id + "_nnunet_seg.nii.gz"; - archs_to_check["deepscan"] = current_subject_folder + "/SegmentationsForQC/" + subject_id + "_deepscan_seg.nii.gz"; - if (!cbica::isFile(final_seg)) - { - std::cerr << "The subject '" << subject_id << "' does not have a final_seg file present.\n"; - } - else - { - using DefaultImageType = itk::Image< unsigned int, 3 >; - auto final_seg_image = cbica::ReadImage< DefaultImageType >(final_seg); - for (auto& current_arch : archs_to_check) - { - if (cbica::isFile(current_arch.second)) - { - auto image_to_check = cbica::ReadImage< DefaultImageType >(current_arch.second); - - auto stats = cbica::GetBraTSLabelStatistics< DefaultImageType >(final_seg_image, image_to_check); - - for (auto& region : regions_of_interest) - { - for (auto& measure : measures_of_interest) - { - 
yaml_config_to_send[subject_index_str][current_arch.first][region][measure] = stats[region][measure]; - yaml_config_internal[subject_id][current_arch.first][region][measure] = stats[region][measure]; - } // end measure loop - } // end region loop - } // end file-check loop - } // end arch-loop - } // end final_seg check - } // end internal validation check loop - } // end header check if-loop - row_index++; // increment subject id counter - } // end csv-read while loop - } - std::ofstream fout_int(validation_internal); - fout_int << yaml_config_internal; // dump it back into the file - fout_int.close(); - - std::ofstream fout(validation_to_send); - fout << yaml_config_to_send; // dump it back into the file - fout.close(); - } - - std::string args = " -d " + dataDir + " -ld " + loggingDir + " -col " + colName + device_arg, - hardcodedModelName; - - if (!patchValidation) - { - args += " -vwop"; - } - - { - std::cout << "Starting model validation of 3DResUNet trained on BraTS20 training data...\n"; - - // brats20 model validation - std::string fullCommandToRun = hardcodedPythonPath + " " + fetsApplicationPath; - fullCommandToRun += "/OpenFederatedLearning/bin/run_fets_validation.py"; - - auto temp_args = args + " -p fets_phase1_validate_full_brats_trained_model_1.yaml"; - - if (std::system((fullCommandToRun + " " + temp_args).c_str()) != 0) - { - std::cerr << "Couldn't complete the BraTS20 model validation task, please email admin@fets.ai\n"; - } - } - - std::string fullCommandToRun = hardcodedPythonPath + " " + fetsApplicationPath; - fullCommandToRun += "/OpenFederatedLearning/bin/run_collaborator_from_flplan.py"; - - auto temp_args = args + " -p " + hardcodedPlanName + ".yaml" + " -bsuf " + validation_to_send + " -nlo"; - - std::cout << "Starting training...\n"; - - if (runCollaboratorTraining(fullCommandToRun + " " + temp_args) != 0) - { - std::cerr << "Couldn't complete the training task, please email admin@fets.ai\n"; - return EXIT_FAILURE; - } - - } // end of trainingRequested check - - std::cout << "Finished.\n"; - - return EXIT_SUCCESS; -} - - diff --git a/src/applications/FullProcessingPipeline.cxx b/src/applications/FullProcessingPipeline.cxx deleted file mode 100644 index eee54fbc..00000000 --- a/src/applications/FullProcessingPipeline.cxx +++ /dev/null @@ -1,102 +0,0 @@ -#include -#include -#include -#include - -#include "cbicaCmdParser.h" -#include "cbicaUtilities.h" -#include "CaPTkGUIUtils.h" -#include "cbicaITKImageInfo.h" -#include "cbicaITKSafeImageIO.h" - - -int main(int argc, char** argv) -{ - cbica::CmdParser parser(argc, argv, "FullProcessingPipeline"); - - auto hardcodedNativeModelWeightPath = getCaPTkDataDir() + "/fets"; - auto allArchs = cbica::subdirectoriesInDirectory(hardcodedNativeModelWeightPath); - std::string allArchsString; - for (size_t i = 0; i < allArchs.size(); i++) - { - allArchsString += allArchs[i] + ","; - } - allArchsString.pop_back(); - - std::string modelName, loggingDir, archs = "3dresunet", fusionMethod = "STAPLE"; - - parser.addApplicationDescription("This application calls the BraTSPipeline for all input images and stores the final and intermediate files separately"); - parser.addRequiredParameter("i", "inputCSV", cbica::Parameter::FILE, "Input CSV file", "Input CSV file which contains paths to structural images", "Headers should be 'PatientID,T1,T1GD,T2,T2FLAIR'"); - parser.addRequiredParameter("o", "outputDir", cbica::Parameter::DIRECTORY, "Directory", "Output directory for final output", "This will write 2 folders: 'DataForFeTS' and 
'DataForQC'", - "Former contains only the files needed for FeTS inference/training and ", "latter contains all intermediate files from this processing"); - parser.addOptionalParameter("a", "archs", cbica::Parameter::STRING, allArchsString, "The architecture(s) to infer/train on", "Only a single architecture is supported for training", "Comma-separated values for multiple options", "Defaults to: " + archs); - parser.addOptionalParameter("lF", "labelFuse", cbica::Parameter::STRING, "STAPLE,ITKVoting,SIMPLE,MajorityVoting", "The label fusion strategy to follow for multi-arch inference", "Comma-separated values for multiple options", "Defaults to: " + fusionMethod); - parser.addOptionalParameter("g", "gpu", cbica::Parameter::BOOLEAN, "0-1", "Whether to run the process on GPU or not", "Defaults to '0'"); - parser.addOptionalParameter("L", "LoggingDir", cbica::Parameter::DIRECTORY, "Dir with write access", "Location of logging directory"); - - auto preparedataset_path = getApplicationPath("PrepareDataset"); - - std::string inputCSV, outputDir, gpu_request = "0"; - - // parser CLI parameters - parser.getParameterValue("i", inputCSV); - parser.getParameterValue("o", outputDir); - - if (parser.isPresent("L")) - { - parser.getParameterValue("L", loggingDir); - } - else - { - loggingDir = cbica::createTemporaryDirectory() + "/logs"; - std::cout << "Using the following directory as logging directory: " << loggingDir << "\n"; - cbica::createDir(loggingDir); - } - - if (parser.isPresent("a")) - { - parser.getParameterValue("a", archs); - } - else - { - std::cerr << "Please specify at least 2 architectures on which to perform inference.\n"; - } - if (parser.isPresent("lF")) - { - parser.getParameterValue("lF", fusionMethod); - } - if (parser.isPresent("g")) - { - parser.getParameterValue("g", gpu_request); - } - - // run preparedataset - auto full_command = preparedataset_path + " -i " + inputCSV + " -o " + outputDir; - if (std::system(full_command.c_str()) != 0) - { - // in case of failure, try to run the python version - auto hardcodedPythonPath = cbica::getExecutablePath() + "/OpenFederatedLearning/venv/bin/python"; // this needs to change for Windows (wonder what happens for macOS?) 
- - preparedataset_path = cbica::stringReplace(preparedataset_path, ".exe", ""); - preparedataset_path += ".py"; - full_command = hardcodedPythonPath + " " + preparedataset_path + " -i " + inputCSV + " -o " + outputDir; - if (std::system(full_command.c_str()) != 0) - { - std::cerr << "There was an issue running PrepareDataset, contact 'software@cbica.upenn.edu' for troubleshooting.\n"; - return EXIT_FAILURE; - } - } - - // if it doesn't exit, run the fets_cli for inference - auto fets_cli_path = getApplicationPath("FeTS_CLI"); - full_command = fets_cli_path + " -t 0 -d " + outputDir + "/DataForFeTS" + " -a " + archs + " -lF " + fusionMethod + " -g " + gpu_request; - if (std::system(full_command.c_str()) != 0) - { - std::cerr << "There was an issue running FeTS_CLI, contact 'software@cbica.upenn.edu' for troubleshooting.\n"; - return EXIT_FAILURE; - } - - std::cout << "Successfully finished.\n"; - - return EXIT_SUCCESS; -} \ No newline at end of file diff --git a/src/applications/Phase2_IntensityCheck.py b/src/applications/Phase2_IntensityCheck.py deleted file mode 100644 index 3cd2d134..00000000 --- a/src/applications/Phase2_IntensityCheck.py +++ /dev/null @@ -1,252 +0,0 @@ -import os, argparse, sys, platform -from copy import deepcopy -from datetime import date -import SimpleITK as sitk -import numpy as np - -from skimage.measure import label - -max_negative_count_threshold = 5000 # the threshold above which an error is displayed, otherwise, the intensities are scaled - -def read_image_with_min_check(filename): - ''' - this function fixes negatives by scaling - if min(input) < 0: - for all x in image: - if x != 0: - x -= min - ''' - input_image = sitk.ReadImage(filename) - input_image_array = sitk.GetArrayFromImage(input_image) - min = np.min(input_image_array) - - # fixme: apply following logic - # check for connected components with less than 100 voxels - ## if less than the threshold, then apply above logic to the negative voxels - ## else, give error to user for manual QC - if min < 0: - blobs = input_image_array < 0 - all_labels_nonZero = np.nonzero(label(blobs)) - _, counts = np.unique(all_labels_nonZero, return_counts=True) - - if np.max(counts) < max_negative_count_threshold: - output_array = deepcopy(input_image_array) - mask = output_array != 0 - output_array[mask] = output_array[mask]-min - output_image = sitk.GetImageFromArray(output_array) - output_image.CopyInformation(input_image) - return output_image, None - else: - return input_image, str(counts) - - return input_image, None - -def imageSanityCheck(targetImageFile, inputImageFile) -> bool: - ''' - This function does sanity checking of 2 images - ''' - targetImage = sitk.ReadImage(targetImageFile) - inputImage = sitk.ReadImage(inputImageFile) - - size = targetImage.GetSize() - size_expected = np.array([240,240,155]) - if not(np.array_equal(size, size_expected)): - print('Size for target image, \'' + targetImageFile + '\' is not in the BraTS format', size_expected, file = sys.stderr) - return False - - if targetImage.GetDimension() != 3: - print('Dimension for target image, \'' + targetImageFile + '\' is not 3', file = sys.stderr) - return False - - if inputImage.GetDimension() != 3: - print('Dimension for input image, \'' + inputImageFile + '\' is not 3', file = sys.stderr) - return False - - commonMessage = ' mismatch for target image, \'' + targetImageFile + '\' and input image, \'' + inputImageFile + '\'' - problemsIn = '' - returnTrue = True - - if targetImage.GetSize() != inputImage.GetSize(): - if not problemsIn: - 
problemsIn += 'Size' - else: - problemsIn += ', Size' - returnTrue = False - - if targetImage.GetOrigin() != inputImage.GetOrigin(): - if not problemsIn: - problemsIn += 'Origin' - else: - problemsIn += ', Origin' - returnTrue = False - - if targetImage.GetSpacing() != inputImage.GetSpacing(): - if not problemsIn: - problemsIn += 'Spacing' - else: - problemsIn += ', Spacing' - returnTrue = False - - if returnTrue: - return True - else: - print(problemsIn + commonMessage, file = sys.stderr) - return False - -def checkBraTSLabels(subject_id, currentLabelFile, label_values_expected = np.array([0,1,2,4])) -> str: - ''' - This function checks for the expected labels and returns a string that will be provided as output for user - ''' - returnString = '' - mask_array = sitk.GetArrayFromImage(sitk.ReadImage(currentLabelFile)) - unique, counts = np.unique(mask_array, return_counts=True) # get unique elements and their counts - if not(np.array_equal(unique,label_values_expected)): # this is for the case where the label contains numbers other than 0,1,2,4 - for j in range(0,len(unique)): # iterate over a range to get counts easier - if not(unique[j] in label_values_expected): - if counts[j] > 1000: # threshold for mis-labelling, anything less is ignored - returnString += subject_id + ',' + currentLabelFile + ',' + str(unique[j]) + ',' + str(counts[j]) + '\n' - - return returnString - -def fixForLabelThree(currentLabelFile): - ''' - This function checks for the label '3' and changes it to '4' and save it in the same location - ''' - base_image = sitk.ReadImage(currentLabelFile) - mask_array = sitk.GetArrayFromImage(sitk.ReadImage(currentLabelFile)) - unique = np.sort(np.unique(mask_array)) - if unique[-1] == 3: - mask_array[mask_array == 3] = 4 - image_to_write = sitk.GetImageFromArray(mask_array) - image_to_write.CopyInformation(base_image) - sitk.WriteImage(image_to_write, currentLabelFile) - -def main(): - copyrightMessage = 'Contact: software@cbica.upenn.edu/n/n' + 'This program is NOT FDA/CE approved and NOT intended for clinical use./nCopyright (c) ' + str(date.today().year) + ' University of Pennsylvania. All rights reserved.' 
- parser = argparse.ArgumentParser(prog='SanityCheck', formatter_class=argparse.RawTextHelpFormatter, description = 'This application performs rudimentary sanity checks the input data folder for FeTS training./n/n' + copyrightMessage) - parser.add_argument('-inputDir', type=str, help = 'The input directory (DataForFeTS) that needs to be checked', required=True) - parser.add_argument('-outputFile', type=str, help = 'The CSV file of outputs, which is only generated if there are problematic cases', required=True) - - args = parser.parse_args() - inputDir = args.inputDir - - if not os.path.isdir(inputDir): - sys.exit( - 'The specified inputDir is not present, please try again') - - errorMessage = 'Subject_ID,Recommendation_for_initial_annotations\n' - numberOfProblematicCases = 0 - - # initialize modality dict - files_to_check = { - 'T1': '_t1.nii.gz', - 'T1CE': '_t1ce.nii.gz', - 'T2': '_t2.nii.gz', - 'FL': '_flair.nii.gz', - 'MASK': '_final_seg.nii.gz' - } - - label_values_expected = np.array([0,1,2,4]) # initialize label array - - for dirs in os.listdir(inputDir): - if os.path.isdir(inputDir + '/' + dirs): - if (dirs != 'logs') and (dirs != 'split_info'): # don't perform sanity check for the 'logs' folder - currentSubjectDir = os.path.join(inputDir, dirs) - if os.path.isdir(currentSubjectDir): # for detected subject dir - filesInDir = os.listdir(currentSubjectDir) # get all files in each directory - files_for_subject = {} - for i in range(len(filesInDir)): - for modality in files_to_check: # check all modalities - if filesInDir[i].endswith(files_to_check[modality]): # if modality detected, populate subject dict - files_for_subject[modality] = os.path.abspath(os.path.join(currentSubjectDir, filesInDir[i])) - - currentSubjectsLabelIsAbsent = False # check if current subject's final_seg is present or not - all_modalities_present = True - if len(files_for_subject) != 5: # if all modalities are not present, add exit statement - if ((len(files_for_subject) == 4) and ('MASK' in files_for_subject)) or (len(files_for_subject) < 4): - numberOfProblematicCases += 1 - errorMessage += dirs + ',All_required_modalities_are_not_present.\n' - all_modalities_present = False - - # check for negatives - for mod in files_for_subject: - _, count = read_image_with_min_check(files_for_subject[mod]) - if count is not None: - errorMessage += dirs + ',Negatives_in_' + mod + '_Count=' + count - - if all_modalities_present and len(files_for_subject) > 0: - first, *rest = files_for_subject.items() # split the dict - - for i in range(0, len(rest)): - if not(imageSanityCheck(first[1], rest[i][1])): # image sanity check - numberOfProblematicCases += 1 - errorMessage += dirs + ',Image_dimension/size/origin/spacing_mismatch_between_' + first[0] + '_and_' + rest[i][0] + '\n' - - currentSubjectsLabelIsProblematic = False # check if current subject's label has issues - if 'MASK' in files_for_subject: - currentLabelFile = files_for_subject['MASK'] - fixForLabelThree(currentLabelFile) - returnString = checkBraTSLabels(dirs, currentLabelFile) - if returnString: # if there is something present in the return string - numberOfProblematicCases += 1 - currentSubjectsLabelIsProblematic = True - errorMessage += returnString - else: - currentSubjectsLabelIsAbsent = True - - fusionToRecommend = '' - segmentationsForQCPresent = True - problematicSegmentationMessage = '' - if currentSubjectsLabelIsProblematic or currentSubjectsLabelIsAbsent: # if final_seg is absent or is problematic - segmentationsFolder = os.path.join(currentSubjectDir, 
'SegmentationsForQC') - if os.path.isdir(segmentationsFolder): - segmentationFiles = os.listdir(segmentationsFolder) # get all files in each directory - for i in range(len(segmentationFiles)): - if 'fused' in segmentationFiles[i]: # only perform checks for fusion results - currentLabelFile = os.path.join(segmentationsFolder, segmentationFiles[i]) - returnString = checkBraTSLabels(dirs, currentLabelFile) - if returnString: # if there is something present in the return string - problematicSegmentationMessage += returnString - else: - if not('staple' in fusionToRecommend): # overwrite the fusion result to recommend if not staple that was fine - fusionToRecommend = currentLabelFile - - if not fusionToRecommend: - errorMessage += problematicSegmentationMessage - if not('staple' in fusionToRecommend): # recommend nnunet or deepscan if not staple - if not('itkvoting' in fusionToRecommend): - if not('majorityvoting' in fusionToRecommend): - fusionToRecommend = 'nnunet_or_deepscan' - else: - fusionToRecommend = 'majorityvoting' - else: - fusionToRecommend = 'itkvoting' - else: - fusionToRecommend = 'staple' - - else: - errorMessage += dirs + ',SegmentationsForQC_folder_is_absent\n' - numberOfProblematicCases += 1 - segmentationsForQCPresent = False - # errorMessage += dirs + ',Label_file_absent,N.A.,N.A.\n' - - if currentSubjectsLabelIsAbsent and segmentationsForQCPresent: - numberOfProblematicCases += 1 - if fusionToRecommend: - errorMessage += dirs + ',' + fusionToRecommend + '\n' - else: - errorMessage += dirs + ',final_seg_absent_and_use_either_nnunet_or_deepscan,N.A.,N.A.\n' - - if numberOfProblematicCases > 0: - # print(errorMessage) - with open(args.outputFile, 'a') as the_file: - the_file.write(errorMessage) - sys.exit('There were subjects with either missing annotations or where annotations had problematic labels. 
Please see the recommendation(s) for new initialization in the outputFile: \'' + args.outputFile + '\'') - else: - print('Congratulations, all subjects are fine and ready to train!') - -if __name__ == '__main__': - if platform.system() == 'Darwin': - sys.exit('macOS is not supported') - else: - main() diff --git a/src/applications/PrepareDataset.cxx b/src/applications/PrepareDataset.cxx deleted file mode 100644 index 6a1e9205..00000000 --- a/src/applications/PrepareDataset.cxx +++ /dev/null @@ -1,598 +0,0 @@ -#include -#include -#include -#include - -#include "cbicaCmdParser.h" -#include "cbicaUtilities.h" -#include "CaPTkGUIUtils.h" -#include "cbicaITKImageInfo.h" -#include "cbicaITKSafeImageIO.h" - -#include "BiasCorrection.hpp" - -std::vector< std::map< std::string, std::string > > GetCSVContents(const std::string &fileName) -{ - std::vector< std::map< std::string, std::string > > csvContents; - std::ifstream data(fileName.c_str()); - std::string line, cell; - std::vector< std::string > headers; // csv headers - - size_t i = 0, j = 0; - while (std::getline(data, line)) - { - j = 0; - std::map< std::string, std::string > currentRow; - std::stringstream lineStream(line); - while (std::getline(lineStream, cell, ',')) - { - if (i == 0) - { - auto temp = cell; - std::transform(temp.begin(), temp.end(), temp.begin(), ::tolower); - temp = cbica::stringReplace(temp, "-", ""); // remove all hyphens - temp = cbica::stringReplace(temp, "_", ""); // remove all underscores - - if ((temp == "patientid") || (temp == "subjectid") || (temp == "subject") || (temp == "subid")) - { - headers.push_back("ID"); - } - else if ((temp == "t1gd") || (temp == "t1ce") || (temp == "t1post")) - { - headers.push_back("T1GD"); - } - else if ((temp == "t1") || (temp == "t1pre")) - { - headers.push_back("T1"); - } - else if (temp == "t2") - { - headers.push_back("T2"); - } - else if ((temp == "t2flair") || (temp == "flair") || (temp == "fl") || (temp.find("fl") != std::string::npos) || (temp.find("t2fl") != std::string::npos)) - { - headers.push_back("FLAIR"); - } - } - else - { - if (headers.size() != 5) - { - std::cerr << "All required headers were not found in CSV. 
Please ensure the following are present: 'PatientID,T1,T1GD,T2,T2FLAIR'"; - return csvContents; - } - if (cell.find(" ") != std::string::npos) - { - std::cerr << "Please ensure that there are no spaces in the file paths."; - return csvContents; - } - //auto temp = cbica::stringReplace(cell, "\"", ""); // remove all double quotes - //temp = cbica::stringReplace(temp, "'", ""); // remove all single quotes - currentRow[headers[j]] = cell; - } - j++; - } - if (i != 0) - { - csvContents.push_back(currentRow); - } - i++; - } - - return csvContents; -} - -std::string getStdoutFromCommand(const std::string command) -{ - std::array buffer; - std::string result; -#ifdef WIN32 -#define PCLOSE _pclose -#define POPEN _popen -#else -#define PCLOSE pclose -#define POPEN popen -#endif - std::unique_ptr pipe(POPEN(command.c_str(), "r"), PCLOSE); - if (!pipe) - { - throw std::runtime_error("popen() failed!"); - } - while (fgets(buffer.data(), buffer.size(), pipe.get()) != nullptr) - { - result += buffer.data(); - } - return result; -} - -bool BraTSPipeline(std::map< std::string, std::string > inputFiles, const std::string& outputDir) -{ - auto debug = false; - // sanity checks - for (auto it = inputFiles.begin(); it != inputFiles.end(); it++) - { - if (!cbica::exists(it->second)) - { - std::cerr << "Couldn't find the modality '" << it->first << "', denoted by '" << it->second << "'.\n"; - return false; - } - - auto inputImageInfo = cbica::ImageInfo(it->second); - if (inputImageInfo.GetImageDimensions() != 3) - { - std::cerr << "The BraTS pipeline is only valid for 3D images, whereas the image '" - << it->second << "' for modality '" << it->first << "' has " << - inputImageInfo.GetImageDimensions() << " dimentions.\n"; - return false; - } - } - using ImageType = itk::Image< float, 3 >; // default image type - - // variables to store various images - std::map< std::string, ImageType::Pointer > inputImages, inputImages_processed; - std::map< std::string, std::string > inputModalities_orientation; - - // default names - std::map< std::string, std::string > outputNames, - inputReorientedFiles, inputReorientedBiasFiles, // filenames for reoriented and bias-corrected files - outputMatFiles, outputRegisteredImages, outputRegisteredMaskedImages; // filenames for matrices and images - - if (debug) - { - std::cout << "Reading input images.\n"; - } - - // construct path to dcm2niix for debug/release modes and different OS - std::string m_exe; -#ifdef CAPTK_PACKAGE_PROJECT -#if WIN32 - m_exe = cbica::getExecutablePath() + "/dcm2niix.exe"; -#else - m_exe = cbica::getExecutablePath() + "/dcm2niix"; -#endif -#else -#if WIN32 - m_exe = std::string(PROJECT_SOURCE_DIR) + "/src/applications/individualApps/dcm2niix/dcm2niix.exe"; -#else - m_exe = std::string(PROJECT_SOURCE_DIR) + "/src/applications/individualApps/dcm2niix/dcm2niix"; -#endif -#endif - - for (auto it = inputFiles.begin(); it != inputFiles.end(); it++) - { - auto modality = it->first; - /// [1] read image - DICOM to NIfTI conversion, if applicable - inputImages[modality] = cbica::ReadImage< ImageType >(it->second); - - if (inputImages[modality].IsNull() && cbica::IsDicom(it->second)) - { - auto dicomFolderPath = cbica::getFilenamePath(it->second); - - if (!cbica::isFile(m_exe)) - { - std::cerr << "Couldn't find the dcm2niix executable, which was expected in '" << cbica::normalizePath(m_exe) << "'.\n"; - return false; - } - //else - //{ - - //} - - auto tempOutput = cbica::createTemporaryDirectory(); - //construct command - std::string fullCommandToRun = 
cbica::normPath(m_exe) + " -o " + cbica::normPath(tempOutput) + " -z y \"" + cbica::normPath(dicomFolderPath) + "\""; - - //run command via system call - if (std::system((fullCommandToRun).c_str()) != 0) - { - std::cerr << "Something went wrong during dicom to nifti conversion, please re-try or contact sofware@cbica.upenn.edu.\n"; - return false; - } - - auto filesInDir = cbica::filesInDirectory(tempOutput); - for (size_t i = 0; i < filesInDir.size(); i++) - { - if (cbica::getFilenameExtension(filesInDir[i]) == ".nii.gz") - { - inputImages[modality] = cbica::ReadImage< ImageType >(filesInDir[i]); - break; - } - } - cbica::removeDirectoryRecursively(tempOutput, true); - } - - if (inputImages[modality].IsNotNull()) - { - auto fileToWrite = outputDir + "/raw_" + modality + ".nii.gz"; - cbica::WriteImage< ImageType >(inputImages[modality], fileToWrite); - } - else - { - if (cbica::IsDicom(it->second)) - { - std::cerr << "Something went wrong with the DICOM to NIfTI conversion for modality '" << - modality << "' with filename '" << it->second << "'" - << ", please use another package to conver to NIfTI and try again.\n"; - return false; - } - else - { - std::cerr << "Something went wrong with reading the raw input image, please re-try or contact sofware@cbica.upenn.edu.\n"; - return false; - } - } - - /// [2] LPS/RAI re-orientation - if (debug) - { - std::cout << "Performing re-orientation to LPS for modality '" << modality << "'.\n"; - } - - auto temp = cbica::GetImageOrientation(inputImages[modality], "RAI"); - inputModalities_orientation[modality] = temp.first; - inputImages_processed[modality] = temp.second; - if (inputImages_processed[modality].IsNull()) - { - std::cerr << "Something went wrong with re-orienting the input image, please re-try or contact sofware@cbica.upenn.edu.\n"; - return false; - } - else - { - inputReorientedFiles[modality] = outputDir + "/raw_rai_" + modality + ".nii.gz"; - // the re-oriented images need to be written because these are passed on to greedy - cbica::WriteImage< ImageType >(inputImages_processed[modality], inputReorientedFiles[modality]); - } - - /// [3] N4 bias correction - - if (debug) - { - std::cout << "Starting bias correction for modality '" << modality << "'.\n"; - } - - // the bias-corrected images need to be written because these are passed on to greedy - inputReorientedBiasFiles[modality] = outputDir + "/raw_rai_n4_" + modality + ".nii.gz"; - - if (!cbica::fileExists(inputReorientedBiasFiles[modality])) - { - BiasCorrection biasCorrector; - { - using MaskImageType = itk::Image; - typename MaskImageType::Pointer maskImage; // mask inits to null - auto outputImage = biasCorrector.Run("n4", - inputImages_processed[modality], - maskImage, - BiasCorrection::default_splineOrder, - BiasCorrection::default_maxIterations, - BiasCorrection::default_fittingLevels, - BiasCorrection::default_filterNoise, - BiasCorrection::default_fwhm, - BiasCorrection::default_otsuBins); - if (outputImage.IsNotNull()) - { - inputImages_processed[modality] = outputImage; - inputImages_processed[modality]->DisconnectPipeline(); - } - else - { - std::cerr << "Something went wrong with bias-correcting the re-oriented image, please re-try or contact sofware@cbica.upenn.edu.\n"; - return false; - } - } - cbica::WriteImage< ImageType >(inputImages_processed[modality], inputReorientedBiasFiles[modality]); - } - - if (modality != "T1CE") - { - outputNames[modality] = modality + "_to_T1CE"; // all output names can be controlled from here - } - else - { - outputNames[modality] = 
modality + "_to_SRI"; // all output names can be controlled from here - } - } // end inputFiles iterator - - /// [4] Registration using Greedy - // we do T1CE to Atlas registration first because other registrations are dependent on this - if (debug) - { - std::cout << "Registering T1CE to SRI atlas.\n"; - } - - auto greedyPathAndDim = getApplicationPath("greedy") + " -d 3"; - - auto captkDataDir = getCaPTkDataDir(); - auto atlasImage = captkDataDir + "/sri24/atlasImage.nii.gz"; - outputMatFiles["T1CE"] = outputDir + "/" + outputNames["T1CE"] + ".mat"; - outputRegisteredImages["T1CE"] = outputDir + "/" + outputNames["T1CE"] + ".nii.gz"; - outputRegisteredMaskedImages["T1CE"] = outputDir + "/brain_T1CE.nii.gz"; - - std::string fullCommand; - - if (!cbica::exists(outputMatFiles["T1CE"])) - { - fullCommand = " -a -m NMI -i " + atlasImage + " " + inputReorientedBiasFiles["T1CE"] - + " -o " + outputMatFiles["T1CE"] + " -ia-image-centers -n 100x50x10 -dof 6"; - - if (debug) - { - std::cout << "Greedy command: " << greedyPathAndDim + fullCommand << "\n"; - } - if (std::system((greedyPathAndDim + fullCommand).c_str()) != 0) - { - std::cerr << "Something went wrong when registering T1CE image to SRI atlas, please re-try or contact sofware@cbica.upenn.edu.\n"; - return false; - } - } // end outputMatFiles["T1CE"] check - - if (!cbica::exists(outputRegisteredImages["T1CE"])) - { - fullCommand = " -rf " + atlasImage + " -ri LINEAR -rm " + - inputReorientedFiles["T1CE"] + " " + outputRegisteredImages["T1CE"] + " -r " + - outputMatFiles["T1CE"]; - - if (debug) - { - std::cout << "Greedy command: " << greedyPathAndDim + fullCommand << "\n"; - } - - if (std::system((greedyPathAndDim + fullCommand).c_str()) != 0) - { - std::cerr << "Something went wrong when applying registration matrix to generate T1CE image in SRI atlas space, please re-try or contact sofware@cbica.upenn.edu.\n"; - return false; - } - } // end outputRegisteredImages["T1CE"] check - - for (auto it = inputFiles.begin(); it != inputFiles.end(); it++) - { - auto modality = it->first; - if (modality != "T1CE") // T1CE registration has happened before - { - outputMatFiles[modality] = outputDir + "/" + outputNames[modality] + ".mat"; - outputRegisteredImages[modality] = outputDir + "/" + modality + "_to_SRI.nii.gz"; - outputRegisteredMaskedImages[modality] = outputDir + "/brain_" + modality + ".nii.gz"; - - if (!cbica::exists(outputMatFiles[modality])) - { - // we use the bias-corrected image for registration as it is easier localize transformations - fullCommand = " -a -m NMI -i " + inputReorientedBiasFiles["T1CE"] + " " + inputReorientedBiasFiles[modality] - + " -o " + outputMatFiles[modality] + " -ia-image-centers -n 100x50x10 -dof 6"; - if (debug) - { - std::cout << "Registering " << modality << " to T1CE.\n"; - std::cout << "Greedy command: " << greedyPathAndDim + fullCommand << "\n"; - } - - if (std::system((greedyPathAndDim + fullCommand).c_str()) != 0) - { - std::cerr << "Something went wrong when registering " << modality - << "to T1CE image, please re-try or contact sofware@cbica.upenn.edu.\n"; - return false; - } - } // end outputMatFiles[modality] check - - if (debug) - { - std::cout << "Generating image for " << modality << " registered to the atlas.\n"; - std::cout << "Greedy command: " << greedyPathAndDim + fullCommand << "\n"; - } - - if (!cbica::exists(outputRegisteredImages[modality])) - { - // the final registration is applied on the original image after re-orientation (not bias-corrected) to - // ensure maximum fidelity with 
original image - fullCommand = " -rf " + atlasImage + " -ri LINEAR -rm " + inputReorientedFiles[modality] + " " + - outputRegisteredImages[modality] + " -r " - + outputMatFiles["T1CE"] + " " - + outputMatFiles[modality]; - - if (std::system((greedyPathAndDim + fullCommand).c_str()) != 0) - { - std::cerr << "Something went wrong when applying registration matrix to generate " << modality << " image in SRI atlas space, please re-try or contact sofware@cbica.upenn.edu.\n"; - return false; - } - } // end outputRegisteredImages[modality] check - } // end modality check - } // end modality loop - - // variables that are used later on - auto finalBrainMask = cbica::normalizePath(outputDir + "/brainMask_SRI.nii.gz"); - auto deepMedicExe = getApplicationPath("DeepMedic"); - auto brainMaskFile = outputDir + "/dmOut_skull/brainMask_SRI.nii.gz"; - - /// [5] Skull-stripping using DeepMedic - if (debug) - { - std::cout << "Starting skull-stripping using DeepMedic.\n"; - } - - if (!cbica::exists(brainMaskFile)) - { - fullCommand = " -md " + captkDataDir + "/deepMedic/saved_models/skullStripping/ " + - "-i " + outputRegisteredImages["T1"] + "," + - outputRegisteredImages["T1CE"] + "," + - outputRegisteredImages["T2"] + "," + - outputRegisteredImages["FL"] + " -o " + - brainMaskFile; - - if (debug) - { - std::cout << "Command for DeepMedic: " << deepMedicExe + fullCommand << "\n"; - } - - if (std::system((deepMedicExe + fullCommand).c_str()) != 0) - { - std::cerr << "Something went wrong when performing skull-stripping using DeepMedic, please re-try or contact sofware@cbica.upenn.edu.\n"; - return false; - } - } // end brainMask check - - if (!cbica::exists(brainMaskFile)) - { - std::cerr << "Brain Mask was not written, cannot proceed.\n"; - return false; - } - - // variables to store outputs in patient space - std::map< std::string, std::string > outputFiles_withoutOrientationFix, outputFiles_withOrientationFix; - - cbica::WriteImage< TImageType >( - cbica::ReadImage< TImageType >(brainMaskFile), - finalBrainMask - ); - - // iterate over outputRegisteredMaskedImages - for (auto it = outputRegisteredMaskedImages.begin(); it != outputRegisteredMaskedImages.end(); it++) - { - auto modality = it->first; - auto maskFilter = itk::MaskImageFilter< ImageType, ImageType >::New(); - maskFilter->SetInput(cbica::ReadImage< ImageType >(outputRegisteredImages[modality])); - maskFilter->SetMaskImage(cbica::ReadImage< TImageType >(finalBrainMask)); - try - { - maskFilter->Update(); - } - catch (const std::exception& e) - { - std::cerr << "Something went wrong when applying the brain mask to modality '" - << modality << "': " << e.what(); - return false; - } - cbica::WriteImage< ImageType >(maskFilter->GetOutput(), it->second); // write the masked image - } - -} - -bool copyFilesToCorrectLocation(const std::string& interimOutputDir, const std::string& finalSubjectOutputDir, const std::string& subjectID) -{ - bool runBratsPipeline = false; - - auto fileToCopy = interimOutputDir + "/brain_T1CE.nii.gz"; - auto fileDestination = finalSubjectOutputDir + "/" + subjectID + "_brain_t1ce.nii.gz"; - - if (!cbica::isFile(fileDestination)) - { - if (cbica::isFile(fileToCopy)) - { - cbica::copyFile(fileToCopy, fileDestination); - } - else - { - fileToCopy = interimOutputDir + "/brain_T1GD.nii.gz"; - if (cbica::isFile(fileToCopy)) - { - cbica::copyFile(fileToCopy, fileDestination); - } - else - { - runBratsPipeline = true; - } - } - } - - fileToCopy = interimOutputDir + "/brain_T1.nii.gz"; - fileDestination = finalSubjectOutputDir + 
"/" + subjectID + "_brain_t1.nii.gz"; - if (!cbica::isFile(fileDestination)) - { - if (cbica::isFile(fileToCopy)) - { - cbica::copyFile(fileToCopy, fileDestination); - } - else - { - runBratsPipeline = true; - } - } - - fileToCopy = interimOutputDir + "/brain_T2.nii.gz"; - fileDestination = finalSubjectOutputDir + "/" + subjectID + "_brain_t2.nii.gz"; - if (cbica::isFile(fileToCopy)) - { - cbica::copyFile(fileToCopy, fileDestination); - } - else - { - runBratsPipeline = true; - } - - fileToCopy = interimOutputDir + "/brain_FL.nii.gz"; - fileDestination = finalSubjectOutputDir + "/" + subjectID + "_brain_flair.nii.gz"; - if (cbica::isFile(fileToCopy)) - { - cbica::copyFile(fileToCopy, fileDestination); - } - else - { - runBratsPipeline = true; - } - - return runBratsPipeline; -} - -int main(int argc, char** argv) -{ - cbica::CmdParser parser(argc, argv, "PrepareDataset"); - - parser.addApplicationDescription("This application calls the BraTSPipeline for all input images and stores the final and intermediate files separately"); - parser.addRequiredParameter("i", "inputCSV", cbica::Parameter::FILE, "Input CSV file", "Input CSV file which contains paths to structural images", "Headers should be 'PatientID,T1,T1GD,T2,T2FLAIR'"); - parser.addRequiredParameter("o", "outputDir", cbica::Parameter::DIRECTORY, "Directory", "Output directory for final output", "This will write 2 folders: 'DataForFeTS' and 'DataForQC'", - "Former contains only the files needed for FeTS inference/training and ", "latter contains all intermediate files from this processing"); - - std::string inputCSV, outputDir; - - parser.getParameterValue("i", inputCSV); - parser.getParameterValue("o", outputDir); - - auto csvContents = GetCSVContents(inputCSV); - - if (csvContents.empty()) - { - std::cerr << "Parsed CSV data structure is empty, cannot proceed.\n"; - return EXIT_FAILURE; - } - - // set up the output directories - auto outputDir_qc = cbica::normPath(outputDir + "/DataForQC"); - auto outputDir_final = cbica::normPath(outputDir + "/DataForFeTS"); - cbica::createDir(outputDir); - cbica::createDir(outputDir_qc); - cbica::createDir(outputDir_final); - - auto bratsPipeline_exe = getApplicationPath("BraTSPipeline"); - - if (!cbica::isFile(bratsPipeline_exe)) - { - std::cerr << "BraTSPipeline was not found in the installation, cannot proceed.\n"; - return EXIT_FAILURE; - } - // iterate through all subjects - for (size_t i = 0; i < csvContents.size(); i++) - { - std::cout << "Started processing subject '" << csvContents[i]["ID"] << "'\n"; - - auto interimOutputDir = outputDir_qc + "/" + csvContents[i]["ID"]; - auto finalSubjectOutputDir = outputDir_final + "/" + csvContents[i]["ID"]; - cbica::createDir(interimOutputDir); - cbica::createDir(finalSubjectOutputDir); - - auto runBratsPipeline = copyFilesToCorrectLocation(interimOutputDir, finalSubjectOutputDir, csvContents[i]["ID"]); - - if (runBratsPipeline) - { - auto command = bratsPipeline_exe + " -t1 " + csvContents[i]["T1"] + " -t1c " + csvContents[i]["T1GD"] + " -t2 " + csvContents[i]["T2"] + " -fl " + csvContents[i]["FLAIR"] + " -o " + interimOutputDir + " -s 1"; - - auto log = getStdoutFromCommand(command); - std::ofstream myfile; - myfile.open(interimOutputDir + "/log.txt"); - myfile << log; - myfile.close(); - } - - if (!copyFilesToCorrectLocation(interimOutputDir, finalSubjectOutputDir, csvContents[i]["ID"])) - { - std::cerr <<"BraTSPipeline failed for subject '" << csvContents[i]["ID"] << "'\n"; - } - } - - return EXIT_SUCCESS; -} \ No newline at end of file diff --git 
a/src/applications/PrepareDataset.py b/src/applications/PrepareDataset.py index 40b9079c..2edef449 100644 --- a/src/applications/PrepareDataset.py +++ b/src/applications/PrepareDataset.py @@ -1,136 +1,859 @@ -import os, argparse, sys, csv, platform, subprocess, shutil +import os, argparse, sys, csv, platform, subprocess, shutil, posixpath, yaml +from typing import Union from pathlib import Path from datetime import date +import pandas as pd +import SimpleITK as sitk +from tqdm import tqdm +import numpy as np +from skimage.measure import label +from copy import deepcopy -def GetCSVContents(filename): - ''' - Read filename and return a list of dictionaries that have the csv contents - ''' - with open(filename, 'r') as csvfile: - datareader = csv.reader(csvfile) - - parserHeader = True - headers = [] # save headers - csvContents = [] # csv contents - for row in datareader: - - if parserHeader: # parser headers first - - for col in row: - temp = col.lower() # convert to lower case - if ((temp == 'patientid') or (temp == 'subjectid') or (temp == 'subject') or (temp == 'subid')): - headers.append('ID') - elif ((temp == 't1gd') or (temp == 't1ce') or (temp == 't1post')): - headers.append('T1GD') - elif ((temp == 't1') or (temp == 't1pre')): - headers.append('T1') - elif ((temp == 't2')): - headers.append('T2') - elif ((temp == 't2flair') or (temp == 'flair') or (temp == 'fl') or ('fl' in temp) or ('t2fl' in temp)): - headers.append('FLAIR') - - parserHeader = False - - else: - if len(headers) != 5: - sys.exit('All required headers were not found in CSV. Please ensure the following are present: \'PatientID,T1,T1GD,T2,T2FLAIR\'') - - col_counter = 0 - currentRow = {} - for col in row: # iterate through columns - if ' ' in col: - sys.exit('Please ensure that there are no spaces in the file paths.') - else: - currentRow[headers[col_counter]] = col # populate header with specific identifiers - col_counter += 1 - - csvContents.append(currentRow) # populate csv rows - - return csvContents - -def copyFilesToCorrectLocation(interimOutputDir, finalSubjectOutputDir, subjectID): - ''' - This function copies the intermediate files and final outputs to correct location and if these are absent, returns a bool flag stating that brats pipeline needs to run again - ''' - - # copy files to correct location for inference and training - runBratsPipeline = False - output_t1c_brain_file_inter = os.path.join(interimOutputDir, "brain_T1CE.nii.gz") - output_t1c_brain_file_final = os.path.join(finalSubjectOutputDir, subjectID + "_brain_t1ce.nii.gz") - if not os.path.exists(output_t1c_brain_file_final): - if os.path.exists(output_t1c_brain_file_inter): - shutil.copyfile(output_t1c_brain_file_inter, output_t1c_brain_file_final) - else: - output_t1c_brain_file_inter = os.path.join(interimOutputDir, "brain_T1GD.nii.gz") - if os.path.exists(output_t1c_brain_file_inter): - shutil.copyfile(output_t1c_brain_file_inter, output_t1c_brain_file_final) - else: - runBratsPipeline = True +from FigureGenerator.screenshot_maker import figure_generator +from GANDLF.cli import main_run +from LabelFusion.wrapper import fuse_images +from .constants import * + + +def setup_parser(): + copyrightMessage = ( + "Contact: admin@fets.ai\n\n" + + "This program is NOT FDA/CE approved and NOT intended for clinical use.\nCopyright (c) " + + str(date.today().year) + + " University of Pennsylvania. All rights reserved." 
+ ) + parser = argparse.ArgumentParser( + prog="PrepareDataset", + formatter_class=argparse.RawTextHelpFormatter, + description="This application calls the BraTSPipeline for all input images and stores the final and intermediate files separately.\n\n" + + copyrightMessage, + ) + parser.add_argument( + "-inputCSV", + type=str, + help="The absolute path of the input CSV file containing the list of subjects and their corresponding images", + required=True, + ) + parser.add_argument( + "-outputDir", + type=str, + help="The output dir to write the results", + required=True, + ) + parser.add_argument( + "-executablePath", + type=str, + help="The path to the BraTSPipeline executable. If not given, will infer from current script's location.", + nargs="?", + const=None, + ) + + return parser + + +def _get_relevant_dicom_tags(filename: str) -> dict: + """ + This function reads the relevant DICOM tags from the input DICOM directory. + + Args: + filename (str): The input DICOM filename. + + Returns: + dict: The relevant DICOM tags. + """ + input_dicom_dir = filename + if os.path.isfile(filename): + input_dicom_dir = os.path.dirname(filename) + + output_dict = {} + try: + series_IDs = sitk.ImageSeriesReader.GetGDCMSeriesIDs(input_dicom_dir) + # if len(series_IDs) > 1: + # print( + # f"WARNING: Multiple series IDs detected in {input_dicom_dir}.", + # file=sys.stderr, + # ) + + series_file_names = sitk.ImageSeriesReader.GetGDCMSeriesFileNames( + input_dicom_dir, series_IDs[0] + ) + series_reader = sitk.ImageSeriesReader() + series_reader.SetFileNames(series_file_names) + series_reader.MetaDataDictionaryArrayUpdateOn() + series_reader.LoadPrivateTagsOn() + itk_image = series_reader.Execute() + output_dict = { + "Resolution": str(itk_image.GetSpacing()).replace(" ", ""), + } + # although _technically_ the metadata is different for each slice, we'll just use the first slice's metadata, since the rest is not relevant for our purposes + ## reference: https://simpleitk.readthedocs.io/en/master/link_DicomSeriesReadModifyWrite_docs.html + keys_to_extract = { + "0008|0008": "Image Type", + "0008|0070": "Manufacturer", + "0008|1090": "Manufacturer's Model Name", + "0008|0022": "Acquisition Date", + "0008|0032": "Acquisition Time", + "0010|1010": "Patient's Age", + "0010|0040": "Patient's Sex", + "0018|0020": "Scanning Sequence", + "0018|0021": "Sequence Variant", + "0018|0022": "Scan Options", + "0018|0023": "MR Acquisition Type", + "0018|0080": "Repetition Time", + "0018|0081": "Echo Time", + "0018|0082": "Inversion Time", + "0018|1310": "Acquisition Matrix", + "0018|1314": "Flip Angle", + "0018|0087": "Magnetic Field Strength", + "0018|1050": "Slice Thickness", + "0018|0088": "Spacing Between Slices", + "0020|1002": "Images in Acquisition", + } + for key in keys_to_extract: + output_dict[keys_to_extract[key]] = series_reader.GetMetaData(0, key) + except RuntimeError as e: + # print( + # f"WARNING: Could not read DICOM tags from {input_dicom_dir}.", + # ) + pass + + return output_dict + + +def save_screenshot( + input_images: dict, output_filename: str = None, input_mask: str = None +) -> None: + """ + This function saves the screenshot of the input images and mask. + + Args: + input_images (dict): The input multi-modal images. + output_filename (str, optional): The output filename to save the screenshot. Defaults to None. + input_mask (str, optional): The input mask filename. Defaults to None. 
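+
+ Example (illustrative only; the filenames below are hypothetical placeholders, while the dict keys are the ones this function actually reads):
+ save_screenshot(
+ input_images={"T1": "sub_t1.nii.gz", "T1GD": "sub_t1c.nii.gz", "T2": "sub_t2w.nii.gz", "FLAIR": "sub_t2f.nii.gz"},
+ output_filename="sub_summary.png",
+ input_mask="sub_brainMask_fused.nii.gz",
+ )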
+ """ + # save the screenshot + images = (",").join( + [ + input_images["T1"], + input_images["T1GD"], + input_images["T2"], + input_images["FLAIR"], + ] + ) + ylabels = (",").join(MODALITIES_LIST) + + figure_generator( + input_images=images, + ylabels=ylabels, + output=output_filename, + input_mask=input_mask, + flip_sagittal=True, + flip_coronal=True, + ) + + +def _read_image_with_min_check(filename): + """ + This function fixes negatives by scaling the image according to the following logic: + if min(input) < 0: + for all x in image: + if x != 0: + x -= min + + Args: + filename (str): The input filename. + + Returns: + sitk.Image: The read image. + int: The negative count. + """ + input_image = sitk.ReadImage(filename) + input_image_array = sitk.GetArrayFromImage(input_image) + min = np.min(input_image_array) + + # the threshold above which an error is displayed, otherwise, the intensities are scaled + max_negative_count_threshold = 5000 + + if min < 0: + blobs = input_image_array < 0 + all_labels_nonZero = np.nonzero(label(blobs)) + _, counts = np.unique(all_labels_nonZero, return_counts=True) + + if np.max(counts) < max_negative_count_threshold: + output_array = deepcopy(input_image_array) + mask = output_array != 0 + output_array[mask] = output_array[mask] - min + output_image = sitk.GetImageFromArray(output_array) + output_image.CopyInformation(input_image) + sitk.WriteImage(output_image, filename) + return 0 + else: + return counts.astype(int) + + return 0 + + +def _parse_csv_header(filename): + """ + Read filename and return the parsed headers. + + Args: + filename (str): The input filename. + + Returns: + dict: The parsed headers. + """ + with open(filename, "r") as csvfile: + datareader = csv.reader(csvfile) + + headers = {} # save headers + for row in datareader: + for col in row: + temp = col.lower() # convert to lower case + temp = temp.replace(" ", "") # remove spaces + temp = temp.replace("_", "") # remove underscores + temp = temp.replace("-", "") # remove dashes + if temp in SUBJECT_NAMES: + headers["ID"] = col + elif temp in TIMEPOINT_NAMES: + headers["Timepoint"] = col + else: + for key in MODALITY_ID_DICT.keys(): + if temp in MODALITY_ID_DICT[key]: + headers[key] = col + break + + if "Timepoint" not in headers: + headers["Timepoint"] = None + return headers + + +def _copy_files_to_correct_location(interimOutputDir, finalSubjectOutputDir, subjectID): + """ + This function copies the intermediate files and final outputs to correct location and if these are absent, returns a bool flag stating that brats pipeline needs to run again + + Args: + interimOutputDir (str): The interim output directory. + finalSubjectOutputDir (str): The final subject output directory. + subjectID (str): The subject ID. + + Returns: + bool, dict: The flag stating whether brats pipeline needs to run again and the output files in the expected location. 
+ """ + + # copy files to correct location for inference and training + runBratsPipeline = False + input_files = { + k: posixpath.join(interimOutputDir, v) for k, v in INPUT_FILENAMES.items() + } + expected_outputs = get_expected_outputs(subjectID, finalSubjectOutputDir) + + for key in input_files.keys(): + if not os.path.exists(expected_outputs[key]): + if os.path.exists(input_files[key]): + shutil.copyfile(input_files[key], expected_outputs[key]) + else: + runBratsPipeline = True + + return runBratsPipeline, expected_outputs + + +def get_expected_outputs(subjectID: str, output_dir: str) -> dict: + expected_outputs = { + "ID": subjectID, + "T1": posixpath.join(output_dir, subjectID + "_t1.nii.gz"), + "T1GD": posixpath.join(output_dir, subjectID + "_t1c.nii.gz"), + "T2": posixpath.join(output_dir, subjectID + "_t2w.nii.gz"), + "FLAIR": posixpath.join(output_dir, subjectID + "_t2f.nii.gz"), + } + return expected_outputs + + +def get_brain_mask_files(subject_id, output_dir) -> dict: + files = {} + for modality in MODALITIES_LIST: + files[modality] = posixpath.join( + output_dir, + f"{subject_id}_brain_{MODALITY_ID_MAPPING[modality]}.nii.gz", + ) + return files + + +def _run_brain_extraction_using_gandlf( + subject_id: str, + input_oriented_images: dict, + models_to_infer: Union[str, list], + base_output_dir: str, +) -> sitk.Image: + """ + This function runs brain extraction using gandlf. + + Args: + subject_id (str): The subject ID. + input_oriented_images (dict): The input oriented images. + models_to_infer (Union[str, list]): The models to infer as list or as comma-separated string. + base_output_dir (str): The base output directory. + + Returns: + sitk.Image: The fused brain mask. + """ + df_for_gandlf = pd.DataFrame(columns=GANDLF_DF_COLUMNS) + for key in MODALITIES_LIST: + current_modality = { + "SubjectID": subject_id + "_" + key, + "Channel_0": input_oriented_images[key], + } + df_for_gandlf = pd.concat( + [df_for_gandlf, pd.DataFrame(current_modality, index=[0])] + ) + data_path = posixpath.join(base_output_dir, BRAIN_FILENAME) + df_for_gandlf.to_csv( + data_path, + index=False, + ) + + models_to_run = ( + models_to_infer + if isinstance(models_to_infer, list) + else models_to_infer.split(",") + ) + + images_for_fusion = [] + for model_dir in models_to_run: + model_id = os.path.basename(model_dir) + model_output_dir = posixpath.join( + base_output_dir, "brain_extraction_" + str(model_id) + ) + file_list = os.listdir(model_dir) + for file in file_list: + if file.endswith(".yaml") or file.endswith(".yml"): + config_file = posixpath.join(model_dir, file) + break + + main_run( + data_csv=data_path, + config_file=config_file, + model_dir=model_dir, + train_mode=False, + device="cpu", + resume=False, + reset=False, + output_dir=model_output_dir, + ) + + model_output_dir_testing = posixpath.join(model_output_dir, TESTING_FOLDER) + modality_outputs = os.listdir(model_output_dir_testing) + for modality in modality_outputs: + modality_output_dir = posixpath.join(model_output_dir_testing, modality) + files_in_modality = os.listdir(modality_output_dir) + for file in files_in_modality: # this loop may not be necessary + if file.endswith(".nii.gz"): + file_path = posixpath.join(modality_output_dir, file) + shutil.copyfile( + file_path, + posixpath.join( + base_output_dir, + f"brainMask_{model_id}_{modality}.nii.gz", + ), + ) + images_for_fusion.append(sitk.ReadImage(file_path, sitk.sitkUInt8)) + + return fuse_images(images_for_fusion, "staple", [0, 1]) + + +def 
_run_tumor_segmentation_using_gandlf( + subject_id: str, + input_oriented_brain_images: dict, + models_to_infer: Union[str, list], + base_output_dir: str, +) -> sitk.Image: + """ + This function runs tumor segmentation using gandlf. + + Args: + subject_id (str): The subject ID. + input_oriented_brain_images (dict): The input oriented brain images. + models_to_infer (Union[str, list]): The models to infer as list or as comma-separated string. + base_output_dir (str): The base output directory. + + Returns: + sitk.Image: The fused tumor mask. + """ + df_for_gandlf = pd.DataFrame(columns=GANDLF_DF_COLUMNS) + current_subject = {"SubjectID": subject_id} + channel_idx = 0 + # modality order (trained according to EC): t1,t2,flair,t1c + modality_order = ["T1", "T2", "FLAIR", "T1GD"] + # todo: confirm the order for modalities + for key in modality_order: + current_subject[f"Channel_{channel_idx}"] = input_oriented_brain_images[key] + channel_idx += 1 + df_for_gandlf = pd.DataFrame(current_subject, index=[0]) + data_path = posixpath.join(base_output_dir, TUMOR_FILENAME) + df_for_gandlf.to_csv( + data_path, + index=False, + ) + + models_to_run = ( + models_to_infer + if isinstance(models_to_infer, list) + else models_to_infer.split(",") + ) + + tumor_masks_to_return = [] + images_for_fusion = [] + mask_output_dir = posixpath.join(base_output_dir, TUMOR_MASK_FOLDER) + os.makedirs(mask_output_dir, exist_ok=True) + for model_dir in models_to_run: + model_id = os.path.basename(model_dir) + model_output_dir = posixpath.join( + base_output_dir, "tumor_segmentation_" + str(model_id) + ) + file_list = os.listdir(model_dir) + for file in file_list: + if file.endswith(".yaml") or file.endswith(".yml"): + config_file = posixpath.join(model_dir, file) + break + + # ensure the openvino version is used + # NOTE: if we need this, make sure there are proper permissions + # when rewriting the config file + # parameters = yaml.safe_load(open(config_file, "r")) + # parameters["model"]["type"] = "openvino" + # yaml.safe_dump(parameters, open(config_file, "w")) + + main_run( + data_csv=data_path, + config_file=config_file, + model_dir=model_dir, + train_mode=False, + device="cpu", + resume=False, + reset=False, + output_dir=model_output_dir, + ) + + model_output_dir_testing = posixpath.join(model_output_dir, TESTING_FOLDER) + # We expect one subject (one output modality, one file). 
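+ # (Descriptive note, inferred from this code path rather than from GaNDLF documentation: the inference
+ # output is assumed to land under <model_output_dir>/<TESTING_FOLDER>/<subject>/, so the single directory
+ # entry is treated as the subject folder, its lone .nii.gz segmentation is copied into mask_output_dir,
+ # and the copy is renamed to "<subject_id>_tumorMask_model-<model_id>.nii.gz" so that the originating
+ # model remains identifiable after the per-model masks are fused.)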
+ subject = os.listdir(model_output_dir_testing)[0] + subject_output_dir = posixpath.join(model_output_dir_testing, subject) + files_in_modality = os.listdir(subject_output_dir) + for file in files_in_modality: # this loop may not be necessary + if file.endswith(".nii.gz"): + file_path = posixpath.join(subject_output_dir, file) + renamed_path = posixpath.join( + mask_output_dir, + f"{subject_id}_tumorMask_model-{model_id}.nii.gz", + ) + shutil.copyfile(file_path, renamed_path) + # Append the renamed path to keep track of model IDs + tumor_masks_to_return.append(renamed_path) + images_for_fusion.append(sitk.ReadImage(file_path, sitk.sitkUInt8)) + + fused_masks_to_return = generate_tumor_segmentation_fused_images(images_for_fusion, mask_output_dir, subject_id) + return tumor_masks_to_return + fused_masks_to_return - output_t1_brain_file_inter = os.path.join(interimOutputDir, "brain_T1.nii.gz") - output_t1_brain_file_final = os.path.join(finalSubjectOutputDir, subjectID + "_brain_t1.nii.gz") - if not os.path.exists(output_t1_brain_file_final): - if os.path.exists(output_t1_brain_file_inter): - shutil.copyfile(output_t1_brain_file_inter, output_t1_brain_file_final) - else: - runBratsPipeline = True - - output_t2_brain_file_inter = os.path.join(interimOutputDir, "brain_T2.nii.gz") - output_t2_brain_file_final = os.path.join(finalSubjectOutputDir, subjectID + "_brain_t2.nii.gz") - if not os.path.exists(output_t2_brain_file_final): - if os.path.exists(output_t2_brain_file_inter): - shutil.copyfile(output_t2_brain_file_inter, output_t2_brain_file_final) - else: - runBratsPipeline = True - - output_fl_brain_file_inter = os.path.join(interimOutputDir, "brain_FL.nii.gz") - output_fl_brain_file_final = os.path.join(finalSubjectOutputDir, subjectID + "_brain_flair.nii.gz") - if not os.path.exists(output_fl_brain_file_final): - if os.path.exists(output_fl_brain_file_inter): - shutil.copyfile(output_fl_brain_file_inter, output_fl_brain_file_final) - else: - runBratsPipeline = True +def generate_tumor_segmentation_fused_images(images_for_fusion, mask_output_dir, subject_id): + tumor_class_list = [0, 1, 2, 3, 4] + fused_masks_to_return = [] + + if len(images_for_fusion) > 1: + for fusion_type in ["staple", "simple", "voting"]: + fused_mask = fuse_images(images_for_fusion, fusion_type, tumor_class_list) + fused_mask_file = posixpath.join( + mask_output_dir, + f"{subject_id}_tumorMask_fused-{fusion_type}.nii.gz", + ) + sitk.WriteImage(fused_mask, fused_mask_file) + fused_masks_to_return.append(fused_mask_file) + + return fused_masks_to_return + + +class Preparator: + def __init__(self, input_csv: str, output_dir: str, executablePath: str): + self.input_csv = input_csv + self.input_dir = str(Path(input_csv).parent) + self.output_dir = os.path.normpath(output_dir) + self.interim_output_dir = posixpath.join(self.output_dir, INTERIM_FOLDER) + self.final_output_dir = posixpath.join(self.output_dir, FINAL_FOLDER) + self.subjects_file = posixpath.join(self.final_output_dir, SUBJECTS_FILENAME) + self.neg_subjects_file = posixpath.join( + self.final_output_dir, NEG_SUBJECTS_FILENAME + ) + self.failing_subjects_file = posixpath.join( + self.final_output_dir, FAIL_SUBJECTS_FILENAME + ) + self.dicom_tag_information_to_write_anon_file = posixpath.join( + self.final_output_dir, DICOM_ANON_FILENAME + ) + self.dicom_tag_information_to_write_collab_file = posixpath.join( + self.final_output_dir, DICOM_COLLAB_FILENAME + ) + self.__init_out_dfs() + self.stdout_log = posixpath.join(self.output_dir, STDOUT_FILENAME) + 
self.stderr_log = posixpath.join(self.output_dir, STDERR_FILENAME) + self.dicom_tag_information_to_write_collab = {} + self.dicom_tag_information_to_write_anon = {} + self.brats_pipeline_exe = executablePath + if self.brats_pipeline_exe is None: + self.brats_pipeline_exe = posixpath.join( + Path(__file__).parent.resolve(), EXEC_NAME + ) + + if platform.system() == "Windows": + if not self.brats_pipeline_exe.endswith(".exe"): + self.brats_pipeline_exe += ".exe" + + def __init_out_dfs(self): + self.subjects = pd.DataFrame( + columns=["SubjectID", "Timepoint", "T1", "T1GD", "T2", "FLAIR"] + ) + + self.neg_subjects = pd.DataFrame( + columns=["SubjectID", "Timepoint", "Modality", "Count"] + ) + self.failing_subjects = pd.DataFrame(columns=["SubjectID", "Timepoint"]) + + def validate(self): + assert os.path.exists(self.input_csv), "Input CSV file not found" + + assert ( + shutil.which(self.brats_pipeline_exe) is not None + ), "BraTS Pipeline executable not found, please contact admin@fets.ai for help." + + def process_data(self): + items = self.subjects_df.iterrows() + total = self.subjects_df.shape[0] + pbar = tqdm(range(total), desc="Preparing Dataset (1-10 min per subject)") + for idx, (_, row) in enumerate(items): + self.process_row(idx, row, pbar) + + def process_row(self, idx: int, row: pd.Series, pbar: tqdm): + self.convert_to_dicom(idx, row, pbar) + self.extract_brain(row, pbar) + self.extract_tumor(row, pbar) + + def __get_row_information(self, row: pd.Series): + parsed_headers = self.parsed_headers + subject_id = row[self.parsed_headers["ID"]] + subject_id_timepoint = subject_id + + # create QC and Final output dirs for each subject + interimOutputDir_actual = posixpath.join( + self.interim_output_dir, subject_id_timepoint + ) + finalSubjectOutputDir_actual = posixpath.join( + self.final_output_dir, subject_id_timepoint + ) + + # per the data ingestion step, we are creating a new folder called timepoint, can join timepoint to subjectid if needed + if parsed_headers["Timepoint"] is not None: + timepoint = row[parsed_headers["Timepoint"]] + subject_id_timepoint += "_" + timepoint + interimOutputDir_actual = posixpath.join(interimOutputDir_actual, timepoint) + finalSubjectOutputDir_actual = posixpath.join( + finalSubjectOutputDir_actual, timepoint + ) + + return ( + subject_id, + timepoint, + subject_id_timepoint, + interimOutputDir_actual, + finalSubjectOutputDir_actual, + ) + + def convert_to_dicom(self, idx: int, row: pd.Series, pbar: tqdm): + parsed_headers = self.parsed_headers + bratsPipeline_exe = self.brats_pipeline_exe + + ( + subject_id, + timepoint, + subject_id_timepoint, + interimOutputDir_actual, + finalSubjectOutputDir_actual, + ) = self.__get_row_information(row) + + # create QC and Final output dirs for each subject + Path(interimOutputDir_actual).mkdir(parents=True, exist_ok=True) + Path(finalSubjectOutputDir_actual).mkdir(parents=True, exist_ok=True) + + pbar.set_description(f"Processing {subject_id_timepoint}") + + # get the relevant dicom tags + self.dicom_tag_information_to_write_collab[subject_id_timepoint] = {} + self.dicom_tag_information_to_write_anon[str(idx)] = {} + for modality in MODALITIES_LIST: + tags_from_modality = _get_relevant_dicom_tags(row[parsed_headers[modality]]) + self.dicom_tag_information_to_write_collab[subject_id_timepoint][ + modality + ] = tags_from_modality + with open( + posixpath.join( + interimOutputDir_actual, f"dicom_tag_information_{modality}.yaml" + ), + "w", + ) as f: + yaml.safe_dump(tags_from_modality, f, allow_unicode=True) + 
self.dicom_tag_information_to_write_anon[str(idx)][ + modality + ] = tags_from_modality + + interimOutputDir_actual_reoriented = posixpath.join( + interimOutputDir_actual, REORIENTED_FOLDER + ) + Path(interimOutputDir_actual_reoriented).mkdir(parents=True, exist_ok=True) + # if files already exist in DataForQC, then copy to "reorient" folder, and if files exist in "reorient" folder, then skip + runBratsPipeline, _ = _copy_files_to_correct_location( + interimOutputDir_actual, + interimOutputDir_actual_reoriented, + subject_id_timepoint, + ) + + # check if the files exist already, if so, skip + if runBratsPipeline: + pbar.set_description(f"Running BraTSPipeline") + + command = ( + bratsPipeline_exe + + " -t1 " + + row[parsed_headers["T1"]] + + " -t1c " + + row[parsed_headers["T1GD"]] + + " -t2 " + + row[parsed_headers["T2"]] + + " -fl " + + row[parsed_headers["FLAIR"]] + + " -s 0 -o " + + interimOutputDir_actual + ) + + with open(self.stdout_log, "a+") as out, open(self.stderr_log, "a+") as err: + out.write(f"***\n{command}\n***") + err.write(f"***\n{command}\n***") + subprocess.Popen(command, stdout=out, stderr=err, shell=True).wait() + + runBratsPipeline, outputs_reoriented = _copy_files_to_correct_location( + interimOutputDir_actual, + interimOutputDir_actual_reoriented, + subject_id_timepoint, + ) + + if runBratsPipeline: + # The BraTS command failed, and no files were found + # flag this subject as failing + failing_data = {"SubjectID": subject_id, "Timepoint": timepoint} + failing_subject = pd.DataFrame(failing_data, index=[0]) + self.failing_subjects = pd.concat([self.failing_subjects, failing_subject]) + return + + # store the outputs in a dictionary when there are no errors + negatives_detected = False + for modality in MODALITIES_LIST: + count = _read_image_with_min_check(outputs_reoriented[modality]) + # if there are any negative values, then store the subjectid, timepoint, modality and count of negative values + if count == 0: + continue + neg_data = { + "SubjectID": subject_id, + "Timepoint": timepoint, + "Modality": modality, + "Count": count, + } + neg_subject = pd.DataFrame(neg_data, index=[0]) + self.neg_subjects = pd.concat([self.neg_subjects, neg_subject]) + negatives_detected = True + + # store the outputs in a dictionary when there are no errors + if negatives_detected: + return + + subject_data = { + "SubjectID": subject_id, + "Timepoint": timepoint, + "T1": outputs_reoriented["T1"], + "T1GD": outputs_reoriented["T1GD"], + "T2": outputs_reoriented["T2"], + "FLAIR": outputs_reoriented["FLAIR"], + } + subject = pd.DataFrame(subject_data, index=[0]) + self.subjects = pd.concat( + [ + self.subjects, + subject, + ] + ) + + pbar.set_description(f"Saving screenshot") + + screenshot_path = posixpath.join( + interimOutputDir_actual_reoriented, + f"{subject_id_timepoint}_summary_coregistration.png", + ) + # save the screenshot + save_screenshot(outputs_reoriented, screenshot_path) + + if os.path.exists(screenshot_path): + shutil.copyfile( + screenshot_path, + posixpath.join( + interimOutputDir_actual, + f"{subject_id_timepoint}_summary_coregistration.png", + ), + ) + + def extract_brain(self, row: pd.Series, pbar: tqdm): + ( + *_, + subject_id_timepoint, + interimOutputDir_actual, + finalSubjectOutputDir_actual, + ) = self.__get_row_information(row) + interimOutputDir_actual_reoriented = posixpath.join( + interimOutputDir_actual, REORIENTED_FOLDER + ) + outputs_reoriented = get_expected_outputs( + subject_id_timepoint, interimOutputDir_actual_reoriented + ) + + # Check for 
existence of brain mask. + # That way, we can pass corrected brain masks and proceed without + # overwriting the mask. + brain_mask_path = posixpath.join( + interimOutputDir_actual, "brainMask_fused.nii.gz" + ) + if not os.path.exists(brain_mask_path): + pbar.set_description(f"Brain Extraction") + + models_dir = posixpath.join(Path(__file__).parent.resolve(), "data_prep_models") + + brain_extraction_models_dir = posixpath.join(models_dir, "brain_extraction") + brain_extraction_models = [ + posixpath.join(brain_extraction_models_dir, model_dir) + for model_dir in os.listdir(brain_extraction_models_dir) + ] + + brain_mask = _run_brain_extraction_using_gandlf( + subject_id_timepoint, + outputs_reoriented, + brain_extraction_models, + interimOutputDir_actual, + ) + sitk.WriteImage(brain_mask, brain_mask_path) + else: + brain_mask = sitk.ReadImage(brain_mask_path) + + # this is to ensure that the mask and reoriented images are in the same byte order + # brain_mask = sitk.Cast(brain_mask, sitk.sitkFloat32) + input_for_tumor_models = get_brain_mask_files( + subject_id_timepoint, finalSubjectOutputDir_actual + ) + for modality in MODALITIES_LIST: + image = sitk.ReadImage(outputs_reoriented[modality]) + masked_image = sitk.Mask(image, brain_mask) + file_to_save = input_for_tumor_models[modality] + sitk.WriteImage(masked_image, file_to_save) + + # save the screenshot + save_screenshot( + input_for_tumor_models, + posixpath.join( + interimOutputDir_actual, + f"{subject_id_timepoint}_summary_brain-extraction.png", + ), + brain_mask_path, + ) + + def extract_tumor(self, row: pd.Series, pbar: tqdm): + ( + *_, + subject_id_timepoint, + interimOutputDir_actual, + finalSubjectOutputDir_actual, + ) = self.__get_row_information(row) + input_for_tumor_models = get_brain_mask_files( + subject_id_timepoint, finalSubjectOutputDir_actual + ) + + pbar.set_description(f"Brain Tumor Segmentation") + + models_dir = posixpath.join(Path(__file__).parent.resolve(), "data_prep_models") + + tumor_segmentation_models_dir = posixpath.join(models_dir, "tumor_segmentation") + tumor_segmentation_models = [ + posixpath.join(tumor_segmentation_models_dir, model_dir) + for model_dir in os.listdir(tumor_segmentation_models_dir) + ] + + tumor_masks_for_qc = _run_tumor_segmentation_using_gandlf( + subject_id_timepoint, + input_for_tumor_models, + tumor_segmentation_models, + interimOutputDir_actual, + ) + + for tumor_mask in tumor_masks_for_qc: + tumor_mask_id = os.path.basename(tumor_mask).replace(".nii.gz", "") + # save the screenshot + save_screenshot( + input_for_tumor_models, + posixpath.join(interimOutputDir_actual, f"{tumor_mask_id}_summary.png"), + tumor_mask, + ) + + with open(self.stdout_log, "a+") as f: + f.write(f"***\nTumor Masks For QC:\n{tumor_masks_for_qc}\n***") + + def write(self): + if self.subjects.shape[0]: + self.subjects.to_csv(self.subjects_file, index=False) + if self.neg_subjects.shape[0]: + self.neg_subjects.to_csv(self.neg_subjects_file, index=False) + if self.failing_subjects.shape[0]: + self.failing_subjects.to_csv(self.failing_subjects_file, index=False) + with open(self.dicom_tag_information_to_write_collab_file, "w") as f: + yaml.safe_dump( + self.dicom_tag_information_to_write_collab, f, allow_unicode=True + ) + with open(self.dicom_tag_information_to_write_anon_file, "w") as f: + yaml.safe_dump( + self.dicom_tag_information_to_write_anon, f, allow_unicode=True + ) + + def read(self): + self.parsed_headers = _parse_csv_header(self.input_csv) + self.subjects_df = pd.read_csv(self.input_csv, 
dtype=str) + if os.path.exists(self.subjects_file): + self.subjects = pd.read_csv(self.subjects_file) + if os.path.exists(self.neg_subjects_file): + self.neg_subjects = pd.read_csv(self.neg_subjects_file) + if os.path.exists(self.failing_subjects_file): + self.failing_subjects = pd.read_csv(self.failing_subjects_file) + if os.path.exists(self.dicom_tag_information_to_write_collab_file): + with open(self.dicom_tag_information_to_write_collab_file, "r") as f: + self.dicom_tag_information_to_write_collab = yaml.safe_load(f) + if os.path.exists(self.dicom_tag_information_to_write_anon_file): + with open(self.dicom_tag_information_to_write_anon_file, "r") as f: + self.dicom_tag_information_to_write_anon = yaml.safe_load(f) - return runBratsPipeline def main(): - copyrightMessage = 'Contact: software@cbica.upenn.edu/n/n' + 'This program is NOT FDA/CE approved and NOT intended for clinical use./nCopyright (c) ' + str(date.today().year) + ' University of Pennsylvania. All rights reserved.' - parser = argparse.ArgumentParser(prog='PrepareDataset', formatter_class=argparse.RawTextHelpFormatter, description = 'This application calls the BraTSPipeline for all input images and stores the final and intermediate files separately./n/n' + copyrightMessage) - parser.add_argument('-inputCSV', type=str, help = 'The absolute, comma-separated paths of labels that need to be fused', required=True) - parser.add_argument('-outputDir', type=str, help = 'The output file to write the results', required=True) - - args = parser.parse_args() - outputDir_qc = os.path.normpath(args.outputDir + '/DataForQC') - outputDir_final = os.path.normpath(args.outputDir + '/DataForFeTS') - - Path(args.outputDir).mkdir(parents=True, exist_ok=True) - Path(outputDir_qc).mkdir(parents=True, exist_ok=True) - Path(outputDir_final).mkdir(parents=True, exist_ok=True) - - bratsPipeline_exe = os.path.join(os.path.dirname(os.path.abspath(__file__)), 'BraTSPipeline') - if platform.system() == 'Windows': - bratsPipeline_exe += '.exe' - - csvContents = GetCSVContents(args.inputCSV) - - for row in csvContents: - interimOutputDir_actual = os.path.join(outputDir_qc, row['ID']) - finalSubjectOutputDir_actual = os.path.join(outputDir_final, row['ID']) - Path(interimOutputDir_actual).mkdir(parents=True, exist_ok=True) - Path(finalSubjectOutputDir_actual).mkdir(parents=True, exist_ok=True) - runBratsPipeline = copyFilesToCorrectLocation(interimOutputDir_actual, finalSubjectOutputDir_actual, row['ID']) - - if runBratsPipeline: - command = bratsPipeline_exe + ' -t1 ' + row['T1'] + ' -t1c ' + row['T1GD'] + ' -t2 ' + row['T2'] + ' -fl ' + row['FLAIR'] + ' -o ' + interimOutputDir_actual + ' -s 1' - print('Command: ', command) - subprocess.Popen(command, shell=True).wait() - - if copyFilesToCorrectLocation(interimOutputDir_actual, finalSubjectOutputDir_actual, row['ID']): - print('BraTSPipeline failed for subject \'', row['ID'], file = sys.stderr) - -if __name__ == '__main__': - if platform.system() == 'Darwin': - sys.exit('macOS is not supported') - else: - main() + parser = setup_parser() + args = parser.parse_args() + + prep = Preparator(args.inputCSV, args.outputDir, args.executablePath) + prep.validate() + prep.read() + prep.process_data() + prep.write() + + +if __name__ == "__main__": + if platform.system().lower() == "darwin": + sys.exit("macOS is not supported") + else: + main() diff --git a/src/applications/SanityCheck.py b/src/applications/SanityCheck.py index cf9a1b47..7219bc60 100644 --- a/src/applications/SanityCheck.py +++ 
b/src/applications/SanityCheck.py @@ -4,225 +4,340 @@ import SimpleITK as sitk import numpy as np + def read_image_with_min_check(filename): - ''' - this function fixes negatives by scaling - if min(input) < 0: - for all x in image: - if x != 0: - x -= min - ''' - input_image = sitk.ReadImage(filename) - input_image_array = sitk.GetArrayFromImage(input_image) - min = np.min(input_image_array) - - # fixme: apply following logic - # check for connected components with less than 100 voxels - ## if less than the threshold, then apply above logic to the negative voxels - ## else, give error to user for manual QC - if min < 0: - print('Negative values in', filename) - - return input_image + """ + this function fixes negatives by scaling + if min(input) < 0: + for all x in image: + if x != 0: + x -= min + """ + input_image = sitk.ReadImage(filename) + input_image_array = sitk.GetArrayFromImage(input_image) + min = np.min(input_image_array) + + # fixme: apply following logic + # check for connected components with less than 100 voxels + ## if less than the threshold, then apply above logic to the negative voxels + ## else, give error to user for manual QC + if min < 0: + print("Negative values in", filename) + + return input_image + def imageSanityCheck(targetImageFile, inputImageFile) -> bool: - ''' - This function does sanity checking of 2 images - ''' - targetImage = read_image_with_min_check(targetImageFile) - inputImage = read_image_with_min_check(inputImageFile) - - size = targetImage.GetSize() - size_expected = np.array([240,240,155]) - if not(np.array_equal(size, size_expected)): - print('Size for target image, \'' + targetImageFile + '\' is not in the BraTS format', size_expected, file = sys.stderr) - return False - - if targetImage.GetDimension() != 3: - print('Dimension for target image, \'' + targetImageFile + '\' is not 3', file = sys.stderr) - return False - - if inputImage.GetDimension() != 3: - print('Dimension for input image, \'' + inputImageFile + '\' is not 3', file = sys.stderr) - return False - - commonMessage = ' mismatch for target image, \'' + targetImageFile + '\' and input image, \'' + inputImageFile + '\'' - problemsIn = '' - returnTrue = True - - if targetImage.GetSize() != inputImage.GetSize(): - if not problemsIn: - problemsIn += 'Size' - else: - problemsIn += ', Size' - returnTrue = False - - if targetImage.GetOrigin() != inputImage.GetOrigin(): - if not problemsIn: - problemsIn += 'Origin' - else: - problemsIn += ', Origin' - returnTrue = False + """ + This function does sanity checking of 2 images + """ + targetImage = read_image_with_min_check(targetImageFile) + inputImage = read_image_with_min_check(inputImageFile) + + size = targetImage.GetSize() + size_expected = np.array([240, 240, 155]) + if not (np.array_equal(size, size_expected)): + print( + "Size for target image, '" + + targetImageFile + + "' is not in the BraTS format", + size_expected, + file=sys.stderr, + ) + return False + + if targetImage.GetDimension() != 3: + print( + "Dimension for target image, '" + targetImageFile + "' is not 3", + file=sys.stderr, + ) + return False + + if inputImage.GetDimension() != 3: + print( + "Dimension for input image, '" + inputImageFile + "' is not 3", + file=sys.stderr, + ) + return False - if targetImage.GetSpacing() != inputImage.GetSpacing(): - if not problemsIn: - problemsIn += 'Spacing' + commonMessage = ( + " mismatch for target image, '" + + targetImageFile + + "' and input image, '" + + inputImageFile + + "'" + ) + problemsIn = "" + returnTrue = True + + if 
targetImage.GetSize() != inputImage.GetSize(): + if not problemsIn: + problemsIn += "Size" + else: + problemsIn += ", Size" + returnTrue = False + + if targetImage.GetOrigin() != inputImage.GetOrigin(): + if not problemsIn: + problemsIn += "Origin" + else: + problemsIn += ", Origin" + returnTrue = False + + if targetImage.GetSpacing() != inputImage.GetSpacing(): + if not problemsIn: + problemsIn += "Spacing" + else: + problemsIn += ", Spacing" + returnTrue = False + + if returnTrue: + return True else: - problemsIn += ', Spacing' - returnTrue = False - - if returnTrue: - return True - else: - print(problemsIn + commonMessage, file = sys.stderr) - return False - -def checkBraTSLabels(subject_id, currentLabelFile, label_values_expected = np.array([0,1,2,4])) -> str: - ''' - This function checks for the expected labels and returns a string that will be provided as output for user - ''' - returnString = '' - mask_array = sitk.GetArrayFromImage(sitk.ReadImage(currentLabelFile)) - unique, counts = np.unique(mask_array, return_counts=True) # get unique elements and their counts - if not(np.array_equal(unique,label_values_expected)): # this is for the case where the label contains numbers other than 0,1,2,4 - for j in range(0,len(unique)): # iterate over a range to get counts easier - if not(unique[j] in label_values_expected): - if counts[j] > 1000: # threshold for mis-labelling, anything less is ignored - returnString += subject_id + ',' + currentLabelFile + ',' + str(unique[j]) + ',' + str(counts[j]) + '\n' - - return returnString + print(problemsIn + commonMessage, file=sys.stderr) + return False + + +def checkBraTSLabels( + subject_id, currentLabelFile, label_values_expected=np.array([0, 1, 2, 4]) +) -> str: + """ + This function checks for the expected labels and returns a string that will be provided as output for user + """ + returnString = "" + mask_array = sitk.GetArrayFromImage(sitk.ReadImage(currentLabelFile)) + # get unique elements and their counts + unique, counts = np.unique(mask_array, return_counts=True) + # this is for the case where the label contains numbers other than 0,1,2,4 + if not (np.array_equal(unique, label_values_expected)): + for j in range(0, len(unique)): # iterate over a range to get counts easier + if not (unique[j] in label_values_expected): + if ( + counts[j] > 1000 + ): # threshold for mis-labelling, anything less is ignored + returnString += ( + subject_id + + "," + + currentLabelFile + + "," + + str(unique[j]) + + "," + + str(counts[j]) + + "\n" + ) + + return returnString + def fixForLabelThree(currentLabelFile): - ''' - This function checks for the label '3' and changes it to '4' and save it in the same location - ''' - base_image = sitk.ReadImage(currentLabelFile) - mask_array = sitk.GetArrayFromImage(sitk.ReadImage(currentLabelFile)) - unique = np.sort(np.unique(mask_array)) - if unique[-1] == 3: - mask_array[mask_array == 3] = 4 - image_to_write = sitk.GetImageFromArray(mask_array) - image_to_write.CopyInformation(base_image) - sitk.WriteImage(image_to_write, currentLabelFile) + """ + This function checks for the label '3' and changes it to '4' and save it in the same location + """ + base_image = sitk.ReadImage(currentLabelFile) + mask_array = sitk.GetArrayFromImage(sitk.ReadImage(currentLabelFile)) + unique = np.sort(np.unique(mask_array)) + if unique[-1] == 3: + mask_array[mask_array == 3] = 4 + image_to_write = sitk.GetImageFromArray(mask_array) + image_to_write.CopyInformation(base_image) + sitk.WriteImage(image_to_write, currentLabelFile) + def 
main(): - copyrightMessage = 'Contact: software@cbica.upenn.edu/n/n' + 'This program is NOT FDA/CE approved and NOT intended for clinical use./nCopyright (c) ' + str(date.today().year) + ' University of Pennsylvania. All rights reserved.' - parser = argparse.ArgumentParser(prog='SanityCheck', formatter_class=argparse.RawTextHelpFormatter, description = 'This application performs rudimentary sanity checks the input data folder for FeTS training./n/n' + copyrightMessage) - parser.add_argument('-inputDir', type=str, help = 'The input directory (DataForFeTS) that needs to be checked', required=True) - parser.add_argument('-outputFile', type=str, help = 'The CSV file of outputs, which is only generated if there are problematic cases', required=True) - - args = parser.parse_args() - inputDir = args.inputDir - - if not os.path.isdir(inputDir): - sys.exit( - 'The specified inputDir is not present, please try again') - - errorMessage = 'Subject_ID,Recommendation_for_initial_annotations\n' - numberOfProblematicCases = 0 - - # initialize modality dict - files_to_check = { - 'T1': '_t1.nii.gz', - 'T1CE': '_t1ce.nii.gz', - 'T2': '_t2.nii.gz', - 'FL': '_flair.nii.gz', - 'MASK': '_final_seg.nii.gz' - } - - label_values_expected = np.array([0,1,2,4]) # initialize label array - - for dirs in os.listdir(inputDir): - if (dirs != 'logs') and (dirs != 'split_info'): # don't perform sanity check for the 'logs' folder - currentSubjectDir = os.path.join(inputDir, dirs) - if os.path.isdir(currentSubjectDir): # for detected subject dir - filesInDir = os.listdir(currentSubjectDir) # get all files in each directory - files_for_subject = {} - for i in range(len(filesInDir)): - for modality in files_to_check: # check all modalities - if filesInDir[i].endswith(files_to_check[modality]): # if modality detected, populate subject dict - files_for_subject[modality] = os.path.abspath(os.path.join(currentSubjectDir, filesInDir[i])) - - currentSubjectsLabelIsAbsent = False # check if current subject's final_seg is present or not - all_modalities_present = True - if len(files_for_subject) != 5: # if all modalities are not present, add exit statement - if ((len(files_for_subject) == 4) and ('MASK' in files_for_subject)) or (len(files_for_subject) < 4): - numberOfProblematicCases += 1 - errorMessage += dirs + ',All_required_modalities_are_not_present.\n' - all_modalities_present = False - - if all_modalities_present and len(files_for_subject) > 0: - first, *rest = files_for_subject.items() # split the dict - for i in range(0, len(rest)): - if not(imageSanityCheck(first[1], rest[i][1])): # image sanity check - numberOfProblematicCases += 1 - errorMessage += dirs + ',Image_dimension/size/origin/spacing_mismatch_between_' + first[0] + '_and_' + rest[i][0] + '\n' - - currentSubjectsLabelIsProblematic = False # check if current subject's label has issues - if 'MASK' in files_for_subject: - currentLabelFile = files_for_subject['MASK'] - fixForLabelThree(currentLabelFile) - returnString = checkBraTSLabels(dirs, currentLabelFile) - if returnString: # if there is something present in the return string - numberOfProblematicCases += 1 - currentSubjectsLabelIsProblematic = True - errorMessage += returnString - else: - currentSubjectsLabelIsAbsent = True - - fusionToRecommend = '' - segmentationsForQCPresent = True - problematicSegmentationMessage = '' - if currentSubjectsLabelIsProblematic or currentSubjectsLabelIsAbsent: # if final_seg is absent or is problematic - segmentationsFolder = os.path.join(currentSubjectDir, 'SegmentationsForQC') 
- if os.path.isdir(segmentationsFolder): - segmentationFiles = os.listdir(segmentationsFolder) # get all files in each directory - for i in range(len(segmentationFiles)): - if 'fused' in segmentationFiles[i]: # only perform checks for fusion results - currentLabelFile = os.path.join(segmentationsFolder, segmentationFiles[i]) - returnString = checkBraTSLabels(dirs, currentLabelFile) - if returnString: # if there is something present in the return string - problematicSegmentationMessage += returnString - else: - if not('staple' in fusionToRecommend): # overwrite the fusion result to recommend if not staple that was fine - fusionToRecommend = currentLabelFile - - if not fusionToRecommend: - errorMessage += problematicSegmentationMessage - if not('staple' in fusionToRecommend): # recommend nnunet or deepscan if not staple - if not('itkvoting' in fusionToRecommend): - if not('majorityvoting' in fusionToRecommend): - fusionToRecommend = 'nnunet_or_deepscan' - else: - fusionToRecommend = 'majorityvoting' - else: - fusionToRecommend = 'itkvoting' - else: - fusionToRecommend = 'staple' - - else: - errorMessage += dirs + ',SegmentationsForQC_folder_is_absent\n' - numberOfProblematicCases += 1 - segmentationsForQCPresent = False - # errorMessage += dirs + ',Label_file_absent,N.A.,N.A.\n' - - if currentSubjectsLabelIsAbsent and segmentationsForQCPresent: - numberOfProblematicCases += 1 - if fusionToRecommend: - errorMessage += dirs + ',' + fusionToRecommend + '\n' - else: - errorMessage += dirs + ',final_seg_absent_and_use_either_nnunet_or_deepscan,N.A.,N.A.\n' - - if numberOfProblematicCases > 0: - # print(errorMessage) - with open(args.outputFile, 'a') as the_file: - the_file.write(errorMessage) - sys.exit('There were subjects with either missing annotations or where annotations had problematic labels. Please see the recommendation(s) for new initialization in the outputFile: \'' + args.outputFile + '\'') - else: - print('Congratulations, all subjects are fine and ready to train!') - -if __name__ == '__main__': - if platform.system() == 'Darwin': - sys.exit('macOS is not supported') - else: - main() + copyrightMessage = ( + "Contact: admin@fets.ai\n\n" + + "This program is NOT FDA/CE approved and NOT intended for clinical use.\nCopyright (c) " + + str(date.today().year) + + " University of Pennsylvania. All rights reserved." 
+ ) + parser = argparse.ArgumentParser( + prog="SanityCheck", + formatter_class=argparse.RawTextHelpFormatter, + description="This application performs rudimentary sanity checks the input data folder for FeTS training.\n\n" + + copyrightMessage, + ) + parser.add_argument( + "-inputDir", + type=str, + help="The input directory (DataForFeTS) that needs to be checked", + required=True, + ) + parser.add_argument( + "-outputFile", + type=str, + help="The CSV file of outputs, which is only generated if there are problematic cases", + required=True, + ) + + args = parser.parse_args() + inputDir = args.inputDir + + if not os.path.isdir(inputDir): + sys.exit("The specified inputDir is not present, please try again") + + errorMessage = "Subject_ID,Recommendation_for_initial_annotations\n" + numberOfProblematicCases = 0 + + # initialize modality dict + files_to_check = { + "T1": "_t1.nii.gz", + "T1CE": "_t1ce.nii.gz", + "T2": "_t2.nii.gz", + "FL": "_flair.nii.gz", + "MASK": "_final_seg.nii.gz", + } + + label_values_expected = np.array([0, 1, 2, 4]) # initialize label array + + for dirs in os.listdir(inputDir): + if (dirs != "logs") and ( + dirs != "split_info" + ): # don't perform sanity check for the 'logs' folder + currentSubjectDir = os.path.join(inputDir, dirs) + if os.path.isdir(currentSubjectDir): # for detected subject dir + filesInDir = os.listdir( + currentSubjectDir + ) # get all files in each directory + files_for_subject = {} + for i in range(len(filesInDir)): + for modality in files_to_check: # check all modalities + if filesInDir[i].endswith( + files_to_check[modality] + ): # if modality detected, populate subject dict + files_for_subject[modality] = os.path.abspath( + os.path.join(currentSubjectDir, filesInDir[i]) + ) + + currentSubjectsLabelIsAbsent = ( + False # check if current subject's final_seg is present or not + ) + all_modalities_present = True + if ( + len(files_for_subject) != 5 + ): # if all modalities are not present, add exit statement + if ( + (len(files_for_subject) == 4) and ("MASK" in files_for_subject) + ) or (len(files_for_subject) < 4): + numberOfProblematicCases += 1 + errorMessage += ( + dirs + ",All_required_modalities_are_not_present.\n" + ) + all_modalities_present = False + + if all_modalities_present and len(files_for_subject) > 0: + first, *rest = files_for_subject.items() # split the dict + for i in range(0, len(rest)): + if not ( + imageSanityCheck(first[1], rest[i][1]) + ): # image sanity check + numberOfProblematicCases += 1 + errorMessage += ( + dirs + + ",Image_dimension/size/origin/spacing_mismatch_between_" + + first[0] + + "_and_" + + rest[i][0] + + "\n" + ) + + currentSubjectsLabelIsProblematic = ( + False # check if current subject's label has issues + ) + if "MASK" in files_for_subject: + currentLabelFile = files_for_subject["MASK"] + fixForLabelThree(currentLabelFile) + returnString = checkBraTSLabels(dirs, currentLabelFile) + if ( + returnString + ): # if there is something present in the return string + numberOfProblematicCases += 1 + currentSubjectsLabelIsProblematic = True + errorMessage += returnString + else: + currentSubjectsLabelIsAbsent = True + + fusionToRecommend = "" + segmentationsForQCPresent = True + problematicSegmentationMessage = "" + if ( + currentSubjectsLabelIsProblematic + or currentSubjectsLabelIsAbsent + ): # if final_seg is absent or is problematic + segmentationsFolder = os.path.join( + currentSubjectDir, "SegmentationsForQC" + ) + if os.path.isdir(segmentationsFolder): + segmentationFiles = os.listdir( + 
segmentationsFolder + ) # get all files in each directory + for i in range(len(segmentationFiles)): + if ( + "fused" in segmentationFiles[i] + ): # only perform checks for fusion results + currentLabelFile = os.path.join( + segmentationsFolder, segmentationFiles[i] + ) + returnString = checkBraTSLabels( + dirs, currentLabelFile + ) + if ( + returnString + ): # if there is something present in the return string + problematicSegmentationMessage += returnString + else: + if not ( + "staple" in fusionToRecommend + ): # overwrite the fusion result to recommend if not staple that was fine + fusionToRecommend = currentLabelFile + + if not fusionToRecommend: + errorMessage += problematicSegmentationMessage + if not ( + "staple" in fusionToRecommend + ): # recommend nnunet or deepscan if not staple + if not ("itkvoting" in fusionToRecommend): + if not ("majorityvoting" in fusionToRecommend): + fusionToRecommend = "nnunet_or_deepscan" + else: + fusionToRecommend = "majorityvoting" + else: + fusionToRecommend = "itkvoting" + else: + fusionToRecommend = "staple" + + else: + errorMessage += ( + dirs + ",SegmentationsForQC_folder_is_absent\n" + ) + numberOfProblematicCases += 1 + segmentationsForQCPresent = False + # errorMessage += dirs + ',Label_file_absent,N.A.,N.A.\n' + + if currentSubjectsLabelIsAbsent and segmentationsForQCPresent: + numberOfProblematicCases += 1 + if fusionToRecommend: + errorMessage += dirs + "," + fusionToRecommend + "\n" + else: + errorMessage += ( + dirs + + ",final_seg_absent_and_use_either_nnunet_or_deepscan,N.A.,N.A.\n" + ) + + if numberOfProblematicCases > 0: + # print(errorMessage) + with open(args.outputFile, "a") as the_file: + the_file.write(errorMessage) + sys.exit( + "There were subjects with either missing annotations or where annotations had problematic labels. Please see the recommendation(s) for new initialization in the outputFile: '" + + args.outputFile + + "'" + ) + else: + print("Congratulations, all subjects are fine and ready to train!") + + +if __name__ == "__main__": + if platform.system().lower() == "darwin": + sys.exit("macOS is not supported") + else: + main() diff --git a/src/applications/Utilities/CMakeLists.txt b/src/applications/Utilities/CMakeLists.txt index eed65b4d..49737167 100644 --- a/src/applications/Utilities/CMakeLists.txt +++ b/src/applications/Utilities/CMakeLists.txt @@ -98,7 +98,7 @@ ELSE() SET( PLATFORM_STRING "linux" ) ENDIF() -SET( DOWNLOAD_LINK "ftp://www.nitrc.org/home/groups/captk/downloads/Hausdorff95_${PLATFORM_STRING}.zip" ) +SET( DOWNLOAD_LINK "https://captk.projects.nitrc.org/Hausdorff95_${PLATFORM_STRING}.zip" ) SET( FILENAME_TO_EXTRACT "hausdorff95_${PLATFORM_STRING}") SET( FILE_TO_EXTRACT ${PROJECT_BINARY_DIR}/${FILENAME_TO_EXTRACT}.zip) diff --git a/src/applications/Utilities/HausdorffCLI/Hausdorff95.py b/src/applications/Utilities/HausdorffCLI/Hausdorff95.py index f71c9223..10019efc 100644 --- a/src/applications/Utilities/HausdorffCLI/Hausdorff95.py +++ b/src/applications/Utilities/HausdorffCLI/Hausdorff95.py @@ -7,28 +7,39 @@ import SimpleITK as sitk import pkg_resources -if __name__ == '__main__': - parser = argparse.ArgumentParser(prog='Hausdorff95', formatter_class=argparse.RawTextHelpFormatter, - description='\nThis code is used to get the Hausdorff 95th percentile. 
'+\ - 'For questions and feedback contact: software@cbica.upenn.edu') - - parser.add_argument('-gt', dest='groundTruth', type=str, - help='The ground truth image for comparison.\n', - required=True) - - parser.add_argument('-m', dest='maskImage', type=str, - help='The annotated mask to compare against ground truth.\n', - required=True) - - # parser.add_argument('-v', '--version', action='version', - # version=pkg_resources.require("Hausdorff95")[0].version, help="Show program's version number and exit.") # disabled because pyinstaller doesn't import it properly - - args = parser.parse_args() - - groundTruth = os.path.abspath(args.groundTruth) - maskImage = os.path.abspath(args.maskImage) - - gt = sitk.GetArrayFromImage(sitk.ReadImage(groundTruth)) - mk = sitk.GetArrayFromImage(sitk.ReadImage(maskImage)) - - print(hd95(gt,mk)) \ No newline at end of file +if __name__ == "__main__": + parser = argparse.ArgumentParser( + prog="Hausdorff95", + formatter_class=argparse.RawTextHelpFormatter, + description="\nThis code is used to get the Hausdorff 95th percentile. " + + "For questions and feedback contact: admin@fets.ai", + ) + + parser.add_argument( + "-gt", + dest="groundTruth", + type=str, + help="The ground truth image for comparison.\n", + required=True, + ) + + parser.add_argument( + "-m", + dest="maskImage", + type=str, + help="The annotated mask to compare against ground truth.\n", + required=True, + ) + + # parser.add_argument('-v', '--version', action='version', + # version=pkg_resources.require("Hausdorff95")[0].version, help="Show program's version number and exit.") # disabled because pyinstaller doesn't import it properly + + args = parser.parse_args() + + groundTruth = os.path.abspath(args.groundTruth) + maskImage = os.path.abspath(args.maskImage) + + gt = sitk.GetArrayFromImage(sitk.ReadImage(groundTruth)) + mk = sitk.GetArrayFromImage(sitk.ReadImage(maskImage)) + + print(hd95(gt, mk)) diff --git a/src/applications/Utilities/HausdorffCLI/setup.py b/src/applications/Utilities/HausdorffCLI/setup.py index d47578da..55098569 100644 --- a/src/applications/Utilities/HausdorffCLI/setup.py +++ b/src/applications/Utilities/HausdorffCLI/setup.py @@ -3,25 +3,22 @@ from setuptools import setup -setup(name='Hausdorff95', - version='1.0.0.Alpha', - description='Get Hausdorff 95 from 2 annotations', - url='https://github.com/sarthakpati/test_hausdorff', - python_requires='>=3.6', - author='Sarthak Pati', - author_email='software@cbica.upenn.edu', - license='BSD-3-Clause', - zip_safe=False, - install_requires=[ - 'MedPy==0.4.0', - 'setuptools>=47', - 'PyInstaller==3.6' - ], - scripts=['Hausdorff95.py'], - classifiers=[ - 'Intended Audience :: Science/Research', - 'Programming Language :: Python', - 'Topic :: Scientific/Engineering', - 'Operating System :: Unix' - ] - ) \ No newline at end of file +setup( + name="Hausdorff95", + version="1.0.0.Alpha", + description="Get Hausdorff 95 from 2 annotations", + url="https://github.com/sarthakpati/test_hausdorff", + python_requires=">=3.6", + author="Sarthak Pati", + author_email="admin@fets.ai", + license="BSD-3-Clause", + zip_safe=False, + install_requires=["MedPy==0.4.0", "setuptools>=47", "PyInstaller==3.6"], + scripts=["Hausdorff95.py"], + classifiers=[ + "Intended Audience :: Science/Research", + "Programming Language :: Python", + "Topic :: Scientific/Engineering", + "Operating System :: Unix", + ], +) diff --git a/src/applications/__init__.py b/src/applications/__init__.py new file mode 100644 index 00000000..e69de29b diff --git 
a/src/applications/constants.py b/src/applications/constants.py new file mode 100644 index 00000000..2f3109d3 --- /dev/null +++ b/src/applications/constants.py @@ -0,0 +1,43 @@ +# check against all these modality ID strings with extensions +MODALITY_ID_DICT = { + "T1": ["t1", "t1pre", "t1precontrast", "t1n"], + "T1GD": ["t1ce", "t1gd", "t1post", "t1postcontrast", "t1gallodinium", "t1c"], + "T2": ["t2", "t2w"], + "FLAIR": ["flair", "fl", "t2flair", "t2f"], +} +# this is used to keep a mapping between the fets1 nomenclature +MODALITY_ID_MAPPING = { + "T1": "t1n", + "T1GD": "t1c", + "T2": "t2w", + "FLAIR": "t2f", +} +MODALITIES_LIST = list(MODALITY_ID_DICT.keys()) +SUBJECT_NAMES = {"patientid", "subjectid", "subject", "subid"} +TIMEPOINT_NAMES = {"timepoint", "tp", "time", "series", "subseries"} +INPUT_FILENAMES = { + "T1": "T1_to_SRI.nii.gz", + "T1GD": "T1CE_to_SRI.nii.gz", + "T2": "T2_to_SRI.nii.gz", + "FLAIR": "FL_to_SRI.nii.gz", +} + +GANDLF_DF_COLUMNS = ["SubjectID", "Channel_0"] + +INTERIM_FOLDER = "DataForQC" +FINAL_FOLDER = "DataForFeTS" +TUMOR_MASK_FOLDER = "TumorMasksForQC" +TESTING_FOLDER = "testing" +REORIENTED_FOLDER = "reoriented" + +BRAIN_FILENAME = "gandlf_brain_extraction.csv" +TUMOR_FILENAME = "gandlf_tumor_segmentation.csv" +SUBJECTS_FILENAME = "processed_data.csv" +NEG_SUBJECTS_FILENAME = "QC_subjects_with_negative_intensities.csv" +FAIL_SUBJECTS_FILENAME = "QC_subjects_with_bratspipeline_error.csv" +DICOM_ANON_FILENAME = "dicom_tag_information_to_write_anon.yaml" +DICOM_COLLAB_FILENAME = "dicom_tag_information_to_write_collab.yaml" +STDOUT_FILENAME = "preparedataset_stdout.txt" +STDERR_FILENAME = "preparedataset_stderr.txt" + +EXEC_NAME = "BraTSPipeline" diff --git a/src/applications/setup.py b/src/applications/setup.py new file mode 100644 index 00000000..8787e8a6 --- /dev/null +++ b/src/applications/setup.py @@ -0,0 +1,91 @@ +#!/usr/bin/env python + +"""The setup script.""" + +from setuptools import setup, find_packages + +__version__ = "2.0.0" + +requirements = [ + "black", + "FigureGenerator==0.0.4", + "gandlf==0.0.16", + "labelfusion==1.0.14", + "numpy==1.22.0", + "SimpleITK!=2.0.*", + "SimpleITK!=2.2.1", # https://github.com/mlcommons/GaNDLF/issues/536 + "scikit-learn>=0.23.2", + "scikit-image>=0.19.1", + "tqdm", + "setuptools", + "pandas<2.0.0", + "pyyaml", + "pytest", + "pytest-cov", +] + +if __name__ == "__main__": + setup( + name="FeTS_Tool_Helper", + version=__version__, + author="FeTS-AI", + author_email="admin@fets.ai", + python_requires=">=3.8", + packages=find_packages(), + py_modules=[], + entry_points={ + "console_scripts": [ + "sanitycheck=SanityCheck:main", + "preparedataset=PrepareDataset:main", + "createcsvfordicoms=CreateCSVForDICOMs:main", + ], + }, + classifiers=[ + "Development Status :: 5 - Production/Stable", + "Intended Audience :: Science/Research", + "Natural Language :: English", + "Operating System :: OS Independent", + "Programming Language :: Python :: 3.8", + "Programming Language :: Python :: 3.9", + "Programming Language :: Python :: 3.10", + "Topic :: Scientific/Engineering :: Medical Science Apps.", + ], + description=("Helper scripts for the FeTS Tool."), + install_requires=requirements, + license="https://raw.githubusercontent.com/FeTS-AI/Front-End/master/LICENSE", + long_description="Helper scripts for the FeTS Tool", + long_description_content_type="text", + include_package_data=True, + keywords="brain, mri, neuroimaging, machine learning, federated learning", + zip_safe=False, + ) + + +## download various models +import os, 
posixpath +from pathlib import Path +from zipfile import ZipFile +from urllib.request import urlretrieve + +models_dir = posixpath.join(Path(__file__).parent.resolve(), "data_prep_models") +Path(models_dir).mkdir(parents=True, exist_ok=True) + +urls_for_download = { + "brain_extraction": "https://upenn.box.com/shared/static/cp5xz726mtb6gwwym8ydcxmw52zfngun", + "tumor_segmentation": "https://storage.googleapis.com/medperf-storage/rano_test_assets/tumor_segmentation.zip", # should be changed +} + +for model in urls_for_download.keys(): + if urls_for_download[model] is not None: + zip_file = posixpath.join(models_dir, f"{model}.zip") + if not Path( + posixpath.join(models_dir, model, "model_0", "config.yaml") + ).exists(): + if not Path(zip_file).exists(): + print(f"Downloading {model} models") + url = urls_for_download[model] + urlretrieve(urls_for_download[model], zip_file) + z = ZipFile(zip_file) + z.extractall(models_dir) + z.close() + os.remove(zip_file)
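
The reformatted SanityCheck.py keeps its checks (image geometry against the BraTS 240x240x155 grid, the expected label values {0, 1, 2, 4}, and the in-place label-3 to label-4 fix) as plain module-level functions, so they can be driven programmatically as well as through the new sanitycheck console entry point declared in src/applications/setup.py. A minimal sketch under that assumption follows; the subject folder, file names, and the sys.path adjustment are hypothetical and only illustrate the call signatures shown in the diff.

# Sketch only: paths and subject layout are hypothetical; SanityCheck.py must be importable.
import os
import sys

sys.path.insert(0, "src/applications")  # adjust to wherever SanityCheck.py lives
from SanityCheck import checkBraTSLabels, fixForLabelThree, imageSanityCheck

subject_dir = "DataForFeTS/Subject_001"  # hypothetical subject folder
t1 = os.path.join(subject_dir, "Subject_001_t1.nii.gz")
flair = os.path.join(subject_dir, "Subject_001_flair.nii.gz")
seg = os.path.join(subject_dir, "Subject_001_final_seg.nii.gz")

# size/origin/spacing of the second image is compared against the first;
# the first is additionally checked against the BraTS 240x240x155 grid
if not imageSanityCheck(t1, flair):
    print("FLAIR does not match T1 geometry")

# rewrite stray label 3 to 4 in place, then report any remaining unexpected labels
fixForLabelThree(seg)
problems = checkBraTSLabels("Subject_001", seg)
if problems:
    print(problems, end="")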
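
The lookup tables introduced in constants.py are what map the many filename spellings of each MRI sequence onto the four canonical channels, and onto the newer t1n/t1c/t2w/t2f naming via MODALITY_ID_MAPPING. A minimal sketch of such a lookup is below; normalize_modality and the sample filenames are assumptions for illustration, not code from the diff (the real matching lives in the data-preparation scripts that consume these constants).

# Illustrative sketch: the two dictionaries mirror src/applications/constants.py;
# normalize_modality is a hypothetical helper, not part of the diff.
import re

MODALITY_ID_DICT = {
    "T1": ["t1", "t1pre", "t1precontrast", "t1n"],
    "T1GD": ["t1ce", "t1gd", "t1post", "t1postcontrast", "t1gallodinium", "t1c"],
    "T2": ["t2", "t2w"],
    "FLAIR": ["flair", "fl", "t2flair", "t2f"],
}
MODALITY_ID_MAPPING = {"T1": "t1n", "T1GD": "t1c", "T2": "t2w", "FLAIR": "t2f"}


def normalize_modality(filename: str) -> str:
    """Map a raw filename such as 'sub01_t1gd.nii.gz' to its FeTS 2.0 modality code."""
    stem = filename.lower().replace(".nii.gz", "").replace(".nii", "")
    token = re.split(r"[_\-]", stem)[-1]  # assume the modality is the last token
    for modality, aliases in MODALITY_ID_DICT.items():
        if token in aliases:
            return MODALITY_ID_MAPPING[modality]
    raise ValueError(f"Unrecognized modality token: {token}")


if __name__ == "__main__":
    for name in ("sub01_t1gd.nii.gz", "sub01_flair.nii.gz", "sub01_t2.nii.gz"):
        print(name, "->", normalize_modality(name))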
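
The model download block at the bottom of src/applications/setup.py runs whenever setup.py is executed and extracts each archive only when data_prep_models/<model>/model_0/config.yaml is not already present, so repeated runs skip models that are already in place. A quick way to confirm the resulting layout is sketched below; the check itself is an illustrative assumption, not part of the diff.

# Sketch: verify the data_prep_models layout produced by the setup.py download step.
from pathlib import Path

models_dir = Path("src/applications/data_prep_models")  # populated by setup.py
for model in ("brain_extraction", "tumor_segmentation"):
    config = models_dir / model / "model_0" / "config.yaml"
    print(f"{model}: {'ok' if config.exists() else 'missing'} ({config})")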