diff --git a/cli/medperf/config.py b/cli/medperf/config.py index a28160b09..42d00818e 100644 --- a/cli/medperf/config.py +++ b/cli/medperf/config.py @@ -92,6 +92,7 @@ cas_folder = "cas" training_events_folder = "training_events" certificates_folder = "certificates" +dashboards_folder = "dashboards" default_base_storage = str(Path.home().resolve() / ".medperf") @@ -164,6 +165,10 @@ "base": default_base_storage, "name": certificates_folder, }, + "dashboards_folder": { + "base": default_base_storage, + "name": dashboards_folder, + }, } root_folders = [ @@ -186,6 +191,7 @@ "cas_folder", "training_events_folder", "certificates_folder", + "dashboards_folder", ] # MedPerf filenames conventions diff --git a/cli/medperf/dashboard/__init__.py b/cli/medperf/dashboard/__init__.py new file mode 100644 index 000000000..09dac6827 --- /dev/null +++ b/cli/medperf/dashboard/__init__.py @@ -0,0 +1 @@ +"""Dashboard package for preparation monitoring.""" diff --git a/scripts/dashboard/medperf_dashboard/__main__.py b/cli/medperf/dashboard/__main__.py similarity index 51% rename from scripts/dashboard/medperf_dashboard/__main__.py rename to cli/medperf/dashboard/__main__.py index f31c63581..9ecade21c 100644 --- a/scripts/dashboard/medperf_dashboard/__main__.py +++ b/cli/medperf/dashboard/__main__.py @@ -2,4 +2,4 @@ initialize() -from .preparation_dashboard import t_app # noqa \ No newline at end of file +from .preparation_dashboard import t_app # noqa diff --git a/scripts/dashboard/medperf_dashboard/get_data.py b/cli/medperf/dashboard/get_data.py similarity index 84% rename from scripts/dashboard/medperf_dashboard/get_data.py rename to cli/medperf/dashboard/get_data.py index 480c981c5..e643f0c64 100644 --- a/scripts/dashboard/medperf_dashboard/get_data.py +++ b/cli/medperf/dashboard/get_data.py @@ -3,14 +3,17 @@ import datetime import numpy as np +from medperf.entities.benchmark import Benchmark from medperf.entities.dataset import Dataset from medperf import config -from .utils import get_institution_from_email, get_reports_path, stage_id2name +from .utils import get_institution_from_email, stage_id2name -def get_dsets(mlcube_id): - dsets = Dataset.all(filters={"mlcube": mlcube_id}) +def get_dsets(benchmark_id): + bmk = Benchmark.get(benchmark_id) + data_preparator = bmk.data_preparation_mlcube + dsets = Dataset.all(filters={"data_preparation_mlcube": data_preparator}) dsets = [dset.todict() for dset in dsets] for dset in dsets: user_id = dset["owner"] @@ -86,10 +89,12 @@ def write_sites(dsets_df, institutions_df, full_path): f.write("\n".join(sites)) -def get_data(mlcube_id, stages_path, institutions_path, out_path): - dsets = get_dsets(mlcube_id) - full_path = get_reports_path(out_path, mlcube_id) - os.makedirs(full_path, exist_ok=True) +def get_data(benchmark_id, stages_path, institutions_path, out_path): + dsets = get_dsets(benchmark_id) + if not dsets: + return + + os.makedirs(out_path, exist_ok=True) institutions_df = pd.read_csv(institutions_path) user2institution = {u: i for i, u in institutions_df.values.tolist()} @@ -97,5 +102,6 @@ def get_data(mlcube_id, stages_path, institutions_path, out_path): stages_df.set_index("Status Code", inplace=True) dsets_df = build_dset_df(dsets, user2institution, stages_df) - write_dsets_df(dsets_df, full_path) - write_sites(dsets_df, institutions_df, full_path) + write_dsets_df(dsets_df, out_path) + write_sites(dsets_df, institutions_df, out_path) + return True diff --git a/scripts/dashboard/medperf_dashboard/preparation_dashboard.py b/cli/medperf/dashboard/preparation_dashboard.py similarity index 79% rename from scripts/dashboard/medperf_dashboard/preparation_dashboard.py rename to cli/medperf/dashboard/preparation_dashboard.py index ef274ae40..de09b6102 100644 --- a/scripts/dashboard/medperf_dashboard/preparation_dashboard.py +++ b/cli/medperf/dashboard/preparation_dashboard.py @@ -5,6 +5,9 @@ import dash_bootstrap_components as dbc import pandas as pd +from medperf import config +from medperf.utils import sanitize_path + from .get_data import get_data from .utils import get_reports_path @@ -231,6 +234,27 @@ def preparation_timeline(stages_colors, stages_df, full_path): ) +def no_data_layout(): + return dbc.Container( + [ + html.H1("Preparation Progress", style={"textAlign": "center"}), + dbc.Alert( + [ + html.H3("No registered datasets", className="alert-heading"), + html.P( + "There are no datasets registered with the data preparator " + "of this benchmark yet.", + className="fs-5", + ), + ], + color="warning", + className="mt-4 text-center", + ), + ], + className="mt-4", + ) + + def get_sites_dicts(sites_path, latest_table): with open(sites_path, "r") as f: sites = f.readlines() @@ -243,9 +267,20 @@ def get_sites_dicts(sites_path, latest_table): return sites_dicts -def build_dash_app(registered_df, stages_colors, latest_table, stages, full_path): +def _build_dash_app( + data_exists, + registered_df, + stages_colors, + latest_table, + stages, + full_path, + prefix, +): + app = Dash( __name__, + title="Preparation Dashboard", + requests_pathname_prefix=prefix, external_stylesheets=[dbc.themes.LUMEN], meta_tags=[ { @@ -255,6 +290,10 @@ def build_dash_app(registered_df, stages_colors, latest_table, stages, full_path ], ) + if not data_exists: + app.layout = no_data_layout() + return app + app.layout = dbc.Container( [ html.H1(children="Preparation Progress", style={"textAlign": "center"}), @@ -267,10 +306,54 @@ def build_dash_app(registered_df, stages_colors, latest_table, stages, full_path return app +def build_app( + benchmark_id, + stages_path, + institutions_path, + out_path=None, + prefix=None, +): + out_path = sanitize_path(out_path) or config.dashboards_folder + full_path = get_reports_path(out_path, benchmark_id) + + data_exists = get_data(benchmark_id, stages_path, institutions_path, full_path) + + registered_df = None + stages_colors = None + latest_table = None + stages = None + + if data_exists: + latest_path = os.path.join(full_path, "latest_table.csv") + latest_table = pd.read_csv(latest_path) + + sites_path = os.path.join(full_path, "sites.txt") + sites_dicts = get_sites_dicts(sites_path, latest_table) + + registered_df = pd.DataFrame(sites_dicts) + registered_df = registered_df.drop_duplicates() + + stages = pd.read_csv(stages_path) + stages_colors = ( + stages[["status_name", "color"]].set_index("status_name").to_dict()["color"] + ) + stages_colors["Unknown"] = "silver" + + return _build_dash_app( + data_exists, + registered_df, + stages_colors, + latest_table, + stages, + full_path, + prefix, + ) + + @t_app.command() def main( - mlcube_id: int = Option( - ..., "-m", "--mlcube", help="MLCube ID to inspect prparation from" + benchmark_id: int = Option( + ..., "-b", "--benchmark", help="Benchmark ID to inspect preparation from" ), stages_path: str = Option(..., "-s", "--stages", help="Path to stages.csv"), institutions_path: str = Option( @@ -283,29 +366,7 @@ def main( None, "-o", "--out-path", help="location to store progress CSVs" ), ): - cur_path = os.path.dirname(__file__) - if out_path is None: - out_path = os.path.join(cur_path, "reports") - - get_data(mlcube_id, stages_path, institutions_path, out_path) - full_path = get_reports_path(out_path, mlcube_id) - - latest_path = os.path.join(full_path, "latest_table.csv") - latest_table = pd.read_csv(latest_path) - - sites_path = os.path.join(full_path, "sites.txt") - sites_dicts = get_sites_dicts(sites_path, latest_table) - - registered_df = pd.DataFrame(sites_dicts) - registered_df = registered_df.drop_duplicates() - - stages = pd.read_csv(stages_path) - stages_colors = ( - stages[["status_name", "color"]].set_index("status_name").to_dict()["color"] - ) - stages_colors["Unknown"] = "silver" - - app = build_dash_app(registered_df, stages_colors, latest_table, stages, full_path) + app = build_app(benchmark_id, stages_path, institutions_path, out_path) app.run_server(debug=True) diff --git a/scripts/dashboard/medperf_dashboard/utils.py b/cli/medperf/dashboard/utils.py similarity index 68% rename from scripts/dashboard/medperf_dashboard/utils.py rename to cli/medperf/dashboard/utils.py index dad81c49b..01598c160 100644 --- a/scripts/dashboard/medperf_dashboard/utils.py +++ b/cli/medperf/dashboard/utils.py @@ -1,7 +1,5 @@ -import re import os - -from medperf import config +from medperf.utils import sanitize_path def stage_id2name(stage_str, stages_df): @@ -23,8 +21,6 @@ def get_institution_from_email(email, user2institution): return plausible_institution -def get_reports_path(out_path, mlcube_id): - server_path = config.server.split("//")[1] - server_path = re.sub(r"[.:]", "_", server_path) - full_path = os.path.join(out_path, server_path, str(mlcube_id)) - return full_path +def get_reports_path(out_path, benchmark_id): + full_path = os.path.join(out_path, str(benchmark_id)) + return sanitize_path(full_path) diff --git a/cli/medperf/web_ui/app.py b/cli/medperf/web_ui/app.py index 581e47ff0..64e9a5630 100644 --- a/cli/medperf/web_ui/app.py +++ b/cli/medperf/web_ui/app.py @@ -53,6 +53,9 @@ def startup_event(): web_app.state.task_running = False web_app.state.MAXLOGMESSAGES = config.webui_max_log_messages + # {benchmark_id: dict} (checks if mounted and files changed) + web_app.state.dashboards = {} + # List of [schemas.Notification] will appear in the notifications tab web_app.state.notifications = [] diff --git a/cli/medperf/web_ui/benchmarks/routes.py b/cli/medperf/web_ui/benchmarks/routes.py index 7e0627dda..fa40e6d15 100644 --- a/cli/medperf/web_ui/benchmarks/routes.py +++ b/cli/medperf/web_ui/benchmarks/routes.py @@ -30,6 +30,8 @@ UpdateAssociationsPolicy, ) +from medperf.web_ui.utils import mount_dashboard + router = APIRouter() logger = logging.getLogger(__name__) @@ -391,3 +393,51 @@ def update_associations_policy( url=f"/benchmarks/ui/display/{benchmark_id}", ) return return_response + + +@router.post("/ui/dashboard", response_class=HTMLResponse) +def preparation_dashboard( + request: Request, + benchmark_id: int = Form(...), + benchmark_name: str = Form(...), + stages: str = Form(...), + institutions: str = Form(...), + force_update: bool = Form(False), + current_user: bool = Depends(check_user_ui), +): + errors = False + error_message = "Failed to load dashboard: " + + benchmark = Benchmark.get(benchmark_id) + is_owner = benchmark.owner == get_medperf_user_data()["id"] + if not is_owner: + errors = True + error_message += "Only the benchmark owner can access the dashboard." + + try: + if not errors: + mount_dashboard(request, benchmark_id, stages, institutions, force_update) + except Exception as exp: + logger.exception(exp) + errors = True + error_message += str(exp) + + if errors: + return templates.TemplateResponse( + "error.html", + { + "request": request, + "exception": error_message, + }, + ) + + return templates.TemplateResponse( + "dashboard_wrapper.html", + { + "request": request, + "mount_point": f"/ui/display/{benchmark_id}/dashboard/app", + "benchmark_id": benchmark_id, + "prev_url": f"/benchmarks/ui/display/{benchmark_id}/", + "benchmark_name": benchmark_name, + }, + ) diff --git a/cli/medperf/web_ui/static/js/benchmarks/benchmark_detail.js b/cli/medperf/web_ui/static/js/benchmarks/benchmark_detail.js index 67946df37..da693afc0 100644 --- a/cli/medperf/web_ui/static/js/benchmarks/benchmark_detail.js +++ b/cli/medperf/web_ui/static/js/benchmarks/benchmark_detail.js @@ -244,4 +244,51 @@ $(document).ready(() => { $("#dataset-auto-approve-mode").trigger("change"); $("#model-auto-approve-mode").trigger("change"); + + const btn = $("#dashboard-btn"); + const form = $("#dashboard-form-wrapper"); + + if(!btn.length || !form.length) + return; + + const collapse = new bootstrap.Collapse(form[0], { + toggle: false + }); + + $("#redirect-dashobard-form").off("submit").on("submit", (e) => { + e.preventDefault(); + + if (!$("#stages-path").val()) { + showErrorToast("Make sure to enter a valid path for the stages file"); + return; + } + + if (!$("#institutions-path").val()) { + showErrorToast("Make sure to enter a valid path for the institutions file"); + return; + } + + e.currentTarget.submit(); + }); + + $("#browse-stages-btn").on("click", () => { + browseWithFiles = true; + browseFolderHandler("stages-path"); + }); + $("#browse-institutions-btn").on("click", () => { + browseWithFiles = true; + browseFolderHandler("institutions-path"); + }); + + btn.on("click", function () { + const icon = $(this).find("i"); + + if (form.hasClass("show")) { + collapse.hide(); + icon.css("transform", "rotate(0deg)"); + } else { + collapse.show(); + icon.css("transform", "rotate(180deg)"); + } + }); }); \ No newline at end of file diff --git a/cli/medperf/web_ui/templates/benchmark/benchmark_detail.html b/cli/medperf/web_ui/templates/benchmark/benchmark_detail.html index 10bd60ddb..3df7ea70f 100644 --- a/cli/medperf/web_ui/templates/benchmark/benchmark_detail.html +++ b/cli/medperf/web_ui/templates/benchmark/benchmark_detail.html @@ -12,14 +12,100 @@ {% block title %}Benchmark Details{% endblock %} {% block detail_panel %} -

{{ entity.name }}

- +{% endif %}
diff --git a/cli/medperf/web_ui/templates/constants/forms_placeholders.html b/cli/medperf/web_ui/templates/constants/forms_placeholders.html index 2e643e8c1..c0ef2f27c 100644 --- a/cli/medperf/web_ui/templates/constants/forms_placeholders.html +++ b/cli/medperf/web_ui/templates/constants/forms_placeholders.html @@ -24,6 +24,9 @@ {% set benchmark_dataset_allow_list_file_input = "/home/user/dataset_allowed_emails_list_file.txt" %} {% set benchmark_model_allow_list_file_input = "/home/user/model_allowed_emails_list_file.txt" %} +{% set dashboard_stages_path = "/home/user/stages.csv" %} +{% set dashboard_institutions_path = "/home/user/institutions.csv" %} + {# Forms placeholders for datasets pages #} {% set register_dataset_name = "Example Dataset" %} {% set register_dataset_description = "Example Description" %} diff --git a/cli/medperf/web_ui/templates/constants/tooltips.html b/cli/medperf/web_ui/templates/constants/tooltips.html index 5dab1fa1e..9db299e9e 100644 --- a/cli/medperf/web_ui/templates/constants/tooltips.html +++ b/cli/medperf/web_ui/templates/constants/tooltips.html @@ -23,6 +23,10 @@ {% set benchmark_dataset_allow_list_file_input = "Path to the emails allowed list file for dataset associations" %} {% set benchmark_model_allow_list_file_input = "Path to the emails allowed list file for model associations" %} +{% set dashboard_stages_path = "Path to the stages csv file" %} +{% set dashboard_institutions_path = "Path to the institutions csv file" %} +{% set dashboard_force_update = "Rebuild the dashboard using the latest data instead of cached results" %} + {# Tooltips for datasets pages #} {% set register_dataset_name = "Name of the dataset you are registering" %} {% set register_dataset_description = "Description of the dataset you are registering" %} diff --git a/cli/medperf/web_ui/templates/dashboard_wrapper.html b/cli/medperf/web_ui/templates/dashboard_wrapper.html new file mode 100644 index 000000000..df494aba2 --- /dev/null +++ b/cli/medperf/web_ui/templates/dashboard_wrapper.html @@ -0,0 +1,51 @@ +{% extends "base.html" %} + +{% block title %}Preparation Dashboard | {{ benchmark_id }}{% endblock %} + +{% block content %} +
+ +
+

{{benchmark_name}} - Dashboard

+
+ +
+{% endblock %} +{% block extra_js %} + +{% endblock %} diff --git a/cli/medperf/web_ui/utils.py b/cli/medperf/web_ui/utils.py index 676ed60c6..5d66d95fa 100644 --- a/cli/medperf/web_ui/utils.py +++ b/cli/medperf/web_ui/utils.py @@ -1,5 +1,9 @@ import uuid +from fastapi import Request from medperf.entities.cube import Cube +from medperf.utils import sanitize_path +from medperf.dashboard.preparation_dashboard import build_app +from starlette.middleware.wsgi import WSGIMiddleware def get_container_type(container: Cube): @@ -18,3 +22,43 @@ def get_container_type(container: Cube): def generate_uuid(): return str(uuid.uuid4()) + + +def mount_dashboard( + request: Request, benchmark_id, stages_path, institutions_path, force_update +): + dashboards = request.app.state.dashboards + stages_path = sanitize_path(stages_path) + institutions_path = sanitize_path(institutions_path) + + dashboard_built = benchmark_id in dashboards + + stages_changed = False + institutions_changed = False + if dashboard_built: + stages_changed = stages_path != dashboards[benchmark_id]["stages_path"] + institutions_changed = ( + institutions_path != dashboards[benchmark_id]["institutions_path"] + ) + + must_build = ( + not dashboard_built or stages_changed or institutions_changed or force_update + ) + + if not must_build: + return + + dashbaord_app = build_app( + benchmark_id, + stages_path, + institutions_path, + prefix=f"/ui/display/{benchmark_id}/dashboard/app/", + ) + request.app.state.dashboards[benchmark_id] = { + "stages_path": stages_path, + "institutions_path": institutions_path, + } + request.app.mount( + f"/ui/display/{benchmark_id}/dashboard/app", + WSGIMiddleware(dashbaord_app.server), + ) diff --git a/cli/requirements.txt b/cli/requirements.txt index a9e853d85..ccf1385e3 100644 --- a/cli/requirements.txt +++ b/cli/requirements.txt @@ -28,3 +28,6 @@ fastapi==0.111.1 fastapi-login==1.10.2 cryptography==46.0.3 click==8.1.8 +dash==2.16 +plotly==5.20 +dash-bootstrap-components==1.5 \ No newline at end of file diff --git a/cli/setup.py b/cli/setup.py index 7a784aed6..efb602d7c 100644 --- a/cli/setup.py +++ b/cli/setup.py @@ -28,6 +28,7 @@ [console_scripts] medperf=medperf.__main__:app medperf_webui=medperf.webui_main:app + medperf_dashboard=medperf.dashboard.__main__:t_app """, package_data={"medperf": package_data}, ) diff --git a/scripts/dashboard/README.md b/scripts/dashboard/README.md deleted file mode 100644 index f3f5b2f16..000000000 --- a/scripts/dashboard/README.md +++ /dev/null @@ -1,26 +0,0 @@ -# Medperf Data Preparation Dashboard - -The medperf data preparation dashboard provides visualization on the usage of a data preparation mlcube and the stages data owners are at. This will hopefully provide insights into how far along the process is going, and wether users are having trouble specific to the execution of the data preparation pipeline. - -## Installation - -To install, execute the following command at this folder: - -``` -pip install -e . -``` - -## How to use - -To use, you need to have a few assets and identifiers beforehand: -- MLCube ID: The ID of the MLCube that is being used as a data preparation MLCube. To be able to see progress, you must be the owner of this MLCube -- Stages File: A `CSV` file that contains the human-readable information of each of the stages that the data preparation MLCube contains. The CSV should have the following columns: `Status Code, status_name, comment, docs_url, color` -- Institutions File: A `CSV` file that maps emails to institutions that are expected to be part of the preparation procedure. The CSV should have the following columns: `institution, email` - -Once all requirements are covered, you can execute the following command: - -``` -medperf-dashboard -m -s -i -``` - -Running this command will fetch the latest reports from the medperf server, and start a local server that will contain the visualization of the progress. To access this server, head to `http://localhost:8050` on your preferred browser. \ No newline at end of file diff --git a/scripts/dashboard/requirements.txt b/scripts/dashboard/requirements.txt deleted file mode 100644 index aeea8be2e..000000000 --- a/scripts/dashboard/requirements.txt +++ /dev/null @@ -1,5 +0,0 @@ -dash==2.16 -pandas==2.1 -plotly==5.20 -dash-bootstrap-components==1.5 -typer==0.6 diff --git a/scripts/dashboard/setup.py b/scripts/dashboard/setup.py deleted file mode 100644 index d95085380..000000000 --- a/scripts/dashboard/setup.py +++ /dev/null @@ -1,24 +0,0 @@ -from setuptools import setup - -with open("requirements.txt", "r") as f: - requires = [] - for line in f: - req = line.split("#", 1)[0].strip() - if req and not req.startswith("--"): - requires.append(req) - -setup( - name="medperf-dashboard", - version="0.0.0", - description="TUI for monitoring medperf datasets", - url="https://github.com/mlcommons/medperf", - author="MLCommons", - license="Apache 2.0", - packages=["medperf_dashboard"], - install_requires=requires, - python_requires=">=3.6", - entry_points=""" - [console_scripts] - medperf-dashboard=medperf_dashboard.__main__:t_app - """, -)