From 7429709544ac64b74223230850d12aaf16fd6257 Mon Sep 17 00:00:00 2001 From: Evan Ng Date: Wed, 26 Nov 2025 17:43:45 -0500 Subject: [PATCH 01/17] Updated docs for pediatrics --- src/tabs/data_pre_processing.py | 15 +++++++++++++++ tables/Acoustic_Tasks_Protocol.csv | 18 ++++++++++++++++++ 2 files changed, 33 insertions(+) diff --git a/src/tabs/data_pre_processing.py b/src/tabs/data_pre_processing.py index 35b9f0e..8e1f8e3 100644 --- a/src/tabs/data_pre_processing.py +++ b/src/tabs/data_pre_processing.py @@ -28,16 +28,31 @@ def data_pre_processing_page(tab_name): ``` **Speech tasks included** + - ABC's - Animal fluency - Cape V sentences - Caterpillar Passage + - Caterpillar Passage (Pediatrics) - Cinderella Story + - Counting - Diadochokinesis + - Favorite Foods + - Favorite Show/Movies + - Identifying Pictures + - Months + - Naming Animals + - Naming Foods + - Outside of School - Picture description + - Picture Description (Pediatrics) - Productive Vocabulary - Prolonged vowel - Rainbow Passage - Random Item Generation + - Ready For School + - Repeat Words + - Repeat Sentences + - Role Naming - Story recall - Word-color Stroop diff --git a/tables/Acoustic_Tasks_Protocol.csv b/tables/Acoustic_Tasks_Protocol.csv index d1181a0..254cced 100644 --- a/tables/Acoustic_Tasks_Protocol.csv +++ b/tables/Acoustic_Tasks_Protocol.csv @@ -21,3 +21,21 @@ Speech,Word-Color Stroop,color descriptions,Neuro Speech,Productive Vocabulary,describing images,Neuro Speech,Random Item generation,describing images,Neuro Speech,Cinderella Story,story,Neuro +Speech,ABC’s,Recalling alphabet,Peds +Speech,Ready For School,Recalling a typical day preparing for School,Peds +Speech,Favorite Show,Recalling favorite shows,Peds +Speech,Favorite Food,Describing their favorite food,Peds +Speech,Outside of School,After school activities description,Peds +Speech,Months,Listing the months,Peds +Speech,Counting,Counting,Peds +Speech,Naming Animals,Listing animals,Peds +Speech,Naming Food,Listing foods,Peds 
+Speech,Identifying Pictures,Picture identification,Peds +Speech,Picture Description (Pediatrics),describing a picture,Peds +Voice/Non Speech,Long Sounds,Sustained /ee/ and /ah/ sounds,Peds +Voice/Non Speech,Noisy Soubds,/jj/ /ah/ /ee/ /oo/ /sh/ /ss/ /muh/ /nuh/ /zz/ /hh/,Peds +Speech,Caterpillar Passage (Pediatrics),validated passage,Peds +Speech,Repeat Words,Word repetition,Peds +Speech,Role naming,"recalling days, months, and counting from 60 – 70",Peds +Speech,Repeat Sentences,Sentence Recall,Peds +Voice/Non Speech,Silly Sounds,/PUH/ /TUH/ /KUH/ /PUH TUH KUH/,Peds From 5b9a004883186b293ac8c3b7cf949ab66ee4aea9 Mon Sep 17 00:00:00 2001 From: Evan Ng Date: Wed, 26 Nov 2025 23:36:35 -0500 Subject: [PATCH 02/17] Updated docs for validated questionnaires --- tables/Validated_Questionnaires.csv | 30 ++++++++++++++++------------- 1 file changed, 17 insertions(+), 13 deletions(-) diff --git a/tables/Validated_Questionnaires.csv b/tables/Validated_Questionnaires.csv index afffc41..7c311bf 100644 --- a/tables/Validated_Questionnaires.csv +++ b/tables/Validated_Questionnaires.csv @@ -1,13 +1,17 @@ -Validated Questionnaire,Voice Disorders,Respiratory,Mood/Psychiatric,Neurological,Controls -Voice Handicap Index-10 (VHI-10),X,X,X,X,X -Patient Health Questionnaire (PHQ-9),X,X,X,X,X -General Anxiety Disorder (GAD-7),X,X,X,X,X -Positive and Negative Affect Schedule (PANAS),,,X,,X -Custom Affect scale,,,X,,X -Post-Traumatic Stress Disorder Test (PTSD) Adult,,,X,,X -Attention Deficit and Hyperactivity Disorder Questionnaire (ADHD-Adult),,,X,,X -The Diagnostic and Statistical Manual of Mental Disorders (DSM-5 Adult),,,X,,X -Dyspnea Index (DI),,X,,,X -Leicester Cough Questionnaire (LCQ),,X,,,X -Winograd Questionnaire,,,,X,X -Montreal Cognitive Assessment (MOCA)*,,,,X,X +Validated Questionnaire,Voice Disorders,Respiratory,Mood/Psychiatric,Neurological,Controls,Pediatrics +Voice Handicap Index-10 (VHI-10),X,X,X,X,X, +Patient Health Questionnaire (PHQ-9),X,X,X,X,X, +General Anxiety 
Disorder (GAD-7),X,X,X,X,X, +Positive and Negative Affect Schedule (PANAS),,,X,,X, +Custom Affect scale,,,X,,X, +Post-Traumatic Stress Disorder Test (PTSD) Adult,,,X,,X, +Attention Deficit and Hyperactivity Disorder Questionnaire (ADHD-Adult),,,X,,X, +The Diagnostic and Statistical Manual of Mental Disorders (DSM-5 Adult),,,X,,X, +Dyspnea Index (DI),,X,,,X, +Leicester Cough Questionnaire (LCQ),,X,,,X, +Winograd Questionnaire,,,,X,X, +Montreal Cognitive Assessment (MOCA)*,,,,X,X, +Children's Voice Handicap Index-10 (C-VHI-10),,,,,,X +Pediatric Voice Outcomes Survey (PVOS),,,,,,X +Pediatric Voice-Related Quality-of-Life (PVRQOL),,,,,,X +Patient Health Questionnaire modified for Adolescents (PHQ-A),,,,,,X From 7be054d241d6099f987a4b9f10b3395ac964609a Mon Sep 17 00:00:00 2001 From: Evan Ng Date: Mon, 1 Dec 2025 12:19:14 -0500 Subject: [PATCH 03/17] Added info about pediatric data collection and processing --- src/tabs/collection_methods.py | 8 ++++++-- src/tabs/data_pre_processing.py | 9 +++++++-- src/tabs/study_metadata.py | 2 +- 3 files changed, 14 insertions(+), 5 deletions(-) diff --git a/src/tabs/collection_methods.py b/src/tabs/collection_methods.py index c1a66ee..72aafec 100644 --- a/src/tabs/collection_methods.py +++ b/src/tabs/collection_methods.py @@ -6,17 +6,21 @@ def collection_methods_page(tab_name): st.markdown( """ - Data is collected across five disease categories. Initial data release contains data collected from four of five categories (pediatric data to be incorporated in subsequent dataset releases. + Data is collected across five disease categories. Initial data release contains data collected from four of five categories. Participants are recruited across different academic institutions from “high volume expert clinics” based on diagnosis and inclusion/exclusion criteria outlined below **(Table 1)**. 
+ **Pediatric Participants:** Pediatric Participants are recruited strictly from the Hospital for Sick Children (SickKids) and are based on different age groups. + **High Volume Expert Clinics:** Outpatient clinics within hospital systems or academic institutions that have developed an expertise in a specific disease area and see more than 50 patients per month from the same disease category. Ex: Asthma/COPD pulmonary specialty clinic. Data is collected in the clinic with the assistance of a trained researched assistant. Future data collection will also occur remotely, however remote data collection did not occur with initial dataset being released. Voice samples are collected prospectively using a custom software application (Bridge2AI-Voice app) with the Bridge2AI-Voice protocols. + + For Pediatrics, all data is collected using [reproschema-ui](https://repronim.org/reproschema-ui/) with the Bridge2AI-Voice pediatric protocol. **Clinical validation:** Clinical validation is performed by qualified physician or practitioner based on established gold standards for diagnosis **(Table 1)**. - **Acoustic Tasks:** Voice, breathing, cough, and speech data are recorded with the app. A total of 22 acoustic Tasks are recorded through the app **(Table 2)**. + **Acoustic Tasks:** Voice, breathing, cough, and speech data are recorded with the app for adults, and with reproschema-ui for pediatrics. A total of 22 acoustic Tasks are recorded through the app **(Table 2)**. **Demographic surveys and confounders:** Detailed demographic data and surveys about confounding factors such as smoking and drinking history is collected through the smartphone application. 
diff --git a/src/tabs/data_pre_processing.py b/src/tabs/data_pre_processing.py index 8e1f8e3..4dde97a 100644 --- a/src/tabs/data_pre_processing.py +++ b/src/tabs/data_pre_processing.py @@ -3,7 +3,11 @@ def data_pre_processing_page(tab_name): st.markdown( """ - The raw audio files and the questionnaire data exported from REDCap were converted to follow the [Brain Imaging Data Structure v1.9.0](https://bids-specification.readthedocs.io/en/v1.9.0/). The folder structure for the dataset is as follows: + The raw audio files and the questionnaire data retrieved from reproschema-ui or exported from REDCap were converted to follow the [Brain Imaging Data Structure v1.9.0](https://bids-specification.readthedocs.io/en/v1.9.0/). + + **Pediatric data:** Data captured for pediatrics using reproschema-ui is extracted and converted to REDCap before being finally converted to the Brain Imaging Data Structure. + + The folder structure for the dataset is as follows: ``` b2ai-voice-audio @@ -74,7 +78,7 @@ def data_pre_processing_page(tab_name): **Methods of De-identification for v2.0.0** All direct identifiers were removed, as these would reveal the identity of the research participant. These include name, civic address, and social security numbers. Indirect identifiers were removed where these created a significant risk of causing participant re-identification, for example through their combination with other public data available on social media, in government registries, or elsewhere. These include select geographic or demographic identifiers, as well as some information about household composition or cultural identity. Non-identifying elements of data that revealed highly sensitive information, such as information about household income, mental health status, traumatic life experiences, and the like were also removed. - Raw audio transcripts were reviewed and any audio recordings which contained potentially identifying information were removed from the release. 
+ Raw audio transcripts were reviewed and any audio recordings which contained potentially identifying information and external voices were removed from the release. All sensitive fields are removed from the dataset at this stage. These correspond to data elements encoded as sensitive (Column name: "Identifier?") listed in the [RedCap data dictionary (CSV)](https://github.com/eipm/bridge2ai-redcap/blob/main/data/bridge2ai_voice_project_data_dictionary.csv). @@ -84,6 +88,7 @@ def data_pre_processing_page(tab_name): - Generate missingness tables - Check distributions and outliers - For categorical responses, check against schema + - For audio tasks, run quality control metrics - For waveforms: - Check amount of silence - Duration diff --git a/src/tabs/study_metadata.py b/src/tabs/study_metadata.py index 7d21927..fc2763b 100644 --- a/src/tabs/study_metadata.py +++ b/src/tabs/study_metadata.py @@ -46,7 +46,7 @@ def study_metadata_page(tab_name): No Minimum Age
- 18 years (this will change when pediatric cohort is introduced, and metadata will be updated to reflect new eligibility criteria) + 2 - 17 for pediatric participants and 18+ for adult cohorts. Maximum Age
120 years From 4d46cafbbd47a42493ef0521562c68703d5ae041 Mon Sep 17 00:00:00 2001 From: Evan Ng Date: Wed, 3 Dec 2025 11:40:34 -0500 Subject: [PATCH 04/17] Updated pre-processing docs --- src/tabs/data_pre_processing.py | 8 ++------ src/tabs/overview.py | 3 +-- 2 files changed, 3 insertions(+), 8 deletions(-) diff --git a/src/tabs/data_pre_processing.py b/src/tabs/data_pre_processing.py index 4dde97a..fc55a86 100644 --- a/src/tabs/data_pre_processing.py +++ b/src/tabs/data_pre_processing.py @@ -3,9 +3,9 @@ def data_pre_processing_page(tab_name): st.markdown( """ - The raw audio files and the questionnaire data retrieved from reproschema-ui or exported from REDCap were converted to follow the [Brain Imaging Data Structure v1.9.0](https://bids-specification.readthedocs.io/en/v1.9.0/). + The raw audio files and the questionnaire data retrieved from ReproSchema-UI or exported from REDCap were converted to comply with the [Brain Imaging Data Structure v1.9.0](https://bids-specification.readthedocs.io/en/v1.9.0/). - **Pediatric data:** Data captured for pediatrics using reproschema-ui is extracted and converted to REDCap before being finally converted to the Brain Imaging Data Structure. + **Pediatric data:** Pediatric data collected through ReproSchema-UI is extracted and transformed into REDCap format, and subsequently converted to the Brain Imaging Data Structure (BIDS). The folder structure for the dataset is as follows: @@ -15,8 +15,6 @@ def data_pre_processing_page(tab_name): ├── CHANGES.md ├── README.md ├── dataset_description.json - ├── participants.json - ├── participants.tsv ├── phenotype │   ├── .json │   └── .tsv @@ -73,8 +71,6 @@ def data_pre_processing_page(tab_name): 1. A fixed feature format that includes static features extracted from the entire waveform 2. A temporal format that varies for each audio file depending on the length of recording. - The questionnaire features are combined into a single table (phenotype.tsv). 
This can be used for cohort selection. - **Methods of De-identification for v2.0.0** All direct identifiers were removed, as these would reveal the identity of the research participant. These include name, civic address, and social security numbers. Indirect identifiers were removed where these created a significant risk of causing participant re-identification, for example through their combination with other public data available on social media, in government registries, or elsewhere. These include select geographic or demographic identifiers, as well as some information about household composition or cultural identity. Non-identifying elements of data that revealed highly sensitive information, such as information about household income, mental health status, traumatic life experiences, and the like were also removed. diff --git a/src/tabs/overview.py b/src/tabs/overview.py index d535ef7..27108d8 100644 --- a/src/tabs/overview.py +++ b/src/tabs/overview.py @@ -38,8 +38,7 @@ def overview_page(tab_name): - Respiratory disorders - Pediatric Voice and Speech Disorders - **Please Note:** The public data releases do not contain pediatric data. It also does not contain an equal distribution - of these categories of diseases. Further releases will contain additional data. + **Please Note:** The public data releases do not contain an equal distribution of these categories of diseases. Further releases will contain additional data. """ ) From b179f088dd14d7f7f49c82579cc4d7222c48d0f5 Mon Sep 17 00:00:00 2001 From: Evan Ng Date: Wed, 3 Dec 2025 13:48:48 -0500 Subject: [PATCH 05/17] Updated docs --- src/tabs/data_pre_processing.py | 2 ++ 1 file changed, 2 insertions(+) diff --git a/src/tabs/data_pre_processing.py b/src/tabs/data_pre_processing.py index fc55a86..2c5af29 100644 --- a/src/tabs/data_pre_processing.py +++ b/src/tabs/data_pre_processing.py @@ -71,6 +71,8 @@ def data_pre_processing_page(tab_name): 1. 
A fixed feature format that includes static features extracted from the entire waveform 2. A temporal format that varies for each audio file depending on the length of recording. + The questionnaire features are combined into a single table (phenotype.tsv). This can be used for cohort selection. + **Methods of De-identification for v2.0.0** All direct identifiers were removed, as these would reveal the identity of the research participant. These include name, civic address, and social security numbers. Indirect identifiers were removed where these created a significant risk of causing participant re-identification, for example through their combination with other public data available on social media, in government registries, or elsewhere. These include select geographic or demographic identifiers, as well as some information about household composition or cultural identity. Non-identifying elements of data that revealed highly sensitive information, such as information about household income, mental health status, traumatic life experiences, and the like were also removed. 
From 98b901d74278a200749c785cfed1dfcd146aea96 Mon Sep 17 00:00:00 2001 From: Evan Ng Date: Tue, 9 Dec 2025 14:58:15 -0500 Subject: [PATCH 06/17] fixed typo --- tables/Acoustic_Tasks_Protocol.csv | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tables/Acoustic_Tasks_Protocol.csv b/tables/Acoustic_Tasks_Protocol.csv index 254cced..f1c55f2 100644 --- a/tables/Acoustic_Tasks_Protocol.csv +++ b/tables/Acoustic_Tasks_Protocol.csv @@ -33,7 +33,7 @@ Speech,Naming Food,Listing foods,Peds Speech,Identifying Pictures,Picture identification,Peds Speech,Picture Description (Pediatrics),describing a picture,Peds Voice/Non Speech,Long Sounds,Sustained /ee/ and /ah/ sounds,Peds -Voice/Non Speech,Noisy Soubds,/jj/ /ah/ /ee/ /oo/ /sh/ /ss/ /muh/ /nuh/ /zz/ /hh/,Peds +Voice/Non Speech,Noisy Sounds,/jj/ /ah/ /ee/ /oo/ /sh/ /ss/ /muh/ /nuh/ /zz/ /hh/,Peds Speech,Caterpillar Passage (Pediatrics),validated passage,Peds Speech,Repeat Words,Word repetition,Peds Speech,Role naming,"recalling days, months, and counting from 60 – 70",Peds From ce2a14bf7177316a4d029bb02edaa1da09f9074e Mon Sep 17 00:00:00 2001 From: Evan Ng Date: Tue, 9 Dec 2025 15:59:12 -0500 Subject: [PATCH 07/17] Added pediatric dashboard --- src/dashboard.py | 3 + .../dashboard_data/peds_dashboard_data.json | 61 ++++ src/tabs/study_dashboard_peds.py | 300 ++++++++++++++++++ src/tabs/utils.py | 12 + 4 files changed, 376 insertions(+) create mode 100644 src/tabs/dashboard_data/peds_dashboard_data.json create mode 100644 src/tabs/study_dashboard_peds.py diff --git a/src/dashboard.py b/src/dashboard.py index 3bd0015..2c3a026 100644 --- a/src/dashboard.py +++ b/src/dashboard.py @@ -4,6 +4,7 @@ from tabs.collection_methods import collection_methods_page from tabs.data_governance import data_governance_page from tabs.study_dashboard import study_dashboard_page +from tabs.study_dashboard_peds import study_dashboard_page_peds from tabs.study_metadata import study_metadata_page from tabs.healthsheet import 
healthsheet_page from tabs.data_pre_processing import data_pre_processing_page @@ -55,6 +56,7 @@ def main(): # collections_methods_page() is defined in tabs/collections_methods.py # data_governance_page() is defined in tabs/data_governance.py # study_dashboard_page() is defined in tabs/study_dashboard.py + # study_dashboard_page_peds() is defined in tabs/study_dashboard_peds.py # study_metadata_page() is defined in tabs/study_metadata.py # healthsheet_page() is defined in tabs/healthsheet.py # data_pre_processing_page() is defined in tabs/data_pre_processing.py @@ -69,6 +71,7 @@ def main(): "Collection Methods": collection_methods_page, "Data Governance": data_governance_page, "Study Dashboard": study_dashboard_page, + "Pediatric Study Dashboard": study_dashboard_page_peds, "Study Metadata": study_metadata_page, "Healthsheet": healthsheet_page, "Data Pre-Processing": data_pre_processing_page, diff --git a/src/tabs/dashboard_data/peds_dashboard_data.json b/src/tabs/dashboard_data/peds_dashboard_data.json new file mode 100644 index 0000000..0a6c384 --- /dev/null +++ b/src/tabs/dashboard_data/peds_dashboard_data.json @@ -0,0 +1,61 @@ +{ + "number_of_participants": 300, + "number_of_recordings": 28896, + "gender_identity": { + "Female gender identity": 159, + "Male gender identity": 140, + "Prefer not to answer": 1 + }, + "race": { + "Asian": 87, + "Black or African American": 16, + "White": 119, + "Canadian Indigenous or Aboriginal": 4, + "Other": 40, + "Prefer not to answer": 10, + "Multiple Races": 21 + }, + "age_groups": { + "2-4": 0, + "4-5": 2, + "6-9": 91, + "10-17": 207 + }, + "primary_language": { + "English": 278, + "Spanish": 2, + "Arabic": 2, + "French": 2, + "Mandarin": 5, + "Other": 11 + }, + "total_hours_of_recordings": 38.608125936003276, + "questionnaire_collected": { + "Q - Pediatric - Generic - Demographics": 300, + "Q - Pediatric - Generic - VHI-10": 300, + "Q - Pediatric - Generic - Voice Outcome Survey": 300, + "Q - Pediatric - Generic - Voice 
Related QoL Survey": 300, + "Q - Pediatric - Generic PHQ-A": 300, + "Q - Pediatric - Generic Medical Conditions": 300 + }, + "acoustic_task_collected": { + "ABC's - peds": 0, + "Ready For School - peds": 298, + "Favorite Show - peds": 298, + "Favorite Food - peds": 298, + "Outside of School - peds": 298, + "Months - peds": 2, + "Counting - peds": 2, + "Naming Animals - peds": 208, + "Naming Food - peds": 208, + "Identifying Pictures - peds": 300, + "Picture Description - peds": 299, + "Long Sounds - peds": 208, + "Noisy Sounds - peds": 92, + "Caterpillar Passage - peds": 298, + "Repeat Words - peds": 300, + "Role Naming - peds": 90, + "Senteces - peds": 299, + "Silly Sounds - peds": 296 + } +} \ No newline at end of file diff --git a/src/tabs/study_dashboard_peds.py b/src/tabs/study_dashboard_peds.py new file mode 100644 index 0000000..2703978 --- /dev/null +++ b/src/tabs/study_dashboard_peds.py @@ -0,0 +1,300 @@ +import streamlit as st +import pandas as pd +import plotly.express as px + +from tabs.utils import load_peds_data + +def get_data(json_data, tag, name_mapping=None): + data = json_data.get(tag) + names = list(data.keys()) + values = list(data.values()) + if name_mapping: + new_names = [name_mapping.get(name, name) for name in names] + return new_names, values + return names, values + +def create_pie_chart(names, values, title, props={'height': 400, 'color_discrete_sequence': px.colors.qualitative.D3, 'y': -0.3, 'entry_width': 0.5, 'font_size': 11}): + fig = px.pie( + names=names, values=values, + category_orders={'names': names}, + color_discrete_sequence=props['color_discrete_sequence'], + hole=0.5) # donut chart + + title_setting = { + 'text': title, + 'font': { + 'size': 16, + 'color': 'black', + 'family': 'Source Sans Pro, sans-serif' + }, + 'x': 0.005, + 'y': 0.99, + 'xanchor': 'left', + 'yanchor': 'top', + } + + fig.update_layout( + autosize=True, + showlegend=True, + paper_bgcolor='white', + plot_bgcolor='white', + margin=dict(l=0, r=0, t=30, b=0), 
+ height=props['height'], + title=title_setting, + legend=dict( + x=0, + y=props['y'], + xanchor='left', + yanchor='bottom', + orientation='h', + traceorder='normal', + font=dict(size=props['font_size'], color="black",family='Source Sans Pro, sans-serif', lineposition='none'), + itemwidth=30, + itemsizing='trace', + valign='top', + entrywidthmode='fraction', + entrywidth=props['entry_width'], + indentation= -5, + tracegroupgap=0 + ) + ) + + fig.update_traces( + marker=dict(line=dict(color='black', width=0.5)), + textposition='inside', + textfont=dict(family='Source Sans Pro, sans-serif'), + texttemplate="%{value}
(%{percent:.2%})", + hovertemplate="%{label}
%{percent:.2%}(%{value})", + domain=dict(x=[0, 1], y=[0, 1]) + ) + + return fig + +def get_asis_chart_property(text, font_size=11): + return { + 'title': { + 'text': text, + 'font': { + 'size': font_size, + 'color': 'black', + 'family': 'Source Sans Pro, sans-serif' + } + }, + 'tickfont': { + 'size': font_size, + 'color': 'black', + 'family': 'Source Sans Pro, sans-serif' + }, + 'showgrid': True, + 'gridcolor': 'lightgray', + } + +def create_bar_chart(names, values, title, props={'height': 400, 'individual_color': False, 'color_discrete_sequence': px.colors.qualitative.D3, 'orientation':'v', 'x': '', 'y': '', 'font_size': 11}): + fig = px.bar( + x=values if props['orientation'] == 'h' else names, + y=names if props['orientation'] == 'h' else values, + orientation=f"{props['orientation']}", + color=names if props['individual_color'] else None, + color_discrete_sequence=props['color_discrete_sequence']) + + fig.update_layout( + xaxis=get_asis_chart_property(props['x'], props['font_size']), + yaxis=get_asis_chart_property(props['y'], props['font_size']), + autosize=True, + showlegend=False, + paper_bgcolor='white', + plot_bgcolor='white', + height=props['height'], + margin=dict(l=5, r=5, t=30, b=5), + title={ + 'text': title, + 'font': { + 'size': 16, + 'color': 'black', + 'family': 'Source Sans Pro, sans-serif' + }, + 'x': 0.001, + 'y': 0.99, + 'xanchor': 'left', + 'yanchor': 'top', + } + ) + fig.update_traces( + marker=dict(line=dict(color='black', width=0.5)), + textposition='auto', + textfont=dict(family='Source Sans Pro, sans-serif'), + textangle=0, + texttemplate="%{x}", + hovertemplate='%{y}
%{x}', + ) + + return fig + +def getPlotlyConfig(): + return { + 'displayModeBar': True, + 'displaylogo': False, + 'modeBarButtonsToRemove': [ + 'zoom2d', 'pan2d', 'select2d', 'lasso2d', 'zoomIn2d', 'zoomOut2d', + 'autoScale2d', 'resetScale2d' + ] + } + +def create_plots(data, plots, cols_per_row=4): + num_plots = len(plots) + rows = (num_plots + cols_per_row - 1) // cols_per_row # Calculate number of rows needed + + for row in range(rows): + cols = st.columns(cols_per_row, gap="small", vertical_alignment="top") + for col_index in range(cols_per_row): + plot_index = row * cols_per_row + col_index + if plot_index < num_plots: + plot = plots[plot_index] + key, title, chart_type, plot_props = plot[:4] + # Optional name mapping for charts + name_mapping = plot[4] if len(plot) == 5 else None + + if key and title and chart_type: + labels, values = get_data(data, key, name_mapping) + + if num_plots <= rows*cols_per_row+col_index: + if chart_type is None: + cols[col_index].empty() + else: + if chart_type == 'pie': + fig = create_pie_chart(labels, values, title, plot_props) + elif chart_type == 'horizontal_bar': + fig = create_bar_chart(labels, values, title, plot_props) + + elif chart_type == 'vertical_bar': + fig = create_bar_chart(labels, values, title, plot_props) + cols[col_index].plotly_chart(fig, use_container_width=True, config=getPlotlyConfig()) + else: + cols[col_index].empty() + +def overview_section(data): + number_of_participants = data.get('number_of_participants') + number_of_recordings = data.get('number_of_recordings') + total_hours_of_recordings = data.get('total_hours_of_recordings') + total_questionnaire_collected = pd.json_normalize(data.get('questionnaire_collected')).values.sum() + total_acoustic_task_collected = pd.json_normalize(data.get('acoustic_task_collected')).values.sum() + + if isinstance(total_hours_of_recordings, float): + total_hours_of_recordings = round(total_hours_of_recordings, 2) + + cards = [ + ("Number of Participants", 
number_of_participants), + ("Number of Recordings", number_of_recordings), + ("Total of Questionnaires", total_questionnaire_collected), + ("Total of Acoustic Tasks", total_acoustic_task_collected), + ("Total Hours of Recordings", total_hours_of_recordings) + ] + + # Create a 5-column layout for the metrics + columns = st.columns([1, 1, 1, 1, 1]) + for i, col in enumerate(columns): + name, value = cards[i] + if name is not None and value is not None: + col.metric(name, value) + else: + col.empty() + +def data_collection_section(data, collected_data): + columns = st.columns([2,2,1]) + # Define columns for questionnaires and acoustic tasks + column_configs = { + 'questionnaire_collected': { + "name": st.column_config.TextColumn( + "Questionnaire", + width="large" + ), + "value": st.column_config.TextColumn( + "Count", + width="small" + ) + }, + 'acoustic_task_collected': { + "name": st.column_config.TextColumn( + "Acoustic Task", + width="large" + ), + "value": st.column_config.TextColumn( + "Count", + width="small" + ) + } + } + + for index, (key, title) in enumerate(collected_data): + names, values = get_data(data, key) + df = pd.DataFrame({"name": names, "value": values}) + with columns[index]: + st.dataframe(df,column_config = column_configs[key], hide_index=True) + +def study_dashboard_page_peds(tab_name): + data = load_peds_data() + if not data: + st.write("No data available") + return + + # Pre-defined colors for plots + colors = [ + '#D21AE8', + '#63A9FF', + '#FF7820', + '#FF4121', + '#18ED84', + '#FCF500', + '#7E04E9', + '#F109AE', + '#0FB6B5', + '#1D8AD7' + ] + + # Demographic plots + # params: key, title, chart_type, props, name_mapping + # key: key in the JSON object + # title: title of the chart + # chart_type: type of the chart (pie, horizontal_bar, vertical_bar, table) + # props: plot properties + # name_mapping: mapping of names to be displayed in the chart if needed + demographic_plots = [ + ('gender_identity', 'Gender Identity', 'pie', {'height': 
450, 'color_discrete_sequence': colors, 'y': -0.26, 'entry_width': 0.5, 'font_size': 11}, {'Female gender identity': 'Female', 'Male gender identity': 'Male', 'Non-binary or genderqueer gender identity': 'Non-binary/genderqueer'}), + ('race', 'Race', 'horizontal_bar', {'height': 450, 'individual_color': True, 'color_discrete_sequence': colors, 'orientation':'h', 'x': 'Count', 'y': 'Race Categories', 'font_size': 11}, {'American Indian or Alaska Native': 'American Indian/Alaska Native', 'Native Hawaiian or other Pacific Islander': 'Native Hawaiian/other Pacific Islander', 'Canadian Indigenous or Aboriginal': 'Canadian Indigenous/Aboriginal'}), + ('primary_language', 'Primary Language', 'pie', {'height': 450, 'color_discrete_sequence': colors, 'y': -0.26, 'entry_width': .15, 'font_size': 11}), + ('age_groups','Age', 'horizontal_bar', {'height': 450, 'individual_color': False, 'color_discrete_sequence': colors, 'orientation':'h', 'x': 'Number of Participants', 'y': 'Age Groups', 'font_size': 11}, {'90 and above': '90 and
above'}) + ] + + # Disorder plots + # params: key, title, chart_type, props, name_mapping + # key: key in the JSON object + # title: title of the chart + # chart_type: type of the chart (pie, horizontal_bar, vertical_bar, table) + # props: plot properties + # name_mapping: mapping of names to be displayed in the chart if needed + + + # Data collection plots + collected_data_plots = [ + ('questionnaire_collected', 'Questionnaire Collection', 'horizontal_bar', {'height': 600, 'individual_color': False, 'color_discrete_sequence': colors, 'orientation':'h', 'x': 'Count', 'y': 'Questionnaire Categories', 'font_size': 11}), + ('acoustic_task_collected', 'Acoustic Task Collection', 'horizontal_bar', {'height': 600, 'individual_color': False, 'color_discrete_sequence': colors, 'orientation':'h', 'x': 'Count', 'y': 'Acoustic Task Categories', 'font_size': 11}), + ] + + # Overview Section + st.subheader("Overview") + overview_section(data) + + # Disorders Section + st.subheader("Diagnostic Breakdown") + + + # Demographic Section + st.subheader("Demographic Breakdown") + # Create the demographic plots + # params: data, plots, cols_per_row + create_plots(data, demographic_plots, 3) + + # Data Collection Section + #params: data, plots, cols_per_row + st.subheader("Data Collection") + create_plots(data, collected_data_plots, 2) \ No newline at end of file diff --git a/src/tabs/utils.py b/src/tabs/utils.py index c910e72..e5ece8d 100644 --- a/src/tabs/utils.py +++ b/src/tabs/utils.py @@ -91,6 +91,18 @@ def load_data(): # Read the JSON object from the file file_path = os.path.join('src', 'tabs', 'dashboard_data', 'dashboard_data.json') # Check if the file path is valid and the file exists + if not os.path.isfile(file_path): + st.warning(f"The file at path {file_path} does not exist. 
Data is unavailable.") + return {} + with open(file_path, 'r') as json_file: + data = json.load(json_file) + return data + + +def load_peds_data(): + # Read the JSON object from the file + file_path = os.path.join('src', 'tabs', 'dashboard_data', 'peds_dashboard_data.json') + # Check if the file path is valid and the file exists if not os.path.isfile(file_path): st.warning(f"The file at path {file_path} does not exist. Data is unavailable.") return {} From 46d446c622d7cd43d7a968e2f876155dc299ba5c Mon Sep 17 00:00:00 2001 From: Evan Ng Date: Tue, 9 Dec 2025 18:24:28 -0500 Subject: [PATCH 08/17] Updated metrics for race --- src/tabs/dashboard_data/peds_dashboard_data.json | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/src/tabs/dashboard_data/peds_dashboard_data.json b/src/tabs/dashboard_data/peds_dashboard_data.json index 0a6c384..13f8bae 100644 --- a/src/tabs/dashboard_data/peds_dashboard_data.json +++ b/src/tabs/dashboard_data/peds_dashboard_data.json @@ -7,13 +7,13 @@ "Prefer not to answer": 1 }, "race": { - "Asian": 87, + "Asian": 83, "Black or African American": 16, - "White": 119, + "White": 116, "Canadian Indigenous or Aboriginal": 4, "Other": 40, - "Prefer not to answer": 10, - "Multiple Races": 21 + "Prefer not to answer": 9, + "Multiple Races": 28 }, "age_groups": { "2-4": 0, From e01e7828179bf09a5cfa6c6e6d50cffc45127f62 Mon Sep 17 00:00:00 2001 From: Jordan Wilke Date: Mon, 15 Dec 2025 11:34:37 -0500 Subject: [PATCH 09/17] Added more data preprocessing information --- src/tabs/data_pre_processing.py | 118 ++++++++++++++++++++++++++++---- 1 file changed, 105 insertions(+), 13 deletions(-) diff --git a/src/tabs/data_pre_processing.py b/src/tabs/data_pre_processing.py index 2c5af29..feeea3e 100644 --- a/src/tabs/data_pre_processing.py +++ b/src/tabs/data_pre_processing.py @@ -16,17 +16,107 @@ def data_pre_processing_page(tab_name): ├── README.md ├── dataset_description.json ├── phenotype - │   ├── .json - │   └── .tsv + ├── 
confounders + │ ├── confounders.json + │ └── confounders.tsv + ├── demographics + │ ├── demographics.json + │ └── demographics.tsv + ├── diagnosis + │ ├── adhd_adult.json + │ ├── adhd_adult.tsv + │ ├── airway_stenosis.json + │ ├── airway_stenosis.tsv + │ ├── amyotrophic_lateral_sclerosis.json + │ ├── amyotrophic_lateral_sclerosis.tsv + │ ├── anxiety.json + │ ├── anxiety.tsv + │ ├── benign_lesions.json + │ ├── benign_lesions.tsv + │ ├── bipolar_disorder.json + │ ├── bipolar_disorder.tsv + │ ├── cognitive_impairment.json + │ ├── cognitive_impairment.tsv + │ ├── control.json + │ ├── control.tsv + │ ├── copd_and_asthma.json + │ ├── copd_and_asthma.tsv + │ ├── depression.json + │ ├── depression.tsv + │ ├── glottic_insufficiency.json + │ ├── glottic_insufficiency.tsv + │ ├── laryngeal_cancer.json + │ ├── laryngeal_cancer.tsv + │ ├── laryngeal_dystonia.json + │ ├── laryngeal_dystonia.tsv + │ ├── laryngitis.json + │ ├── laryngitis.tsv + │ ├── muscle_tension_dysphonia.json + │ ├── muscle_tension_dysphonia.tsv + │ ├── parkinsons_disease.json + │ ├── parkinsons_disease.tsv + │ ├── precancerous_lesions.json + │ ├── precancerous_lesions.tsv + │ ├── psychiatric_history.json + │ ├── psychiatric_history.tsv + │ ├── ptsd_adult.json + │ ├── ptsd_adult.tsv + │ ├── unexplained_chronic_cough.json + │ ├── unexplained_chronic_cough.tsv + │ ├── unilateral_vocal_fold_paralysis.json + │ └── unilateral_vocal_fold_paralysis.tsv + ├── enrollment + │ ├── eligibility.json + │ ├── eligibility.tsv + │ ├── enrollment_form.json + │ ├── enrollment_form.tsv + │ ├── participant.json + │ └── participant.tsv + ├── questionnaire + │ ├── custom_affect_scale.json + │ ├── custom_affect_scale.tsv + │ ├── dsm5_adult.json + │ ├── dsm5_adult.tsv + │ ├── dyspnea_index.json + │ ├── dyspnea_index.tsv + │ ├── gad7_anxiety.json + │ ├── gad7_anxiety.tsv + │ ├── leicester_cough_questionnaire.json + │ ├── leicester_cough_questionnaire.tsv + │ ├── panas.json + │ ├── panas.tsv + │ ├── phq9.json + │ ├── phq9.tsv + │ ├── 
productive_vocabulary.json + │ ├── productive_vocabulary.tsv + │ ├── vhi10.json + │ ├── vhi10.tsv + │ ├── voice_perception.json + │ └── voice_perception.tsv + └── task + ├── acoustic_task.json + ├── acoustic_task.tsv + ├── harvard_sentences.json + ├── harvard_sentences.tsv + ├── random_item_generation.json + ├── random_item_generation.tsv + ├── recording.json + ├── recording.tsv + ├── session.json + ├── session.tsv + ├── stroop.json + ├── stroop.tsv + ├── voice_perception.json + ├── voice_perception.tsv + ├── voice_problem_severity.json + ├── voice_problem_severity.tsv + ├── winograd.json + └── winograd.tsv └── sub- - ├── ses- - │   └── audio - │   ├── sub-_ses-_task-.wav - │   ├── sub-_ses-_task-.json └── ses- -    └── voice -    ├── sub-_ses-_task-.wav -    ├── sub-_ses-_task-.json +    └── audio +    ├── sub-_ses-_task-.wav +    └── sub-_ses-_task-.json ``` **Speech tasks included** @@ -65,22 +155,24 @@ def data_pre_processing_page(tab_name): - Parselmouth/Praat speech features for any speech tasks - Speech intelligibility metrics for speech tasks Time varying features - - Torchaudio-based pitch contour, spectrograms, mel spectrogram, and MFCCs + - Torchaudio-based pitch contour, spectrograms, mel spectrogram, and MFCCs + - Speech Articulatory Coding (sparc)-based features including electromagnetic articulography (EMA) estimates, plus loudness, periodicity, and pitch measures + - Phonetic posteriorgrams (PPGs) The waveform-derived features are stored using two formats: 1. A fixed feature format that includes static features extracted from the entire waveform 2. A temporal format that varies for each audio file depending on the length of recording. - The questionnaire features are combined into a single table (phenotype.tsv). This can be used for cohort selection. + The questionnaire features are collected and distributed in the phenotype folder format shown above. These can be used for cohort selection. 
- **Methods of De-identification for v2.0.0** + **Methods of De-identification for v3.0.0** All direct identifiers were removed, as these would reveal the identity of the research participant. These include name, civic address, and social security numbers. Indirect identifiers were removed where these created a significant risk of causing participant re-identification, for example through their combination with other public data available on social media, in government registries, or elsewhere. These include select geographic or demographic identifiers, as well as some information about household composition or cultural identity. Non-identifying elements of data that revealed highly sensitive information, such as information about household income, mental health status, traumatic life experiences, and the like were also removed. Raw audio transcripts were reviewed and any audio recordings which contained potentially identifying information and external voices were removed from the release. All sensitive fields are removed from the dataset at this stage. These correspond to data elements encoded as sensitive (Column name: "Identifier?") listed in the [RedCap data dictionary (CSV)](https://github.com/eipm/bridge2ai-redcap/blob/main/data/bridge2ai_voice_project_data_dictionary.csv). - In addition, all spectrograms, mfcc, and transcriptions from open responses are removed from the feature only dataset. + In addition, all spectrograms, mfcc, Mel spectrograms, transcriptions, EMAs, and PPGs from open responses are removed from the feature only dataset. 
**Audit protocol** - Generate missingness tables From 4ed3485ec6064143d62ab143c60aa554afe13847 Mon Sep 17 00:00:00 2001 From: Evan Ng Date: Mon, 15 Dec 2025 18:18:22 -0500 Subject: [PATCH 10/17] Updated peds #s --- src/tabs/dashboard_data/peds_dashboard_data.json | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/src/tabs/dashboard_data/peds_dashboard_data.json b/src/tabs/dashboard_data/peds_dashboard_data.json index 13f8bae..dbc1fd7 100644 --- a/src/tabs/dashboard_data/peds_dashboard_data.json +++ b/src/tabs/dashboard_data/peds_dashboard_data.json @@ -1,6 +1,6 @@ { "number_of_participants": 300, - "number_of_recordings": 28896, + "number_of_recordings": 23533, "gender_identity": { "Female gender identity": 159, "Male gender identity": 140, @@ -29,7 +29,7 @@ "Mandarin": 5, "Other": 11 }, - "total_hours_of_recordings": 38.608125936003276, + "total_hours_of_recordings": 28.0972599306, "questionnaire_collected": { "Q - Pediatric - Generic - Demographics": 300, "Q - Pediatric - Generic - VHI-10": 300, From 6dd40941b3233fb9c424553e9e0279450c98c1e2 Mon Sep 17 00:00:00 2001 From: Evan Ng Date: Tue, 16 Dec 2025 13:26:00 -0500 Subject: [PATCH 11/17] updated adult dashboard --- src/tabs/dashboard_data/dashboard_data.json | 212 +++++++++----------- 1 file changed, 100 insertions(+), 112 deletions(-) diff --git a/src/tabs/dashboard_data/dashboard_data.json b/src/tabs/dashboard_data/dashboard_data.json index e8ffa5a..4479079 100644 --- a/src/tabs/dashboard_data/dashboard_data.json +++ b/src/tabs/dashboard_data/dashboard_data.json @@ -1,145 +1,133 @@ { - "number_of_participants": 442, - "number_of_recordings": 16738, + "number_of_participants": 833, + "number_of_recordings": 32249, "control": { - "Yes": 64, - "No": 378 + "Yes": 142, + "No": 691 }, "gender_identity": { - "Female gender identity": 260, - "Male gender identity": 177, - "Prefer not to answer": 3 + "Female gender identity": 510, + "Male gender identity": 316, + "Prefer not to answer": 7 }, 
"sexual_orientation": { - "Heterosexual": 377, - "Bisexual": 22, - "Prefer not to answer": 21, - "Homosexual": 15, - "Other": 5 + "Heterosexual": 715, + "Bisexual": 39, + "Prefer not to answer": 28, + "Homosexual": 43, + "Other": 8 }, "race": { - "American Indian or Alaska Native": 1, - "Asian": 28, - "Black or African American": 33, + "American Indian or Alaska Native": 3, + "Asian": 48, + "Black or African American": 71, "Native Hawaiian or other Pacific Islander": 2, - "White": 341, + "White": 634, "Canadian Indigenous or Aboriginal": 0, - "Other": 10, - "Prefer not to answer": 14, - "Multiple Races": 11 + "Other": 23, + "Prefer not to answer": 31, + "Multiple Races": 21 }, "ethnicity": { - "Not Hispanic or Latino": 391, - "Hispanic or Latino": 25, - "Prefer not to answer": 24 + "Not Hispanic or Latino": 724, + "Hispanic or Latino": 68, + "Prefer not to answer": 40 }, "age_groups": { "0-9": 0, - "10-19": 4, - "20-29": 39, - "30-39": 28, - "40-49": 42, - "50-59": 73, - "60-69": 101, - "70-79": 117, - "80-89": 36, - "90 and above": 2 + "10-19": 1, + "20-29": 88, + "30-39": 57, + "40-49": 78, + "50-59": 127, + "60-69": 191, + "70-79": 215, + "80-89": 72, + "90 and above": 4 }, "primary_language": { - "English": 436, - "Spanish": 1, - "Other": 5 + "English": 828, + "Spanish": 5, + "Other": 0 }, "disorder_types": { - "Voice Disorders": 96, - "Neurological and Neurodegenerative Disorders": 70, - "Mood and Psychiatric Disorders": 22, - "Respiratory Disorders": 77, - "Multiple Disorders": 177 + "Voice Disorders": 159, + "Neurological and Neurodegenerative Disorders": 128, + "Mood and Psychiatric Disorders": 41, + "Respiratory Disorders": 151, + "Multiple Disorders": 354 }, "voice_disorders_category": { - "Glottic Insufficiency / Presbyphonia": 2, - "Laryngeal Cancer": 8, - "Laryngitis": 11, - "Lesions of the vocal cord (nodule, polyp, cyst)": 37, - "Muscle Tension Dysphonia (MTD)": 36, - "Recurrent Laryngeal Papilloma (RRP)": 4, - "Spasmodic Dysphonia / Laryngeal 
Tremor": 49, - "Unilateral Vocal Fold Paralysis": 39 + "Glottic Insufficiency / Presbyphonia": 8, + "Laryngeal Cancer": 4, + "Laryngitis": 6, + "Benign Lesions": 58, + "Muscle Tension Dysphonia (MTD)": 58, + "Laryngeal Dystonia": 78, + "Unilateral Vocal Fold Paralysis": 69 }, "neurological_and_neurodegenerative_disorders_category": { - "Alzheimer's, Dementia, or Mild Cognitive Impairment": 42, - "Amyotrophic Lateral Sclerosis (ALS)": 0, - "Huntington's Disease": 0, - "Parkinson's disease": 61 + "Alzheimer's, Dementia, or Mild Cognitive Impairment": 74, + "Amyotrophic Lateral Sclerosis (ALS)": 3, + "Parkinson's disease": 106 }, "mood_and_psychiatric_disorders_category": { - "Alcohol or Substance Use Disorder": 4, - "Anxiety Disorder": 49, - "Attention-Deficit / Hyperactivity Disorder (ADHD)": 12, - "Autism Spectrum Disorder (ASD)": 0, - "Bipolar Disorder": 11, - "Borderline Personality Disorder (BPD)": 3, - "Depression or Major Depressive Disorder": 52, - "Eating Disorder (ED)": 1, - "Insomnia / Sleep Disorder": 12, - "Obsessive-Compulsive Disorder (OCD)": 3, - "Panic Disorder": 2, - "Post-Traumatic Stress Disorder (PTSD)": 10, - "Schizophrenia": 0, - "Social Anxiety Disorder": 6, - "Other Psychiatric Disorder": 6 + "Anxiety Disorder": 18, + "Attention-Deficit / Hyperactivity Disorder (ADHD)": 173, + "Bipolar Disorder": 12, + "Depression or Major Depressive Disorder": 4, + "Post-Traumatic Stress Disorder (PTSD)": 141 }, "respiratory_disorders_category": { - "Asthma": 44, - "Airway Stenosis (for example: bilateral vocal fold paralysis; laryngeal stenosis)": 69, - "Chronic Cough": 40, - "COPD": 16, - "Obstructive Sleep Apnea (OSA)": 44 + "COPD and Asthma": 9, + "Airway Stenosis (for example: bilateral vocal fold paralysis; laryngeal stenosis)": 138, + "Unexplained Chronic Cough": 41 }, - "total_hours_of_recordings": 86.13216666666669, + + "total_hours_of_recordings": 158.5231525174, "questionnaire_collected": { - "Q Mood Participant History": 35, - "Q Generic Voice 
Perception": 477, - "Q Generic Voice Handicap Index Vhi10": 474, - "Q Generic Patient Health Questionnaire9": 475, - "Q Generic Demographics": 485, - "Q Generic Confounders": 480, - "Q - Voice - Voice Problem Severity": 217, - "Q - Resp - Leicester Cough Questionnaire Lcq": 176, - "Q - Resp - Dyspnea Index Di": 177, - "Q - Neuro Winograd Schemas": 149, - "Q - Neuro - Wordcolor Stroop": 151, - "Q - Neuro - Random Item Generation": 150, - "Q - Neuro - Productive Vocabulary": 140, - "Q - Mood - Ptsd Adult": 93, - "Q - Mood - Panas": 95, - "Q - Mood - Dsm5 Adult": 84, - "Q - Mood - Custom Affect Scale": 95, - "Q - Mood - Adhd Adult": 93, - "Q - Generic - Gad7 Anxiety": 473 + "Q Mood Participant History": 75, + "Q Generic Voice Perception": 897, + "Q Generic Voice Handicap Index Vhi10": 893, + "Q Generic Patient Health Questionnaire9": 891, + "Q Generic Demographics": 1744, + "Q Generic Confounders": 903, + "Q - Voice - Voice Problem Severity": 338, + "Q - Resp - Leicester Cough Questionnaire Lcq": 312, + "Q - Resp - Dyspnea Index Di": 319, + "Q - Neuro Winograd Schemas": 177, + "Q - Neuro - Wordcolor Stroop": 261, + "Q - Neuro - Random Item Generation": 261, + "Q - Neuro - Productive Vocabulary": 251, + "Q - Mood - Ptsd Adult": 141, + "Q - Mood - Panas": 180, + "Q - Mood - Dsm5 Adult": 163, + "Q - Mood - Custom Affect Scale": 179, + "Q - Mood - Adhd Adult": 173, + "Q - Generic - Gad7 Anxiety": 889 }, "acoustic_task_collected": { - "Word-color Stroop - neurology": 151, - "Voluntary Cough - respiratory": 179, - "Story recall - generic": 478, - "Respiration and cough - generic": 478, - "Random Item Generation - neurology": 150, - "Rainbow Passage - generic": 481, - "Prolonged vowel - generic": 482, - "Productive Vocabulary - neurology": 151, - "Picture description - generic": 476, - "Open response questions - mood": 96, - "Maximum phonation time - generic": 481, - "Loudness - generic": 481, - "Glides - generic": 482, - "Free speech - generic": 480, - "Free Speech - 
voice": 217, - "Diadochokinesis - generic": 481, - "Cinderella Story - neurology": 151, - "Caterpillar Passage - voice": 219, - "Cape V sentences - voice": 219, - "Breath Sounds - respiratory": 179, - "Animal fluency - mood": 95 + "Word-color Stroop - neurology": 261, + "Voluntary Cough - respiratory": 309, + "Story recall - generic": 892, + "Respiration and cough - generic": 894, + "Random Item Generation - neurology": 251, + "Rainbow Passage - generic": 823, + "Prolonged vowel - generic": 899, + "Productive Vocabulary - neurology": 263, + "Picture description - generic": 892, + "Open response questions - mood": 164, + "Maximum phonation time - generic": 899, + "Loudness - generic": 897, + "Glides - generic": 897, + "Free speech - generic": 898, + "Free Speech - voice": 360, + "Diadochokinesis - generic": 895, + "Cinderella Story - neurology": 235, + "Caterpillar Passage - voice": 384, + "Cape V sentences - voice": 384, + "Breath Sounds - respiratory": 308, + "Animal fluency - mood": 163, + "Harvard Sentences - generic": 74 } } \ No newline at end of file From 63892ccad29e3c25d60930eb28f732913d57d03c Mon Sep 17 00:00:00 2001 From: Jordan Wilke Date: Tue, 16 Dec 2025 16:23:48 -0500 Subject: [PATCH 12/17] Added links and more protocol details --- src/tabs/collection_methods.py | 71 +++++++++++++++++++++++++++-- src/tabs/utils.py | 4 +- tables/Validated_Questionnaires.csv | 34 +++++++------- 3 files changed, 86 insertions(+), 23 deletions(-) diff --git a/src/tabs/collection_methods.py b/src/tabs/collection_methods.py index 72aafec..e70b18f 100644 --- a/src/tabs/collection_methods.py +++ b/src/tabs/collection_methods.py @@ -38,19 +38,82 @@ def collection_methods_page(tab_name): >Bensoussan, Y., Ghosh, S. S., Rameau, A., Boyer, M., Bahr, R., Watts, S., Rudzicz, F., Bolser, D., Lerner-Ellis, J., Awan, S., Powell, M. E., Belisle-Pipon, J.-C., Ravitsky, V., Johnson, A., Zisimopoulos, P., Tang, J., Sigaras, A., Elemento, O., Dorr, D., … Bridge2AI-Voice. (2024). 
eipm/bridge2ai-redcap. Zenodo. [https://zenodo.org/doi/10.5281/zenodo.12760724](https://zenodo.org/doi/10.5281/zenodo.12760724). - Protocols can be found in the Bridge2AI-Voice documentation for v2.0.0 of the dataset at [https://kind-lab.github.io/vbai-fhir/protocol.html](https://kind-lab.github.io/vbai-fhir/protocol.html). - + Protocols can be found in the Bridge2AI-Voice documentation for v3.0.0 of the dataset for each cohort in the following: + - [Voice Disorders]() + - [Respiratory]() + - [Mood/Psychiatric]() + - [Neurological]() + - [Controls]() + - [Pediatrics]() + - [Peds 10+]() + - [Peds 6-10]() + - [Peds 4-6]() + - [Peds 2-4]() """ ) csv_file_path = "tables/Disease_cohort_inclusion_exclusion_criteria.csv" caption = 'Table 1 - Disease cohort inclusion/exclusion criteria and validation methods' + + def map_tasks_to_videos(task): + tasks_to_link = { + "Respiration Part A": "https://www.youtube.com/watch?v=Yb4bMj18Iqg", + "Cough part A": "https://www.youtube.com/watch?v=Yb4bMj18Iqg", + "Breath Sounds": "https://www.youtube.com/watch?v=i7BhlwNMk28", + "Voluntary Cough": "https://www.youtube.com/watch?v=2rLMfMjS_R0", + "Prolonged Vowel": "https://www.youtube.com/watch?v=ZanjPvWkB3M", + "Maximum Phonation Time": "https://www.youtube.com/watch?v=1limRFPAtPE", + "Glides": "https://www.youtube.com/watch?v=xKBYdkwEOvU", + "Loudness": "https://www.youtube.com/watch?v=5ssCSqZPb7Y", + "Diadochokinesis": "https://www.youtube.com/watch?v=RlY5KMXtZ4o", + "Rainbow Passage": "https://www.youtube.com/watch?v=Syq_ryCNQKQ", + "Caterpillar Passage": "https://www.youtube.com/watch?v=jN7bGT-PFXY", + "Free Speech Part A": "https://www.youtube.com/watch?v=FqK0WeGCAzg", + "Picture Description": "https://www.youtube.com/watch?v=abjWJEN6jf8", + "Free Speech Voice": "https://www.youtube.com/watch?v=5QMBSHNLRVI", + "Story Recall": "https://www.youtube.com/watch?v=cfkU-N5tWe4", + "Animal Fluency": "https://www.youtube.com/watch?v=4lkEAxDiEE8", + "Open Response Questions": 
"https://www.youtube.com/watch?v=THfOnGCaALA", + "Word-Color Stroop": "https://www.youtube.com/watch?v=IzotHKbYh30", + "Productive Vocabulary": "https://www.youtube.com/watch?v=TEshcUAlfPA", + "Cape-V Sentences": "https://www.youtube.com/watch?v=1qbiCdWxuSY", + "Random Item generation": "https://www.youtube.com/watch?v=ry__w1Mm2aE", + "Cinderella Story": "https://www.youtube.com/watch?v=eHx-vetG8Fk", + "ABC’s": None, + "Ready For School": None, + "Favorite Show": None, + "Favorite Food": None, + "Outside of School": None, + "Months": None, + "Counting": None, + "Naming Animals": None, + "Naming Food": None, + "Identifying Pictures": None, + "Picture Description (Pediatrics)": None, + "Long Sounds": None, + "Noisy Sounds": None, + "Caterpillar Passage (Pediatrics)": None, + "Repeat Words": None, + "Role naming": None, + "Repeat Sentences": None, + "Silly Sounds": None, + } + + link = tasks_to_link[task] + if link: + return f"{task} [Example]" + else: + return f"{task}" + + def map_questionnaire_link(q_link): + return f"PDF" + create_html_table(csv_file_path, caption, [], 0) csv_file_path = "tables/Acoustic_Tasks_Protocol.csv" caption = 'Table 2 - Acoustic Tasks in Protocol' - create_html_table(csv_file_path, caption) + create_html_table(csv_file_path, caption,link_formattter={"Task": map_tasks_to_videos}) csv_file_path = "tables/Validated_Questionnaires.csv" caption = 'Table 3 - Validated Questionnaires integrated into App' - create_html_table(csv_file_path, caption, ['X']) + create_html_table(csv_file_path, caption, ['X'],link_formattter={'Example': map_questionnaire_link}) diff --git a/src/tabs/utils.py b/src/tabs/utils.py index e5ece8d..aa9d8e3 100644 --- a/src/tabs/utils.py +++ b/src/tabs/utils.py @@ -11,7 +11,7 @@ def coming_soon_message(tab_name): image_path = "images/Wave.png" st.image(image_path, caption='', use_container_width=True) -def create_html_table(csv_file_path, caption=None, cell_values=[], column_index=-1): +def create_html_table(csv_file_path, 
caption=None, cell_values=[], column_index=-1,link_formattter=None): # Read CSV file df = pd.read_csv(csv_file_path, dtype=str) # Ensure all columns are read as strings @@ -21,7 +21,7 @@ def create_html_table(csv_file_path, caption=None, cell_values=[], column_index= bold_cells = df.iloc[:, column_index].tolist() if column_index >= 0 else [] # Convert DataFrame to HTML table - html_table = df.to_html(index=False, classes='table table-striped') + html_table = df.to_html(index=False, classes='table table-striped',escape=False,formatters=link_formattter) if bold_cells and len(bold_cells) > 0: for bold_cell in bold_cells: diff --git a/tables/Validated_Questionnaires.csv b/tables/Validated_Questionnaires.csv index 7c311bf..9ac780b 100644 --- a/tables/Validated_Questionnaires.csv +++ b/tables/Validated_Questionnaires.csv @@ -1,17 +1,17 @@ -Validated Questionnaire,Voice Disorders,Respiratory,Mood/Psychiatric,Neurological,Controls,Pediatrics -Voice Handicap Index-10 (VHI-10),X,X,X,X,X, -Patient Health Questionnaire (PHQ-9),X,X,X,X,X, -General Anxiety Disorder (GAD-7),X,X,X,X,X, -Positive and Negative Affect Schedule (PANAS),,,X,,X, -Custom Affect scale,,,X,,X, -Post-Traumatic Stress Disorder Test (PTSD) Adult,,,X,,X, -Attention Deficit and Hyperactivity Disorder Questionnaire (ADHD-Adult),,,X,,X, -The Diagnostic and Statistical Manual of Mental Disorders (DSM-5 Adult),,,X,,X, -Dyspnea Index (DI),,X,,,X, -Leicester Cough Questionnaire (LCQ),,X,,,X, -Winograd Questionnaire,,,,X,X, -Montreal Cognitive Assessment (MOCA)*,,,,X,X, -Children's Voice Handicap Index-10 (C-VHI-10),,,,,,X -Pediatric Voice Outcomes Survey (PVOS),,,,,,X -Pediatric Voice-Related Quality-of-Life (PVRQOL),,,,,,X -Patient Health Questionnaire modified for Adolescents (PHQ-A),,,,,,X +Validated Questionnaire,Voice Disorders,Respiratory,Mood/Psychiatric,Neurological,Controls,Pediatrics,Example +Voice Handicap Index-10 
(VHI-10),X,X,X,X,X,,https://github.com/eipm/bridge2ai-redcap/blob/main/data/en-us/Questionnaire%20-%20PDFs/Generic/Q%20-%20Generic%20-%20VHI-10.pdf +Patient Health Questionnaire (PHQ-9),X,X,X,X,X,,https://github.com/eipm/bridge2ai-redcap/blob/main/data/en-us/Questionnaire%20-%20PDFs/Generic/Q%20-%20Generic%20-%20PHQ-9.pdf +General Anxiety Disorder (GAD-7),X,X,X,X,X,,https://github.com/eipm/bridge2ai-redcap/blob/main/data/en-us/Questionnaire%20-%20PDFs/Generic/Q%20-%20Generic%20-%20GAD-7%20Anxiety.pdf +Positive and Negative Affect Schedule (PANAS),,,X,,X,,https://github.com/eipm/bridge2ai-redcap/blob/main/data/en-us/Questionnaire%20-%20PDFs/Mood/Q%20-%20Mood%20-%20PANAS.pdf +Custom Affect scale,,,X,,X,,https://github.com/eipm/bridge2ai-redcap/blob/main/data/en-us/Questionnaire%20-%20PDFs/Mood/Q%20-%20Mood%20-%20Custom%20Affect%20Scale.pdf +Post-Traumatic Stress Disorder Test (PTSD) Adult,,,X,,X,,https://github.com/eipm/bridge2ai-redcap/blob/main/data/en-us/Questionnaire%20-%20PDFs/Mood/Q%20-%20Mood%20-%20PTSD%20Adult.pdf +Attention Deficit and Hyperactivity Disorder Questionnaire (ADHD-Adult),,,X,,X,,https://github.com/eipm/bridge2ai-redcap/blob/main/data/en-us/Questionnaire%20-%20PDFs/Mood/Q%20-%20Mood%20-%20ADHD%20Adult.pdf +The Diagnostic and Statistical Manual of Mental Disorders (DSM-5 Adult),,,X,,X,,https://github.com/eipm/bridge2ai-redcap/blob/main/data/en-us/Questionnaire%20-%20PDFs/Mood/Q%20-%20Mood%20-%20DSM-5%20Adult.pdf +Dyspnea Index (DI),,X,,,X,,https://github.com/eipm/bridge2ai-redcap/blob/main/data/en-us/Questionnaire%20-%20PDFs/Resp/Q%20-%20Resp%20-%20Dyspnea%20Index%20(DI).pdf +Leicester Cough Questionnaire (LCQ),,X,,,X,,https://github.com/eipm/bridge2ai-redcap/blob/main/data/en-us/Questionnaire%20-%20PDFs/Resp/Q%20-%20Resp%20-%20Leicester%20Cough%20Questionnaire%20(LCQ).pdf +Winograd Questionnaire,,,,X,X,,https://github.com/eipm/bridge2ai-redcap/blob/main/data/en-us/Questionnaire%20-%20PDFs/Neuro/Q%20-%20Neuro%20-%20Winograd%20Schemas.pdf 
+Montreal Cognitive Assessment (MOCA)*,,,,X,X,,https://github.com/eipm/bridge2ai-redcap/blob/main/data/en-us/Questionnaire%20-%20PDFs/Neuro/Q%20-%20Neuro%20-%20MoCA.pdf +Children's Voice Handicap Index-10 (C-VHI-10),,,,,,X,https://github.com/eipm/bridge2ai-redcap/blob/main/data/en-us/Questionnaire%20-%20PDFs/Pediatric%20-%20Generic/Q%20-%20Pediatric%20-%20Generic%20-%20VHI-10.pdf +Pediatric Voice Outcomes Survey (PVOS),,,,,,X,https://github.com/eipm/bridge2ai-redcap/blob/main/data/en-us/Questionnaire%20-%20PDFs/Pediatric%20-%20Generic/Q%20-%20Pediatric%20-%20Generic%20-%20Voice%20Outcome%20Survey.pdf +Pediatric Voice-Related Quality-of-Life (PVRQOL),,,,,,X,https://github.com/eipm/bridge2ai-redcap/blob/main/data/en-us/Questionnaire%20-%20PDFs/Pediatric%20-%20Generic/Q%20-%20Pediatric%20-%20Generic%20-%20Voice%20Related%20QoL%20Survey.pdf +Patient Health Questionnaire modified for Adolescents (PHQ-A),,,,,,X,https://github.com/eipm/bridge2ai-redcap/blob/main/data/en-us/Questionnaire%20-%20PDFs/Pediatric%20-%20Generic/Q%20-%20Pediatric%20-%20Generic%20PHQ-A.pdf From 25f875e1c381eb84efcf44ea17435c84627f7205 Mon Sep 17 00:00:00 2001 From: Alexandros Sigaras Date: Thu, 18 Dec 2025 16:33:49 -0500 Subject: [PATCH 13/17] Update src/tabs/dashboard_data/peds_dashboard_data.json Co-authored-by: Copilot <175728472+Copilot@users.noreply.github.com> --- src/tabs/dashboard_data/peds_dashboard_data.json | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/tabs/dashboard_data/peds_dashboard_data.json b/src/tabs/dashboard_data/peds_dashboard_data.json index dbc1fd7..3adeaf2 100644 --- a/src/tabs/dashboard_data/peds_dashboard_data.json +++ b/src/tabs/dashboard_data/peds_dashboard_data.json @@ -55,7 +55,7 @@ "Caterpillar Passage - peds": 298, "Repeat Words - peds": 300, "Role Naming - peds": 90, - "Senteces - peds": 299, + "Sentences - peds": 299, "Silly Sounds - peds": 296 } } \ No newline at end of file From c672549e507ed4ceb4d827f28aa350f188e15f21 Mon Sep 17 00:00:00 
2001 From: Alexandros Sigaras Date: Thu, 18 Dec 2025 16:37:06 -0500 Subject: [PATCH 14/17] Update src/tabs/collection_methods.py Co-authored-by: Copilot <175728472+Copilot@users.noreply.github.com> --- src/tabs/collection_methods.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/tabs/collection_methods.py b/src/tabs/collection_methods.py index e70b18f..4e686f6 100644 --- a/src/tabs/collection_methods.py +++ b/src/tabs/collection_methods.py @@ -16,7 +16,7 @@ def collection_methods_page(tab_name): Data is collected in the clinic with the assistance of a trained researched assistant. Future data collection will also occur remotely, however remote data collection did not occur with initial dataset being released. Voice samples are collected prospectively using a custom software application (Bridge2AI-Voice app) with the Bridge2AI-Voice protocols. - For Pediatrics, all data is collected using [reproschema-ui](https://repronim.org/reproschema-ui/) with the Bridge2AI-Voice pediatic protocol. + For Pediatrics, all data is collected using [reproschema-ui](https://repronim.org/reproschema-ui/) with the Bridge2AI-Voice pediatric protocol. **Clinical validation:** Clinical validation is performed by qualified physician or practitioner based on established gold standards for diagnosis **(Table 1)**. 
From 40d7036bbc9ef70b79bca60819a99366fd2678a0 Mon Sep 17 00:00:00 2001 From: Alexandros Sigaras Date: Thu, 18 Dec 2025 16:38:46 -0500 Subject: [PATCH 15/17] Update src/tabs/collection_methods.py Co-authored-by: Copilot <175728472+Copilot@users.noreply.github.com> --- src/tabs/collection_methods.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/tabs/collection_methods.py b/src/tabs/collection_methods.py index 4e686f6..c4f6e6f 100644 --- a/src/tabs/collection_methods.py +++ b/src/tabs/collection_methods.py @@ -14,7 +14,7 @@ def collection_methods_page(tab_name): **High Volume Expert Clinics:** Outpatient clinics within hospital systems or academic institutions that have developed an expertise in a specific disease area and see more than 50 patients per month from the same disease category. Ex: Asthma/COPD pulmonary specialty clinic. - Data is collected in the clinic with the assistance of a trained researched assistant. Future data collection will also occur remotely, however remote data collection did not occur with initial dataset being released. Voice samples are collected prospectively using a custom software application (Bridge2AI-Voice app) with the Bridge2AI-Voice protocols. + Data is collected in the clinic with the assistance of a trained research assistant. Future data collection will also occur remotely, however remote data collection did not occur with initial dataset being released. Voice samples are collected prospectively using a custom software application (Bridge2AI-Voice app) with the Bridge2AI-Voice protocols. For Pediatrics, all data is collected using [reproschema-ui](https://repronim.org/reproschema-ui/) with the Bridge2AI-Voice pediatric protocol. 
From f7b8499e0671c68e7a248c4539f1764f9045d61e Mon Sep 17 00:00:00 2001 From: Alexandros Sigaras Date: Thu, 18 Dec 2025 16:39:42 -0500 Subject: [PATCH 16/17] Update src/tabs/collection_methods.py Co-authored-by: Copilot <175728472+Copilot@users.noreply.github.com> --- src/tabs/collection_methods.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/tabs/collection_methods.py b/src/tabs/collection_methods.py index c4f6e6f..d934c57 100644 --- a/src/tabs/collection_methods.py +++ b/src/tabs/collection_methods.py @@ -20,7 +20,7 @@ def collection_methods_page(tab_name): **Clinical validation:** Clinical validation is performed by qualified physician or practitioner based on established gold standards for diagnosis **(Table 1)**. - **Acoustic Tasks:** Voice, breathing, cough, and speech data are recorded with the app for adults, and with reproschema-ui or for pediatrics. A total of 22 acoustic Tasks are recorded through the app **(Table 2)**. + **Acoustic Tasks:** Voice, breathing, cough, and speech data are recorded with the app for adults, and with reproschema-ui for pediatrics. A total of 22 acoustic Tasks are recorded through the app **(Table 2)**. **Demographic surveys and confounders:** Detailed demographic data and surveys about confounding factors such as smoking and drinking history is collected through the smartphone application. From f78f044f4b27182e59f643d9907c24c3161a59a4 Mon Sep 17 00:00:00 2001 From: Alexandros Sigaras Date: Thu, 18 Dec 2025 17:11:21 -0500 Subject: [PATCH 17/17] updated version to v3.0.0 and added pediatric dataset button --- README.md | 2 +- SECURITY.md | 4 ++-- src/dashboard.py | 2 +- src/tabs/overview.py | 14 ++++++++++++-- 4 files changed, 16 insertions(+), 6 deletions(-) diff --git a/README.md b/README.md index ab64766..2910012 100644 --- a/README.md +++ b/README.md @@ -8,7 +8,7 @@ Docs for the Bridge2AI Voice Project. 
-[![GitHub](https://img.shields.io/badge/github-2.0.5-green?style=flat&logo=github)](https://github.com/eipm/bridge2ai-docs) [![Python 3.12.0](https://img.shields.io/badge/python-3.12.0-blue.svg)](https://www.python.org/downloads/release/python-3120/) [![License: MIT](https://img.shields.io/badge/License-MIT-yellow.svg)](https://opensource.org/licenses/MIT) [![DOI](https://zenodo.org/badge/860006845.svg)](https://zenodo.org/doi/10.5281/zenodo.13834653) +[![GitHub](https://img.shields.io/badge/github-3.0.0-green?style=flat&logo=github)](https://github.com/eipm/bridge2ai-docs) [![Python 3.12.0](https://img.shields.io/badge/python-3.12.0-blue.svg)](https://www.python.org/downloads/release/python-3120/) [![License: MIT](https://img.shields.io/badge/License-MIT-yellow.svg)](https://opensource.org/licenses/MIT) [![DOI](https://zenodo.org/badge/860006845.svg)](https://zenodo.org/doi/10.5281/zenodo.13834653) ## 🤝 License diff --git a/SECURITY.md b/SECURITY.md index 4e3996e..1521cdb 100644 --- a/SECURITY.md +++ b/SECURITY.md @@ -7,8 +7,8 @@ currently being supported with security updates. 
| Version | Supported | | ------- | ------------------ | -| v2.0.5 | :white_check_mark: | -| < v2.0.5 | :x: | +| v3.0.0 | :white_check_mark: | +| < v3.0.0 | :x: | ## Reporting a Vulnerability diff --git a/src/dashboard.py b/src/dashboard.py index 2c3a026..80c7bd6 100644 --- a/src/dashboard.py +++ b/src/dashboard.py @@ -48,7 +48,7 @@ def create_tabs(tabs_func): def main(): # Current version of the app - version = "2.0.5" + version = "3.0.0" # Map tab names to functions # In this dictionary, the key is the tab name and the value is the function that will be called when the tab is selected # The function is defined in the respective file diff --git a/src/tabs/overview.py b/src/tabs/overview.py index 27108d8..318abed 100644 --- a/src/tabs/overview.py +++ b/src/tabs/overview.py @@ -56,7 +56,7 @@ def overview_page(tab_name): Registration on PhysioNet and signing of a data use agreement will enable access. The latest version of the dataset is available at the following URL: """ ) - st.link_button("Bridge2AI-Voice: An ethically-sourced, diverse voice dataset linked to health information", type="primary", url="https://doi.org/10.13026/37yb-1t42", help="Bridge2AI-Voice: An ethically-sourced, diverse voice dataset linked to health information", icon=":material/login:") + st.link_button("Bridge2AI-Voice: An ethically-sourced, diverse voice dataset linked to health information", type="primary", url="https://physionet.org/content/b2ai-voice/3.0.0/", help="Bridge2AI-Voice: An ethically-sourced, diverse voice dataset linked to health information", icon=":material/login:") st.markdown( """ @@ -66,7 +66,17 @@ def overview_page(tab_name): """ ) - st.link_button("Bridge2AI-Voice: An ethically-sourced, diverse voice dataset linked to health information (Audio included)", type="primary", url="https://physionet.org/content/b2ai-voice-audio/", help="Bridge2AI-Voice: An ethically-sourced, diverse voice dataset linked to health information (Audio included)", icon=":material/login:") + 
st.link_button("Bridge2AI-Voice: An ethically-sourced, diverse voice dataset linked to health information (Audio included)", type="primary", url="https://physionet.org/content/b2ai-voice/3.0.0/", help="Bridge2AI-Voice: An ethically-sourced, diverse voice dataset linked to health information (Audio included)", icon=":material/login:") + + st.markdown( + """ + #### Pediatric Dataset + + The Bridge2AI Voice consortium has also prepared a pediatric dataset. To access the Bridge2AI Voice pediatric dataset please click here + + """ + ) + st.link_button("Bridge2AI-Voice Pediatric Dataset", type="primary", url="https://physionet.org/content/b2ai-voice-pediatric/1.0.0/", help="Bridge2AI-Voice Pediatric Dataset", icon=":material/login:") st.markdown( """