From bd4eb072304605ebfa40b40413125a8982322c62 Mon Sep 17 00:00:00 2001 From: guirque Date: Fri, 12 Dec 2025 16:45:25 -0300 Subject: [PATCH 01/11] feat: adding basic app layout --- app/main.py | 51 +++++++++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 51 insertions(+) create mode 100644 app/main.py diff --git a/app/main.py b/app/main.py new file mode 100644 index 000000000..077d4a565 --- /dev/null +++ b/app/main.py @@ -0,0 +1,51 @@ +import streamlit as st + +# HEAD ------------------------------------------------------------------------------------------------------ + +data_context = None +available_labels = { + 'label 1': 'red', + 'label 2': 'green', + 'label 3': 'yellow' +} + +imgs = [] + +st.set_page_config(layout='wide') + +# HEADER -------------------------------------------------------------------------------------------------- + +st.header('Googol') + +'---' +# FILE UPLOAD AREA ---------------------------------------------------------------------------------------- + + +# MAIN AREA (Where Images are Displayed) ------------------------------------------------------------- + +columns = st.columns(3) + + +# SIDEBAR (Chatbot Zone) ----------------------------------------------------------------------------- +with st.sidebar: + + with st.container(horizontal=True): + for label in available_labels.keys(): + st.badge(label, color=available_labels[label]) + + with st.form('context'): + st.write('Write the medical context for your dataset:') + data_context = st.text_input('context') + submit_button = st.form_submit_button('Submit') + + if submit_button: + print(data_context) + + with st.container(key='chat_area'): + '---' + '# AI Assistance' + with st.chat_message('ai'): + 'Hello! How can I help you with labeling this dataset?' + + with st.chat_message('user'): + user_input = st.text_input('You:', placeholder='Can you label these images according to anomalies found?') \ No newline at end of file From 5c7f0c9f6a990701ffa8e101b3c985e39920e7df Mon Sep 17 00:00:00 2001 From: guirque Date: Fri, 12 Dec 2025 16:46:06 -0300 Subject: [PATCH 02/11] feat: adding file upload feature --- app/main.py | 21 +++++++++++++++++++++ 1 file changed, 21 insertions(+) diff --git a/app/main.py b/app/main.py index 077d4a565..65119d52f 100644 --- a/app/main.py +++ b/app/main.py @@ -1,4 +1,6 @@ import streamlit as st +import os + # HEAD ------------------------------------------------------------------------------------------------------ @@ -20,6 +22,25 @@ '---' # FILE UPLOAD AREA ---------------------------------------------------------------------------------------- +# imgs = st.file_uploader('Upload Dataset Folder / Images', type=['jpg', 'jpeg', 'png', 'svg'], accept_multiple_files='directory') + +with st.expander('# 📁 Add Files'): + folder_path = st.text_input('Please choose a folder path:') + confirmed = st.button('Confirm') + ALLOWED_EXTENSIONS = ('jpg', 'jpeg', 'png', 'svg') + if folder_path and confirmed: + iterator = os.walk(folder_path) + data = next(iterator, None) + while data is not None: + dirpath, dirnames, filenames = data + + for filename in filenames: + # Check if file format is appropriate + if filename.endswith(ALLOWED_EXTENSIONS): + imgs.append(os.path.join(dirpath, filename)) + + data = next(iterator, None) + print('Data collected: ', imgs) # MAIN AREA (Where Images are Displayed) ------------------------------------------------------------- From cc14f022d3cb3f1265c3c794699c3c5c76cccea7 Mon Sep 17 00:00:00 2001 From: guirque Date: Fri, 12 Dec 2025 16:46:43 -0300 Subject: [PATCH 03/11] feat: adding image display --- app/components/image.py | 11 +++++++++++ app/main.py | 3 +++ 2 files changed, 14 insertions(+) create mode 100644 app/components/image.py diff --git a/app/components/image.py b/app/components/image.py new file mode 100644 index 000000000..90e084b06 --- /dev/null +++ b/app/components/image.py @@ -0,0 +1,11 @@ +import streamlit as st +import uuid + +def display_img(column, path, name): + + with column: + with st.container(): + column.write(f'``{path}``') + column.badge('label 1', color='red') + column.image(image=path, caption='## A medical image\n This photo reveals to us that...') + column.pills('', ['Flag', 'Relabel', 'Remove'], key=path + name, selection_mode='single') diff --git a/app/main.py b/app/main.py index 65119d52f..dbc361e7d 100644 --- a/app/main.py +++ b/app/main.py @@ -1,4 +1,5 @@ import streamlit as st +from components.image import display_img import os @@ -46,6 +47,8 @@ columns = st.columns(3) +for i, img in enumerate(imgs): + display_img(columns[i % 3], img, str(i)) # SIDEBAR (Chatbot Zone) ----------------------------------------------------------------------------- with st.sidebar: From 8896bf1c0cdd8b8cd1d1dc152c3a4479f20da209 Mon Sep 17 00:00:00 2001 From: guirque Date: Fri, 12 Dec 2025 16:47:08 -0300 Subject: [PATCH 04/11] chore: updating .gitignore --- .gitignore | 4 ++++ 1 file changed, 4 insertions(+) create mode 100644 .gitignore diff --git a/.gitignore b/.gitignore new file mode 100644 index 000000000..0ce591b69 --- /dev/null +++ b/.gitignore @@ -0,0 +1,4 @@ +venv/ +__pycache__ +data/* +!data/.gitkeep \ No newline at end of file From 2dc2c826b60c80e62e06e279745be2e5b2befb18 Mon Sep 17 00:00:00 2001 From: guirque Date: Fri, 12 Dec 2025 17:48:45 -0300 Subject: [PATCH 05/11] feat: adding file loading in different pages --- app/main.py | 32 ++++++++++++++++++++++++++------ 1 file changed, 26 insertions(+), 6 deletions(-) diff --git a/app/main.py b/app/main.py index dbc361e7d..d9bb3cfff 100644 --- a/app/main.py +++ b/app/main.py @@ -12,7 +12,9 @@ 'label 3': 'yellow' } -imgs = [] +MAX_IMG_PER_PAGE=12 +if 'imgs' not in st.session_state.keys(): + st.session_state['imgs'] = [[]] st.set_page_config(layout='wide') @@ -30,25 +32,43 @@ confirmed = st.button('Confirm') ALLOWED_EXTENSIONS = ('jpg', 'jpeg', 'png', 'svg') if folder_path and confirmed: + current_page = 0 + file_num = 0 iterator = os.walk(folder_path) data = next(iterator, None) + st.session_state['imgs'] = [[]] while data is not None: dirpath, dirnames, filenames = data for filename in filenames: # Check if file format is appropriate if filename.endswith(ALLOWED_EXTENSIONS): - imgs.append(os.path.join(dirpath, filename)) + + # If there's no space in the current page, add a new page + if file_num == MAX_IMG_PER_PAGE: + current_page += 1 + st.session_state['imgs'].append([]) + file_num = 0 + + # Add to current page + st.session_state['imgs'][current_page].append(os.path.join(dirpath, filename)) + file_num += 1 data = next(iterator, None) - print('Data collected: ', imgs) + print('Data collected: ', st.session_state['imgs']) # MAIN AREA (Where Images are Displayed) ------------------------------------------------------------- columns = st.columns(3) - -for i, img in enumerate(imgs): - display_img(columns[i % 3], img, str(i)) +if 'page_num' not in st.session_state: + st.session_state['page_num'] = 0 +with st.container(key='imgs_page'): + + for i, img in enumerate(st.session_state['imgs'][st.session_state['page_num']]): + display_img(columns[i % 3], img, str(i)) + if len(st.session_state['imgs']) > 1: + last_page = len(st.session_state['imgs']) + st.session_state['page_num'] = st.select_slider('Page', options=range(last_page)) # SIDEBAR (Chatbot Zone) ----------------------------------------------------------------------------- with st.sidebar: From acbafa5c44b113f3f046f8a9b356a20f0f4a661d Mon Sep 17 00:00:00 2001 From: guirque Date: Fri, 12 Dec 2025 18:11:31 -0300 Subject: [PATCH 06/11] feat: changing img display layout --- app/components/image.py | 13 ++++++++----- app/main.py | 17 +++++++++++++---- 2 files changed, 21 insertions(+), 9 deletions(-) diff --git a/app/components/image.py b/app/components/image.py index 90e084b06..b68024b29 100644 --- a/app/components/image.py +++ b/app/components/image.py @@ -1,11 +1,14 @@ import streamlit as st import uuid -def display_img(column, path, name): +def display_img(column, path, final_data, name): with column: with st.container(): - column.write(f'``{path}``') - column.badge('label 1', color='red') - column.image(image=path, caption='## A medical image\n This photo reveals to us that...') - column.pills('', ['Flag', 'Relabel', 'Remove'], key=path + name, selection_mode='single') + + st.pills('', ['Flag', 'Relabel', 'Remove'], key=path + name, selection_mode='single') + with st.container(horizontal=True): + with st.popover('Image Path'): + st.write(f'``{path}``') + st.badge(final_data['label'], color='red') + st.image(image=path, caption=final_data['description']) diff --git a/app/main.py b/app/main.py index d9bb3cfff..54c1e31c5 100644 --- a/app/main.py +++ b/app/main.py @@ -7,7 +7,7 @@ data_context = None available_labels = { - 'label 1': 'red', + 'default': 'red', 'label 2': 'green', 'label 3': 'yellow' } @@ -16,6 +16,10 @@ if 'imgs' not in st.session_state.keys(): st.session_state['imgs'] = [[]] +if 'final_data' not in st.session_state.keys(): + st.session_state['final_data'] = {} # this object will store all data, such as imgs (keys) and their labels + # each img in the dictionary consists in another dictionary with the following keys: 'label', 'description'. + st.set_page_config(layout='wide') # HEADER -------------------------------------------------------------------------------------------------- @@ -51,7 +55,12 @@ file_num = 0 # Add to current page - st.session_state['imgs'][current_page].append(os.path.join(dirpath, filename)) + file_path = os.path.join(dirpath, filename) + st.session_state['imgs'][current_page].append(file_path) + st.session_state['final_data'][file_path] = { + 'label': 'default', + 'description': 'No description provided.' + } file_num += 1 data = next(iterator, None) @@ -59,13 +68,13 @@ # MAIN AREA (Where Images are Displayed) ------------------------------------------------------------- -columns = st.columns(3) +columns = st.columns(3, gap='medium') if 'page_num' not in st.session_state: st.session_state['page_num'] = 0 with st.container(key='imgs_page'): for i, img in enumerate(st.session_state['imgs'][st.session_state['page_num']]): - display_img(columns[i % 3], img, str(i)) + display_img(columns[i % 3], img, st.session_state['final_data'][img], str(i)) if len(st.session_state['imgs']) > 1: last_page = len(st.session_state['imgs']) st.session_state['page_num'] = st.select_slider('Page', options=range(last_page)) From 90efee4bab317da1a444dc3e4b233e60bffd894d Mon Sep 17 00:00:00 2001 From: guirque Date: Fri, 12 Dec 2025 19:26:23 -0300 Subject: [PATCH 07/11] feat: adding chat history and chat feature --- app/main.py | 22 ++++++++++++++++++---- 1 file changed, 18 insertions(+), 4 deletions(-) diff --git a/app/main.py b/app/main.py index 54c1e31c5..f547fce8d 100644 --- a/app/main.py +++ b/app/main.py @@ -20,6 +20,9 @@ st.session_state['final_data'] = {} # this object will store all data, such as imgs (keys) and their labels # each img in the dictionary consists in another dictionary with the following keys: 'label', 'description'. +if 'chat_history' not in st.session_state.keys(): + st.session_state['chat_history'] = [{'name': 'ai', 'content': 'Hello! How can I help you with labeling this dataset?'}] + st.set_page_config(layout='wide') # HEADER -------------------------------------------------------------------------------------------------- @@ -96,9 +99,20 @@ with st.container(key='chat_area'): '---' - '# AI Assistance' - with st.chat_message('ai'): - 'Hello! How can I help you with labeling this dataset?' + '# AI Chat' + for msg in st.session_state['chat_history']: + with st.chat_message(msg['name']): + st.write(msg['content']) + with st.chat_message('user'): - user_input = st.text_input('You:', placeholder='Can you label these images according to anomalies found?') \ No newline at end of file + with st.form('user_msg_form', clear_on_submit=True): + user_input = st.text_input('You:', placeholder='Can you label these images according to anomalies found?', value='') + submit = st.form_submit_button('Send', icon='➡️') + + if submit: + st.session_state['chat_history'].append({'name': 'user', 'content': user_input}) + + # This is where the API call will take place + st.session_state['chat_history'].append({'name': 'ai', 'content': 'I\'m an AI reply'}) + st.rerun() \ No newline at end of file From fe494c227bf96f1bee80075fb07b214f4ab82d16 Mon Sep 17 00:00:00 2001 From: guirque Date: Fri, 12 Dec 2025 20:52:45 -0300 Subject: [PATCH 08/11] feat: adding statistics option Also added export button and changed data so it is stored as a pandas DataFrame --- app/components/image.py | 4 ++-- app/main.py | 49 ++++++++++++++++++++++++++++++++--------- 2 files changed, 41 insertions(+), 12 deletions(-) diff --git a/app/components/image.py b/app/components/image.py index b68024b29..b66ad52f5 100644 --- a/app/components/image.py +++ b/app/components/image.py @@ -10,5 +10,5 @@ def display_img(column, path, final_data, name): with st.container(horizontal=True): with st.popover('Image Path'): st.write(f'``{path}``') - st.badge(final_data['label'], color='red') - st.image(image=path, caption=final_data['description']) + st.badge(final_data['label'].values[0], color='red') + st.image(image=path, caption=final_data['description'].values[0]) diff --git a/app/main.py b/app/main.py index f547fce8d..37b55708f 100644 --- a/app/main.py +++ b/app/main.py @@ -1,6 +1,7 @@ import streamlit as st from components.image import display_img import os +import pandas as pd # HEAD ------------------------------------------------------------------------------------------------------ @@ -16,10 +17,6 @@ if 'imgs' not in st.session_state.keys(): st.session_state['imgs'] = [[]] -if 'final_data' not in st.session_state.keys(): - st.session_state['final_data'] = {} # this object will store all data, such as imgs (keys) and their labels - # each img in the dictionary consists in another dictionary with the following keys: 'label', 'description'. - if 'chat_history' not in st.session_state.keys(): st.session_state['chat_history'] = [{'name': 'ai', 'content': 'Hello! How can I help you with labeling this dataset?'}] @@ -34,10 +31,11 @@ # imgs = st.file_uploader('Upload Dataset Folder / Images', type=['jpg', 'jpeg', 'png', 'svg'], accept_multiple_files='directory') -with st.expander('# 📁 Add Files'): +with st.expander('# 📁 Add Files', width='stretch'): folder_path = st.text_input('Please choose a folder path:') confirmed = st.button('Confirm') ALLOWED_EXTENSIONS = ('jpg', 'jpeg', 'png', 'svg') + df_data = {'label': [], 'description': [], 'path': []} if folder_path and confirmed: current_page = 0 file_num = 0 @@ -60,15 +58,46 @@ # Add to current page file_path = os.path.join(dirpath, filename) st.session_state['imgs'][current_page].append(file_path) - st.session_state['final_data'][file_path] = { - 'label': 'default', - 'description': 'No description provided.' - } + + df_data['label'].append('default') + df_data['description'].append('No description provided.') + df_data['path'].append(file_path) file_num += 1 data = next(iterator, None) + if 'final_data_df' not in st.session_state.keys(): + st.session_state['final_data_df'] = pd.DataFrame(df_data) + print('Data collected: ', st.session_state['imgs']) +# EXPORT & Statistics -------------------------------------------------------------------------------- + +@st.dialog('Statistics', width='large') +def show_statistics(): + #st.bar_chart() + st.write('Data') + + if 'final_data_df' in st.session_state is not None: + + st.dataframe(st.session_state['final_data_df']) + + st.write('Label Frequencies') + frequencies_df = st.session_state['final_data_df']['label'].value_counts() + + st.bar_chart(frequencies_df, horizontal=True) + else: + st.error('Please choose a folder before viewing statistics.') + + +export_and_st = st.columns(2) + +with export_and_st[0]: + if st.button('# 📦 Export Results', width='stretch'): + print('Exported') +with export_and_st[1]: + if st.button('📊 View Statistics', width='stretch'): + show_statistics() + # MAIN AREA (Where Images are Displayed) ------------------------------------------------------------- columns = st.columns(3, gap='medium') @@ -77,7 +106,7 @@ with st.container(key='imgs_page'): for i, img in enumerate(st.session_state['imgs'][st.session_state['page_num']]): - display_img(columns[i % 3], img, st.session_state['final_data'][img], str(i)) + display_img(columns[i % 3], img, st.session_state['final_data_df'][st.session_state['final_data_df']['path'] == img], str(i)) if len(st.session_state['imgs']) > 1: last_page = len(st.session_state['imgs']) st.session_state['page_num'] = st.select_slider('Page', options=range(last_page)) From 8ca3d3e9f88f0553f7a230762a716ae0fed215aa Mon Sep 17 00:00:00 2001 From: guirque Date: Fri, 12 Dec 2025 20:53:40 -0300 Subject: [PATCH 09/11] fix: fixing page loading Pages were only loading after their number was changed --- app/main.py | 9 ++++++--- 1 file changed, 6 insertions(+), 3 deletions(-) diff --git a/app/main.py b/app/main.py index 37b55708f..d5e439161 100644 --- a/app/main.py +++ b/app/main.py @@ -101,16 +101,19 @@ def show_statistics(): # MAIN AREA (Where Images are Displayed) ------------------------------------------------------------- columns = st.columns(3, gap='medium') + if 'page_num' not in st.session_state: st.session_state['page_num'] = 0 with st.container(key='imgs_page'): - - for i, img in enumerate(st.session_state['imgs'][st.session_state['page_num']]): - display_img(columns[i % 3], img, st.session_state['final_data_df'][st.session_state['final_data_df']['path'] == img], str(i)) + if len(st.session_state['imgs']) > 1: last_page = len(st.session_state['imgs']) st.session_state['page_num'] = st.select_slider('Page', options=range(last_page)) + for i, img in enumerate(st.session_state['imgs'][st.session_state['page_num']]): + display_img(columns[i % 3], img, st.session_state['final_data_df'][st.session_state['final_data_df']['path'] == img], str(i)) + + # SIDEBAR (Chatbot Zone) ----------------------------------------------------------------------------- with st.sidebar: From 03cec40df5f1cf3bcab413679063cc37e43e6a20 Mon Sep 17 00:00:00 2001 From: guirque Date: Sat, 13 Dec 2025 13:05:02 -0300 Subject: [PATCH 10/11] feat: adding patient id and subfolder classes --- app/components/image.py | 10 ++++++---- app/main.py | 35 +++++++++++++++++++++++------------ 2 files changed, 29 insertions(+), 16 deletions(-) diff --git a/app/components/image.py b/app/components/image.py index b66ad52f5..3ceaffa8f 100644 --- a/app/components/image.py +++ b/app/components/image.py @@ -1,14 +1,16 @@ import streamlit as st import uuid -def display_img(column, path, final_data, name): +def display_img(column, path, final_data, name, available_colors): with column: with st.container(): st.pills('', ['Flag', 'Relabel', 'Remove'], key=path + name, selection_mode='single') + with st.popover('Image Path'): + st.write(f'``{path}``') with st.container(horizontal=True): - with st.popover('Image Path'): - st.write(f'``{path}``') - st.badge(final_data['label'].values[0], color='red') + st.badge(final_data['label'].values[0], color=available_colors[final_data['label'].values[0]]) + #st.space() + st.write(f'Patient ID: ``{final_data['patient'].values[0]}``') st.image(image=path, caption=final_data['description'].values[0]) diff --git a/app/main.py b/app/main.py index d5e439161..31f55cee4 100644 --- a/app/main.py +++ b/app/main.py @@ -7,11 +7,12 @@ # HEAD ------------------------------------------------------------------------------------------------------ data_context = None -available_labels = { - 'default': 'red', - 'label 2': 'green', - 'label 3': 'yellow' -} +colors = ['red', 'green', 'yellow', 'violet', 'orange', 'blue', 'gray'] +colors_i = 1 +if 'available_labels' not in st.session_state.keys(): + st.session_state['available_labels'] = { + 'default': 'red' + } MAX_IMG_PER_PAGE=12 if 'imgs' not in st.session_state.keys(): @@ -33,9 +34,11 @@ with st.expander('# 📁 Add Files', width='stretch'): folder_path = st.text_input('Please choose a folder path:') + consider_folder_as_patient = st.checkbox('Consider Subfolder As Patient ID') + consider_folder_as_label = st.checkbox('Consider Subfolder As Label') confirmed = st.button('Confirm') ALLOWED_EXTENSIONS = ('jpg', 'jpeg', 'png', 'svg') - df_data = {'label': [], 'description': [], 'path': []} + df_data = {'label': [], 'description': [], 'path': [], 'patient': []} if folder_path and confirmed: current_page = 0 file_num = 0 @@ -45,6 +48,8 @@ while data is not None: dirpath, dirnames, filenames = data + folder_name = dirpath.split('/')[-1] + for filename in filenames: # Check if file format is appropriate if filename.endswith(ALLOWED_EXTENSIONS): @@ -59,14 +64,20 @@ file_path = os.path.join(dirpath, filename) st.session_state['imgs'][current_page].append(file_path) - df_data['label'].append('default') + df_data['label'].append(folder_name if consider_folder_as_label else 'default') df_data['description'].append('No description provided.') df_data['path'].append(file_path) + df_data['patient'].append(folder_name if consider_folder_as_patient else 'anonymous') file_num += 1 data = next(iterator, None) - if 'final_data_df' not in st.session_state.keys(): - st.session_state['final_data_df'] = pd.DataFrame(df_data) + + st.session_state['final_data_df'] = pd.DataFrame(df_data) + + # Setting labels + for label in st.session_state['final_data_df']['label'].unique(): + st.session_state['available_labels'][label] = colors[colors_i % len(colors)] + colors_i += 1 print('Data collected: ', st.session_state['imgs']) @@ -111,15 +122,15 @@ def show_statistics(): st.session_state['page_num'] = st.select_slider('Page', options=range(last_page)) for i, img in enumerate(st.session_state['imgs'][st.session_state['page_num']]): - display_img(columns[i % 3], img, st.session_state['final_data_df'][st.session_state['final_data_df']['path'] == img], str(i)) + display_img(columns[i % 3], img, st.session_state['final_data_df'][st.session_state['final_data_df']['path'] == img], str(i), st.session_state['available_labels']) # SIDEBAR (Chatbot Zone) ----------------------------------------------------------------------------- with st.sidebar: with st.container(horizontal=True): - for label in available_labels.keys(): - st.badge(label, color=available_labels[label]) + for label in st.session_state['available_labels'].keys(): + st.badge(label, color=st.session_state['available_labels'][label]) with st.form('context'): st.write('Write the medical context for your dataset:') From 86c74905c1d821fc5b69e4319431ce2d210e9f9a Mon Sep 17 00:00:00 2001 From: guirque Date: Sat, 13 Dec 2025 13:08:46 -0300 Subject: [PATCH 11/11] chore: adding .gitkeep --- data/.gitkeep | 0 1 file changed, 0 insertions(+), 0 deletions(-) create mode 100644 data/.gitkeep diff --git a/data/.gitkeep b/data/.gitkeep new file mode 100644 index 000000000..e69de29bb