From 2596e408629e37e6cffbd45f3f884dfacf0146de Mon Sep 17 00:00:00 2001 From: Avitup-R Date: Sun, 8 Jun 2025 02:49:46 +0900 Subject: [PATCH] Add: Adjusted data_loaders.py to handle non-numeric data for the mean operation. Also made some changes to requirements.txt --- lee_et_al_2023/requirements.txt | 4 ++-- lee_et_al_2023/src/data_loaders.py | 5 ++++- 2 files changed, 6 insertions(+), 3 deletions(-) diff --git a/lee_et_al_2023/requirements.txt b/lee_et_al_2023/requirements.txt index 56c454f..5e43529 100644 --- a/lee_et_al_2023/requirements.txt +++ b/lee_et_al_2023/requirements.txt @@ -3,11 +3,11 @@ CairoSVG==2.7.0 matplotlib nbmake numpy -pandas==1.5.3 +pandas pytest rdkit # Scikit may require special install procedures on; e.g. M1 macbooks. scikit-learn -seaborn==0.11.1 +seaborn statsmodels==0.13.5 tqdm==4.61.0 diff --git a/lee_et_al_2023/src/data_loaders.py b/lee_et_al_2023/src/data_loaders.py index 4dfc8cc..57ead91 100644 --- a/lee_et_al_2023/src/data_loaders.py +++ b/lee_et_al_2023/src/data_loaders.py @@ -74,7 +74,10 @@ def get_clean(filter=True): # Select the humans that rated at least some of these SMILES subjects = humans['SubjectCode'].unique() - panel = humans.groupby('RedJade Code').mean().loc[mol_codes, base.MONELL_CLASS_LIST] + # Filter to numeric columns only before calculating mean + numeric_cols = humans.select_dtypes(include='number').columns + panel = humans.groupby('RedJade Code')[numeric_cols].mean().loc[mol_codes, base.MONELL_CLASS_LIST] + return models, humans, panel, subjects