From fed093b3dcaf99909ce4537c943a7bd683cf03ea Mon Sep 17 00:00:00 2001 From: Abhinav Shukla <67401627+maxprogrammer007@users.noreply.github.com> Date: Mon, 5 May 2025 11:50:51 +0530 Subject: [PATCH 01/10] Update app.py --- dashboard/app.py | 178 ++++++++++++++++++++++++----------------------- 1 file changed, 90 insertions(+), 88 deletions(-) diff --git a/dashboard/app.py b/dashboard/app.py index 116ec46..db5df96 100644 --- a/dashboard/app.py +++ b/dashboard/app.py @@ -9,43 +9,52 @@ # --- Configuration --- PROJECT_ROOT = os.path.abspath(os.path.join(os.path.dirname(__file__), os.pardir)) DATA_DIR = os.path.join(PROJECT_ROOT, 'data', 'processed') -SHAPE_PATH = os.path.join(PROJECT_ROOT, 'data', 'raw', 'shapefiles', 'cb_2018_us_county_500k.shp') +SHAPE_PATH = os.path.join(PROJECT_ROOT, 'data', 'raw', 'shapefiles', + 'cb_2018_us_county_500k.shp') SHAP_CSV = os.path.join(DATA_DIR, 'shap_explanations.csv') +GEOSHAP_CSV = os.path.join(DATA_DIR, 'geoshapley_explanations.csv') MGWR_CSV = os.path.join(DATA_DIR, 'mgwr_coefficients.csv') BOOT_CSV = os.path.join(DATA_DIR, 'bootstrap_shap_stats.csv') FAIR_CSV = os.path.join(DATA_DIR, 'fairness_metrics.csv') SENSITIVE_ATTRS = ["pct_black", "pct_hisp", "median_income"] -# Must be first Streamlit call +# Must be the first Streamlit call st.set_page_config(layout="wide", page_title="πŸ—ΊοΈ Explainable GeoAI Dashboard") # --- Load Data --- @st.cache_data def load_data(): + # Base GeoDataFrame gdf = gpd.read_file(SHAPE_PATH).to_crs("EPSG:4326") gdf['GEOID'] = gdf['GEOID'].astype(str).str.zfill(5) - shap_df = pd.read_csv(SHAP_CSV, dtype={'GEOID': str}) - mgwr_df = pd.read_csv(MGWR_CSV, dtype={'GEOID': str}) - boot_df = pd.read_csv(BOOT_CSV) - fair_df = pd.read_csv(FAIR_CSV, dtype={'GEOID': str}) - - # Merge shap & mgwr into GeoDataFrame - map_df = gdf.merge(shap_df, on='GEOID', how='left') - map_df = map_df.merge(mgwr_df, on='GEOID', how='left', suffixes=('_shap','_mgwr')) + # Tabular outputs + shap_df = pd.read_csv(SHAP_CSV, 
dtype={'GEOID': str}) + geoshap_df = pd.read_csv(GEOSHAP_CSV, dtype={'GEOID': str}) + mgwr_df = pd.read_csv(MGWR_CSV, dtype={'GEOID': str}) + boot_df = pd.read_csv(BOOT_CSV) + fair_df = pd.read_csv(FAIR_CSV, dtype={'GEOID': str}) + + # Merge all into the GeoDataFrame + df = ( + gdf + .merge(shap_df, on='GEOID', how='left') + .merge(geoshap_df, on='GEOID', how='left', suffixes=('_shap','_geoshap')) + .merge(mgwr_df, on='GEOID', how='left', suffixes=('', '_mgwr')) + ) + return df, shap_df, geoshap_df, mgwr_df, boot_df, fair_df - return map_df, shap_df, mgwr_df, boot_df, fair_df +map_df, shap_df, geoshap_df, mgwr_df, boot_df, fair_df = load_data() -map_df, shap_df, mgwr_df, boot_df, fair_df = load_data() # --- Sidebar Controls --- st.sidebar.title("Controls") mode = st.sidebar.radio( "Select Mode:", - ["SHAP", "MGWR/OLS", "Fairness"] + ["SHAP", "GeoShapley", "MGWR/OLS", "Fairness"] ) view = st.sidebar.radio( @@ -53,118 +62,111 @@ def load_data(): ["Point Estimate", "Uncertainty"] ) -# Determine feature & columns +# --- Determine which column to map --- if mode == "SHAP": features = [c.replace('phi_', '') for c in shap_df.columns if c.startswith('phi_')] - feature = st.sidebar.selectbox("Feature:", features) - col_point = f"phi_{feature}" + feature = st.sidebar.selectbox("SHAP Feature:", features) + col_point = f"phi_{feature}" col_uncert = 'std_phi' title_point = f"SHAP Attribution: {feature}" title_uncert = f"SHAP Uncertainty (Std Dev): {feature}" +elif mode == "GeoShapley": + # GeoShapley components + base_opts = ["phi_GEO"] # intrinsic location + feat_opts = [c for c in geoshap_df.columns if c.startswith('phi_') and not c.startswith(('phi_int','phi_GEO'))] + int_opts = [c for c in geoshap_df.columns if c.startswith('phi_int_')] + options = ["phi_GEO"] + feat_opts + int_opts + comp = st.sidebar.selectbox("GeoShapley Component:", options) + col_point = comp + col_uncert = None + title_point = comp.replace('phi_','').replace('_',' ').title() + title_uncert= "" + elif 
mode == "MGWR/OLS": features = [c for c in mgwr_df.columns if c != 'GEOID'] - feature = st.sidebar.selectbox("Coefficient:", features) - col_point = feature - col_uncert = None - title_point = f"MGWR/OLS Coefficient: {feature}" - title_uncert = "" + feature = st.sidebar.selectbox("Coefficient:", features) + col_point = feature + col_uncert = None + title_point = f"MGWR/OLS Coefficient: {feature}" + title_uncert= "" else: # Fairness - fair_labels = { - "pct_black":"Black %", - "pct_hisp":"Hispanic %", - "median_income":"Median Income" - } - attr = st.sidebar.selectbox( - "Sensitive Attribute:", - SENSITIVE_ATTRS, - format_func=lambda x: fair_labels[x] - ) - feature = attr - col_point = f"{attr}_fairness_score" - col_uncert = None - title_point = f"Fairness Score – {fair_labels[attr]}" - title_uncert = "" + labels = {"pct_black":"Black %","pct_hisp":"Hispanic %","median_income":"Median Income"} + attr = st.sidebar.selectbox("Sensitive Attribute:", SENSITIVE_ATTRS, + format_func=lambda x: labels[x]) + col_point = f"{attr}_fairness_score" + col_uncert = None + title_point = f"Fairness Score – {labels[attr]}" + title_uncert= "" # Default to point estimate -col_to_map = col_point -title = title_point - -# Handle SHAP uncertainty view -if mode == "SHAP" and view == "Uncertainty": - # Get std_phi for selected feature - std_row = boot_df.loc[boot_df["feature"] == feature] - if not std_row.empty: - std_val = float(std_row["std_phi"]) - # inject into map_df copy - map_df["uncertainty"] = std_val - col_to_map = "uncertainty" - title = title_uncert +col_to_map, title = col_point, title_point + +# Handle SHAP uncertainty +if mode=="SHAP" and view=="Uncertainty": + row = boot_df[boot_df['feature']==feature] + if not row.empty: + stdv = float(row['std_phi']) + map_df['uncertainty'] = stdv + col_to_map, title = 'uncertainty', title_uncert else: st.sidebar.warning("No bootstrap std available for this feature.") - col_to_map = col_point - title = title_point -# --- Build Map --- +# 
--- Render Map --- st.subheader(title) -# Prepare DataFrame to map plot_df = map_df.copy() -if mode == "Fairness": +if mode=="Fairness": plot_df = plot_df.merge(fair_df, on="GEOID", how="left") -# Verify column exists if col_to_map not in plot_df.columns: - st.error(f"πŸ›‘ Column '{col_to_map}' not found. Available columns: {plot_df.columns.tolist()}") + st.error(f"Column '{col_to_map}' not found. Available: {plot_df.columns.tolist()}") else: - m = folium.Map(location=[37.8, -96], zoom_start=4, tiles='cartodbpositron') - - choropleth = folium.Choropleth( - geo_data=plot_df, - data=plot_df, - columns=["GEOID", col_to_map], - key_on="feature.properties.GEOID", - fill_color='YlGnBu' if mode!="Fairness" else 'RdYlBu_r', - fill_opacity=0.7, - line_opacity=0.2, - legend_name=title, - nan_fill_color="white" - ).add_to(m) - - # Add tooltip - choropleth.geojson.add_child( - folium.features.GeoJsonTooltip( + m = folium.Map(location=[37.8, -96], zoom_start=4, tiles='cartodbpositron') + + choropleth = folium.Choropleth( + geo_data=plot_df, + data=plot_df, + columns=["GEOID", col_to_map], + key_on="feature.properties.GEOID", + fill_color='YlGnBu' if mode!="Fairness" else 'RdYlBu_r', + fill_opacity=0.7, + line_opacity=0.2, + legend_name=title, + nan_fill_color="white" + ).add_to(m) + + # Correctly attach tooltips to the GeoJson sub‐layer + choropleth.geojson.add_child( + folium.features.GeoJsonTooltip( fields=["GEOID", col_to_map], aliases=["GEOID", title], localize=True - ) - ) + ) + ) - map_html = m._repr_html_() - components.html(map_html, height=500, scrolling=True) + components.html(m._repr_html_(), height=550) -# --- SHAP Feature Importance --- -if mode == "SHAP" and view == "Point Estimate": +# --- SHAP Feature Importance Bar (SHAP only) --- +if mode=="SHAP" and view=="Point Estimate": st.subheader("Global SHAP Feature Importance") imp_df = boot_df.copy() imp_df['abs_mean'] = imp_df['mean_phi'].abs() - top10 = imp_df.sort_values('abs_mean', ascending=False).head(10) + 
top10 = imp_df.nlargest(10, 'abs_mean') fig = px.bar( top10, - x='feature', - y='mean_phi', - error_y='std_phi', + x='feature', y='mean_phi', error_y='std_phi', labels={'mean_phi':'Mean SHAP'}, title='Top 10 SHAP Feature Importances' ) st.plotly_chart(fig, use_container_width=True) -# --- Download Buttons --- +# --- Downloads --- st.markdown("---") c1, c2, c3, c4 = st.columns(4) -c1.download_button("Download SHAP CSV", shap_df.to_csv(index=False), 'shap_explanations.csv') -c2.download_button("Download MGWR CSV", mgwr_df.to_csv(index=False), 'mgwr_coefficients.csv') -c3.download_button("Download Bootstrap Stats", boot_df.to_csv(index=False), 'bootstrap_shap_stats.csv') -c4.download_button("Download Fairness CSV", fair_df.to_csv(index=False), 'fairness_metrics.csv') +c1.download_button("Download SHAP CSV", shap_df.to_csv(index=False), 'shap_explanations.csv') +c2.download_button("Download GeoShapley", geoshap_df.to_csv(index=False), 'geoshapley_explanations.csv') +c3.download_button("Download MGWR CSV", mgwr_df.to_csv(index=False), 'mgwr_coefficients.csv') +c4.download_button("Download Fairness CSV", fair_df.to_csv(index=False), 'fairness_metrics.csv') From 341ec25572263ecff0113cae7c7502946d97cee7 Mon Sep 17 00:00:00 2001 From: Abhinav Shukla <67401627+maxprogrammer007@users.noreply.github.com> Date: Mon, 5 May 2025 12:21:01 +0530 Subject: [PATCH 02/10] Create ci.yml --- .github/workflows/ci.yml | 26 ++++++++++++++++++++++++++ 1 file changed, 26 insertions(+) create mode 100644 .github/workflows/ci.yml diff --git a/.github/workflows/ci.yml b/.github/workflows/ci.yml new file mode 100644 index 0000000..b8e3f8e --- /dev/null +++ b/.github/workflows/ci.yml @@ -0,0 +1,26 @@ +name: CI + +on: + push: + branches: [ main ] + pull_request: + +jobs: + lint-and-test: + runs-on: ubuntu-latest + steps: + - uses: actions/checkout@v3 + - name: Set up Python + uses: actions/setup-python@v5 + with: + python-version: '3.12' + - name: Install dependencies + run: | + pip install -r 
requirements.txt + - name: Lint with flake8 + run: | + pip install flake8 + flake8 src/ dashboard/ + - name: Smoke-run Streamlit (health check) + run: | + streamlit run dashboard/app.py -- --headless --run-once || true From 933feca5585706427cbe1e36466d5c28ecc425ac Mon Sep 17 00:00:00 2001 From: Abhinav Shukla <67401627+maxprogrammer007@users.noreply.github.com> Date: Mon, 5 May 2025 12:21:07 +0530 Subject: [PATCH 03/10] Update app.py --- dashboard/app.py | 212 +++++++++++++++++++++++------------------------ 1 file changed, 103 insertions(+), 109 deletions(-) diff --git a/dashboard/app.py b/dashboard/app.py index db5df96..6d18135 100644 --- a/dashboard/app.py +++ b/dashboard/app.py @@ -1,3 +1,5 @@ +# dashboard/app.py + import os import streamlit as st import pandas as pd @@ -6,116 +8,112 @@ import streamlit.components.v1 as components import folium -# --- Configuration --- -PROJECT_ROOT = os.path.abspath(os.path.join(os.path.dirname(__file__), os.pardir)) -DATA_DIR = os.path.join(PROJECT_ROOT, 'data', 'processed') -SHAPE_PATH = os.path.join(PROJECT_ROOT, 'data', 'raw', 'shapefiles', - 'cb_2018_us_county_500k.shp') +# ─── Paths & Config ───────────────────────────────────────────────────────── +PROJECT_ROOT = os.path.abspath(os.path.join(os.path.dirname(__file__), os.pardir)) +DATA_DIR = os.path.join(PROJECT_ROOT, "data", "processed") +SHAPE_PATH = os.path.join(PROJECT_ROOT, "data", "raw", "shapefiles", + "cb_2018_us_county_500k.shp") -SHAP_CSV = os.path.join(DATA_DIR, 'shap_explanations.csv') -GEOSHAP_CSV = os.path.join(DATA_DIR, 'geoshapley_explanations.csv') -MGWR_CSV = os.path.join(DATA_DIR, 'mgwr_coefficients.csv') -BOOT_CSV = os.path.join(DATA_DIR, 'bootstrap_shap_stats.csv') -FAIR_CSV = os.path.join(DATA_DIR, 'fairness_metrics.csv') +SHAP_CSV = os.path.join(DATA_DIR, "shap_explanations.csv") +GEOSHAP_CSV = os.path.join(DATA_DIR, "geoshapley_explanations.csv") +MGWR_CSV = os.path.join(DATA_DIR, "mgwr_coefficients.csv") +BOOT_CSV = os.path.join(DATA_DIR, 
"bootstrap_shap_stats.csv") +FAIR_CSV = os.path.join(DATA_DIR, "fairness_metrics.csv") SENSITIVE_ATTRS = ["pct_black", "pct_hisp", "median_income"] -# Must be the first Streamlit call st.set_page_config(layout="wide", page_title="πŸ—ΊοΈ Explainable GeoAI Dashboard") -# --- Load Data --- -@st.cache_data +# ─── Sidebar Help ─────────────────────────────────────────────────────────── +st.sidebar.title("Controls") +st.sidebar.markdown(""" +**Mode Descriptions** +- **SHAP:** Exact `phi_…` columns from your SHAP output + bootstrap uncertainty +- **GeoShapley:** Decomposed spatial–feature effects +- **MGWR/OLS:** Local regression coefficients +- **Fairness:** Residual-based fairness gaps +""") +with st.expander("❓ How to use"): + st.write(""" + 1. Pick a **Mode**. + 2. Pick **View** (Point vs Uncertainty). + 3. For SHAP, choose exactly one `phi_…` column from your CSV. + 4. Hover on the map or download any CSV below. + """) + +# ─── Data Loader ───────────────────────────────────────────────────────────── +@st.cache_data(ttl=86400) def load_data(): - # Base GeoDataFrame gdf = gpd.read_file(SHAPE_PATH).to_crs("EPSG:4326") - gdf['GEOID'] = gdf['GEOID'].astype(str).str.zfill(5) + gdf["GEOID"] = gdf["GEOID"].astype(str).str.zfill(5) - # Tabular outputs - shap_df = pd.read_csv(SHAP_CSV, dtype={'GEOID': str}) - geoshap_df = pd.read_csv(GEOSHAP_CSV, dtype={'GEOID': str}) - mgwr_df = pd.read_csv(MGWR_CSV, dtype={'GEOID': str}) + shap_df = pd.read_csv(SHAP_CSV, dtype={"GEOID": str}) + geoshap_df = pd.read_csv(GEOSHAP_CSV, dtype={"GEOID": str}) + mgwr_df = pd.read_csv(MGWR_CSV, dtype={"GEOID": str}) boot_df = pd.read_csv(BOOT_CSV) - fair_df = pd.read_csv(FAIR_CSV, dtype={'GEOID': str}) + fair_df = pd.read_csv(FAIR_CSV, dtype={"GEOID": str}) - # Merge all into the GeoDataFrame - df = ( + merged = ( gdf - .merge(shap_df, on='GEOID', how='left') - .merge(geoshap_df, on='GEOID', how='left', suffixes=('_shap','_geoshap')) - .merge(mgwr_df, on='GEOID', how='left', suffixes=('', 
'_mgwr')) + .merge(shap_df, on="GEOID", how="left") + .merge(geoshap_df, on="GEOID", how="left", suffixes=("_shap","_geoshap")) + .merge(mgwr_df, on="GEOID", how="left", suffixes=("", "_mgwr")) ) - return df, shap_df, geoshap_df, mgwr_df, boot_df, fair_df + return merged, shap_df, geoshap_df, mgwr_df, boot_df, fair_df map_df, shap_df, geoshap_df, mgwr_df, boot_df, fair_df = load_data() +# ─── Mode & View ───────────────────────────────────────────────────────────── +mode = st.sidebar.radio("Select Mode:", ["SHAP", "GeoShapley", "MGWR/OLS", "Fairness"]) +view = st.sidebar.radio("View:", ["Point Estimate", "Uncertainty"]) -# --- Sidebar Controls --- -st.sidebar.title("Controls") - -mode = st.sidebar.radio( - "Select Mode:", - ["SHAP", "GeoShapley", "MGWR/OLS", "Fairness"] -) - -view = st.sidebar.radio( - "View:", - ["Point Estimate", "Uncertainty"] -) - -# --- Determine which column to map --- +# ─── Sidebar selectors & titles ────────────────────────────────────────────── if mode == "SHAP": - features = [c.replace('phi_', '') for c in shap_df.columns if c.startswith('phi_')] - feature = st.sidebar.selectbox("SHAP Feature:", features) - col_point = f"phi_{feature}" - col_uncert = 'std_phi' - title_point = f"SHAP Attribution: {feature}" - title_uncert = f"SHAP Uncertainty (Std Dev): {feature}" + # list all existing phi_ columns + phi_cols = [c for c in shap_df.columns if c.startswith("phi_")] + feature = st.sidebar.selectbox("SHAP Column:", sorted(phi_cols)) + col_point = feature + col_uncert = "std_phi" + title_point = feature + title_unc = f"Bootstrap std of {feature}" elif mode == "GeoShapley": - # GeoShapley components - base_opts = ["phi_GEO"] # intrinsic location - feat_opts = [c for c in geoshap_df.columns if c.startswith('phi_') and not c.startswith(('phi_int','phi_GEO'))] - int_opts = [c for c in geoshap_df.columns if c.startswith('phi_int_')] - options = ["phi_GEO"] + feat_opts + int_opts - comp = st.sidebar.selectbox("GeoShapley Component:", options) + 
geosh_cols = [c for c in geoshap_df.columns if c.startswith("phi_")] + comp = st.sidebar.selectbox("GeoShapley Column:", sorted(geosh_cols)) col_point = comp col_uncert = None - title_point = comp.replace('phi_','').replace('_',' ').title() - title_uncert= "" + title_point = comp + title_unc = "" elif mode == "MGWR/OLS": - features = [c for c in mgwr_df.columns if c != 'GEOID'] - feature = st.sidebar.selectbox("Coefficient:", features) - col_point = feature + mgwr_cols = [c for c in mgwr_df.columns if c != "GEOID"] + coef = st.sidebar.selectbox("MGWR/OLS Column:", sorted(mgwr_cols)) + col_point = coef col_uncert = None - title_point = f"MGWR/OLS Coefficient: {feature}" - title_uncert= "" + title_point = coef + title_unc = "" else: # Fairness - labels = {"pct_black":"Black %","pct_hisp":"Hispanic %","median_income":"Median Income"} - attr = st.sidebar.selectbox("Sensitive Attribute:", SENSITIVE_ATTRS, - format_func=lambda x: labels[x]) + fair_labels = {"pct_black":"Black %","pct_hisp":"Hispanic %","median_income":"Median Income"} + attr = st.sidebar.selectbox("Attribute:", SENSITIVE_ATTRS, + format_func=lambda x: fair_labels[x]) col_point = f"{attr}_fairness_score" col_uncert = None - title_point = f"Fairness Score – {labels[attr]}" - title_uncert= "" + title_point = col_point + title_unc = "" -# Default to point estimate +# ─── Handle SHAP Uncertainty ──────────────────────────────────────────────── col_to_map, title = col_point, title_point - -# Handle SHAP uncertainty if mode=="SHAP" and view=="Uncertainty": - row = boot_df[boot_df['feature']==feature] + row = boot_df.loc[boot_df["feature"]==feature.removeprefix("phi_")] if not row.empty: - stdv = float(row['std_phi']) - map_df['uncertainty'] = stdv - col_to_map, title = 'uncertainty', title_uncert + map_df["uncertainty"] = float(row["std_phi"]) + col_to_map, title = "uncertainty", title_unc else: - st.sidebar.warning("No bootstrap std available for this feature.") + st.sidebar.warning("No bootstrap std 
available.") -# --- Render Map --- +# ─── Render Map ────────────────────────────────────────────────────────────── st.subheader(title) - plot_df = map_df.copy() if mode=="Fairness": plot_df = plot_df.merge(fair_df, on="GEOID", how="left") @@ -123,50 +121,46 @@ def load_data(): if col_to_map not in plot_df.columns: st.error(f"Column '{col_to_map}' not found. Available: {plot_df.columns.tolist()}") else: - m = folium.Map(location=[37.8, -96], zoom_start=4, tiles='cartodbpositron') - - choropleth = folium.Choropleth( - geo_data=plot_df, - data=plot_df, - columns=["GEOID", col_to_map], - key_on="feature.properties.GEOID", - fill_color='YlGnBu' if mode!="Fairness" else 'RdYlBu_r', - fill_opacity=0.7, - line_opacity=0.2, - legend_name=title, - nan_fill_color="white" - ).add_to(m) - - # Correctly attach tooltips to the GeoJson sub‐layer - choropleth.geojson.add_child( - folium.features.GeoJsonTooltip( + m = folium.Map(location=[37.8,-96], zoom_start=4, tiles="cartodbpositron") + chor = folium.Choropleth( + geo_data=plot_df, + data=plot_df, + columns=["GEOID", col_to_map], + key_on="feature.properties.GEOID", + fill_color=("YlGnBu" if mode!="Fairness" else "RdYlBu_r"), + fill_opacity=0.7, + line_opacity=0.2, + legend_name=title, + nan_fill_color="white" + ).add_to(m) + + chor.geojson.add_child( + folium.features.GeoJsonTooltip( fields=["GEOID", col_to_map], aliases=["GEOID", title], localize=True - ) - ) - - components.html(m._repr_html_(), height=550) + ) + ) + components.html(m._repr_html_(), height=550) -# --- SHAP Feature Importance Bar (SHAP only) --- +# ─── SHAP Global Importance ───────────────────────────────────────────────── if mode=="SHAP" and view=="Point Estimate": - st.subheader("Global SHAP Feature Importance") - imp_df = boot_df.copy() - imp_df['abs_mean'] = imp_df['mean_phi'].abs() - top10 = imp_df.nlargest(10, 'abs_mean') + st.subheader("Global SHAP Importance") + imp = boot_df.copy() + imp["abs_mean"] = imp["mean_phi"].abs() + top10 = imp.nlargest(10, 
"abs_mean") fig = px.bar( - top10, - x='feature', y='mean_phi', error_y='std_phi', - labels={'mean_phi':'Mean SHAP'}, - title='Top 10 SHAP Feature Importances' + top10, x="feature", y="mean_phi", error_y="std_phi", + labels={"mean_phi":"Mean SHAP"}, + title="Top 10 SHAP Features" ) st.plotly_chart(fig, use_container_width=True) -# --- Downloads --- +# ─── Downloads ─────────────────────────────────────────────────────────────── st.markdown("---") c1, c2, c3, c4 = st.columns(4) -c1.download_button("Download SHAP CSV", shap_df.to_csv(index=False), 'shap_explanations.csv') -c2.download_button("Download GeoShapley", geoshap_df.to_csv(index=False), 'geoshapley_explanations.csv') -c3.download_button("Download MGWR CSV", mgwr_df.to_csv(index=False), 'mgwr_coefficients.csv') -c4.download_button("Download Fairness CSV", fair_df.to_csv(index=False), 'fairness_metrics.csv') +c1.download_button("Download SHAP", shap_df.to_csv(index=False), "shap_explanations.csv") +c2.download_button("Download GeoShapley", geoshap_df.to_csv(index=False), "geoshapley_explanations.csv") +c3.download_button("Download MGWR", mgwr_df.to_csv(index=False), "mgwr_coefficients.csv") +c4.download_button("Download Fairness", fair_df.to_csv(index=False), "fairness_metrics.csv") From d0ca72cd3de78e68c843ca709ce2205f52e82472 Mon Sep 17 00:00:00 2001 From: Abhinav Shukla <67401627+maxprogrammer007@users.noreply.github.com> Date: Mon, 5 May 2025 12:21:08 +0530 Subject: [PATCH 04/10] Create dockerfile --- dockerfile | 13 +++++++++++++ 1 file changed, 13 insertions(+) create mode 100644 dockerfile diff --git a/dockerfile b/dockerfile new file mode 100644 index 0000000..f6f8067 --- /dev/null +++ b/dockerfile @@ -0,0 +1,13 @@ +# Dockerfile +FROM python:3.12-slim + +WORKDIR /app +COPY requirements.txt . +RUN pip install --no-cache-dir -r requirements.txt + +# Copy the rest of your repo +COPY . . 
+ +# Tell Streamlit to run your dashboard +ENV STREAMLIT_SERVER_HEADLESS=true +ENTRYPOINT ["streamlit", "run", "dashboard/app.py"] From a75da43eabed0493dc4510ef98e2e7437e5ec897 Mon Sep 17 00:00:00 2001 From: Abhinav Shukla <67401627+maxprogrammer007@users.noreply.github.com> Date: Mon, 5 May 2025 12:21:09 +0530 Subject: [PATCH 05/10] Update README.md --- README.md | 142 ++++++++++++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 142 insertions(+) diff --git a/README.md b/README.md index e69de29..9596857 100644 --- a/README.md +++ b/README.md @@ -0,0 +1,142 @@ + +# Explainable GeoAI: Interpreting Socio-Spatial Patterns + +An end-to-end spatial XAI pipeline combining XGBoost, SHAP, GeoShapley, and MGWR to uncover and visualize interpretable spatial effects in U.S. county-level voting data. + +--- + +## 📂 Repository Structure + +``` + +explainable-geoai/ +├── data/ +│ ├── raw/ +│ │ ├── census/ # raw ACS downloads +│ │ ├── shapefiles/ # county geometries +│ │ └── voting\_2021.csv # raw vote share +│ └── processed/ +│ ├── voting\_clean.csv # cleaned tabular data +│ ├── voting\_features.csv # with engineered features & spatial lags +│ ├── xgb\_automl\_model.pkl # trained FLAML+XGBoost model +│ ├── shap\_explanations.csv # SHAP outputs +│ ├── geoshapley\_explanations.csv # GeoShapley outputs +│ ├── mgwr\_coefficients.csv # MGWR baseline +│ ├── bootstrap\_shap\_stats.csv # SHAP uncertainty stats +│ └── fairness\_metrics.csv # spatial fairness gaps +├── src/ +│ ├── data\_loader.py # load & clean +│ ├── feature\_engineering.py # spatial lags, GeoDataFrame +│ ├── model\_training.py # FLAML + XGBoost training +│ ├── shap\_explainer.py # Kernel SHAP wrapper +│ ├── geoshapley\_explainer.py # GeoShapley computations +│ ├── mgwr\_comparison.py # MGWR baseline scripts +│ ├── bootstrap\_uncertainty.py #
bootstrap SHAP stats +│ ├── spatial\_fairness.py # compute residual‐fairness +│ └── config.py # paths & constants +├── dashboard/ +│ └── app.py # Streamlit + Folium dashboard +├── docs/ +│ ├── implementation\_notes.md # detailed pipeline doc +│ └── paper\_summary.pdf # summary of Li (2025) chapter +├── README.md # this file +└── requirements.txt # pip dependencies + +```` + +--- + +## ⚙️ Installation + +1. **Clone repo** + ```bash + git clone https://github.com/yourusername/explainable-geoai.git + cd explainable-geoai +```` + +2. **Create & activate** a virtual environment + + ```bash + python3 -m venv venv + source venv/bin/activate # macOS/Linux + venv\Scripts\activate # Windows + ``` + +3. **Install dependencies** + + ```bash + pip install -r requirements.txt + ``` + +4. **Download raw data** + + * Place `voting_2021.csv` in `data/raw/` + * Download ACS and shapefiles via `src/download_census.py` or manually. + +--- + +## 🚀 Quick Start + +1. **Data & features** + + ```bash + python src/data_loader.py + python src/feature_engineering.py + ``` + +2. **Train model** + + ```bash + python src/model_training.py + ``` + +3. **Generate explanations** + + ```bash + python src/shap_explainer.py + python src/geoshapley_explainer.py + python src/mgwr_comparison.py + python src/bootstrap_uncertainty.py + python src/spatial_fairness.py + ``` + +4. **Launch dashboard** + + ```bash + cd dashboard + streamlit run app.py + ``` + +--- + +## 📝 Scripts & Modules + +* **`data_loader.py`**: cleans raw vote + ACS, saves `voting_clean.csv`. +* **`feature_engineering.py`**: builds spatial lags, exports `voting_features.csv`. +* **`model_training.py`**: uses FLAML to find best XGBoost; saves model. +* **`shap_explainer.py`**: Kernel SHAP over FLAML model → `shap_explanations.csv`. +* **`geoshapley_explainer.py`**: computes GeoShapley components → `geoshapley_explanations.csv`.
+* **`mgwr_comparison.py`**: fits MGWR baseline → `mgwr_coefficients.csv`. +* **`bootstrap_uncertainty.py`**: bootstraps SHAP → `bootstrap_shap_stats.csv`. +* **`spatial_fairness.py`**: calculates fairness gaps → `fairness_metrics.csv`. +* **`dashboard/app.py`**: interactive Streamlit + Folium map. + +--- + +## 📊 Dashboard Overview + +* **SHAP**: county-level attributions, with uncertainty. +* **GeoShapley**: decomposed intrinsic (GEO), main, and interaction effects. +* **MGWR/OLS**: local regression coefficients for comparison. +* **Fairness**: residual differences across demographic groups. +* **Download** any CSV for offline analysis. + +--- + +## 🧾 Citing + +If you use this work, please cite: + +> Li, Ziqi (2025). *Explainable AI in Spatial Analysis*. In: +> *Advances in Spatial Data Science*, Springer. + From 10f1749285ded16ce1b6d8c4fc2151ff499ec142 Mon Sep 17 00:00:00 2001 From: Abhinav Shukla <67401627+maxprogrammer007@users.noreply.github.com> Date: Mon, 5 May 2025 12:21:10 +0530 Subject: [PATCH 06/10] Create requirements.txt --- requirements.txt | 13 +++++++++++++ 1 file changed, 13 insertions(+) create mode 100644 requirements.txt diff --git a/requirements.txt b/requirements.txt new file mode 100644 index 0000000..da876da --- /dev/null +++ b/requirements.txt @@ -0,0 +1,13 @@ +# requirements.txt +streamlit==1.25.0 +geopandas==0.13.0 +pandas==2.1.0 +plotly==5.17.0 +folium==0.14.0 +branca==0.8.1 +geoshapley==0.1.2 +xgboost==1.7.6 +flaml==1.1.3 +mgwr==2.5.2 +numpy==1.24.4 +scikit-learn==1.5.0 From 9ff8ff77804b69e5f19ed8cb12147f769b1c90db Mon Sep 17 00:00:00 2001 From: Abhinav Shukla <67401627+maxprogrammer007@users.noreply.github.com> Date: Mon, 5 May 2025 12:23:10 +0530 Subject: [PATCH 07/10] Update README.md --- README.md | 4 +--- 1 file changed, 1 insertion(+), 3 deletions(-) diff --git a/README.md b/README.md index 9596857..7b73afc 100644 --- a/README.md +++ b/README.md @@ -44,8 +44,6 @@ explainable-geoai/ ```` ---- - ## ⚙️ Installation 1.
**Clone repo** @@ -131,7 +129,7 @@ explainable-geoai/ * **Fairness**: residual differences across demographic groups. * **Download** any CSV for offline analysis. ---- + ## 🧾 Citing From e678fd776209db88add12b5b628ce3c012fc0478 Mon Sep 17 00:00:00 2001 From: Abhinav Shukla <67401627+maxprogrammer007@users.noreply.github.com> Date: Mon, 5 May 2025 12:24:04 +0530 Subject: [PATCH 08/10] Update README.md --- README.md | 5 ++--- 1 file changed, 2 insertions(+), 3 deletions(-) diff --git a/README.md b/README.md index 7b73afc..d73b0bc 100644 --- a/README.md +++ b/README.md @@ -71,7 +71,7 @@ explainable-geoai/ * Place `voting_2021.csv` in `data/raw/` * Download ACS and shapefiles via `src/download_census.py` or manually. ---- + ## 🚀 Quick Start @@ -105,7 +105,7 @@ explainable-geoai/ streamlit run app.py ``` ---- + ## 📝 Scripts & Modules @@ -119,7 +119,6 @@ explainable-geoai/ * **`spatial_fairness.py`**: calculates fairness gaps → `fairness_metrics.csv`. * **`dashboard/app.py`**: interactive Streamlit + Folium map.
---- ## 📊 Dashboard Overview From 6e0c9916024e22613d9a886e1f678ea18dc8233b Mon Sep 17 00:00:00 2001 From: Abhinav Shukla <67401627+maxprogrammer007@users.noreply.github.com> Date: Mon, 5 May 2025 12:25:02 +0530 Subject: [PATCH 09/10] Update README.md --- README.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/README.md b/README.md index d73b0bc..2f64398 100644 --- a/README.md +++ b/README.md @@ -42,7 +42,7 @@ explainable-geoai/ ├── README.md # this file └── requirements.txt # pip dependencies -```` +``` ## ⚙️ Installation From ce32b54474db3b98fd9dfbf351403bf3253518bd Mon Sep 17 00:00:00 2001 From: Abhinav Shukla <67401627+maxprogrammer007@users.noreply.github.com> Date: Mon, 5 May 2025 12:25:38 +0530 Subject: [PATCH 10/10] Update README.md --- README.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/README.md b/README.md index 2f64398..10ce911 100644 --- a/README.md +++ b/README.md @@ -50,7 +50,7 @@ explainable-geoai/ ```bash git clone https://github.com/yourusername/explainable-geoai.git cd explainable-geoai -```` +``` 2. **Create & activate** a virtual environment