From fed093b3dcaf99909ce4537c943a7bd683cf03ea Mon Sep 17 00:00:00 2001
From: Abhinav Shukla <67401627+maxprogrammer007@users.noreply.github.com>
Date: Mon, 5 May 2025 11:50:51 +0530
Subject: [PATCH 01/10] Update app.py

---
 dashboard/app.py | 178 ++++++++++++++++++++++++-----------------------
 1 file changed, 90 insertions(+), 88 deletions(-)

diff --git a/dashboard/app.py b/dashboard/app.py
index 116ec46..db5df96 100644
--- a/dashboard/app.py
+++ b/dashboard/app.py
@@ -9,43 +9,52 @@
 # --- Configuration ---
 PROJECT_ROOT  = os.path.abspath(os.path.join(os.path.dirname(__file__), os.pardir))
 DATA_DIR      = os.path.join(PROJECT_ROOT, 'data', 'processed')
-SHAPE_PATH    = os.path.join(PROJECT_ROOT, 'data', 'raw', 'shapefiles', 'cb_2018_us_county_500k.shp')
+SHAPE_PATH    = os.path.join(PROJECT_ROOT, 'data', 'raw', 'shapefiles',
+                             'cb_2018_us_county_500k.shp')
 
 SHAP_CSV      = os.path.join(DATA_DIR, 'shap_explanations.csv')
+GEOSHAP_CSV   = os.path.join(DATA_DIR, 'geoshapley_explanations.csv')
 MGWR_CSV      = os.path.join(DATA_DIR, 'mgwr_coefficients.csv')
 BOOT_CSV      = os.path.join(DATA_DIR, 'bootstrap_shap_stats.csv')
 FAIR_CSV      = os.path.join(DATA_DIR, 'fairness_metrics.csv')
 
 SENSITIVE_ATTRS = ["pct_black", "pct_hisp", "median_income"]
 
-# Must be first Streamlit call
+# Must be the first Streamlit call
 st.set_page_config(layout="wide", page_title="🗺️ Explainable GeoAI Dashboard")
 
 # --- Load Data ---
 @st.cache_data
 def load_data():
+    # Base GeoDataFrame
     gdf = gpd.read_file(SHAPE_PATH).to_crs("EPSG:4326")
     gdf['GEOID'] = gdf['GEOID'].astype(str).str.zfill(5)
 
-    shap_df = pd.read_csv(SHAP_CSV, dtype={'GEOID': str})
-    mgwr_df = pd.read_csv(MGWR_CSV, dtype={'GEOID': str})
-    boot_df = pd.read_csv(BOOT_CSV)
-    fair_df = pd.read_csv(FAIR_CSV, dtype={'GEOID': str})
-
-    # Merge shap & mgwr into GeoDataFrame
-    map_df = gdf.merge(shap_df, on='GEOID', how='left')
-    map_df = map_df.merge(mgwr_df, on='GEOID', how='left', suffixes=('_shap','_mgwr'))
+    # Tabular outputs
+    shap_df    = pd.read_csv(SHAP_CSV,    dtype={'GEOID': str})
+    geoshap_df = pd.read_csv(GEOSHAP_CSV, dtype={'GEOID': str})
+    mgwr_df    = pd.read_csv(MGWR_CSV,    dtype={'GEOID': str})
+    boot_df    = pd.read_csv(BOOT_CSV)
+    fair_df    = pd.read_csv(FAIR_CSV,    dtype={'GEOID': str})
+
+    # Merge all into the GeoDataFrame
+    df = (
+        gdf
+        .merge(shap_df,    on='GEOID', how='left')
+        .merge(geoshap_df, on='GEOID', how='left', suffixes=('_shap','_geoshap'))
+        .merge(mgwr_df,    on='GEOID', how='left', suffixes=('', '_mgwr'))
+    )
+    return df, shap_df, geoshap_df, mgwr_df, boot_df, fair_df
 
-    return map_df, shap_df, mgwr_df, boot_df, fair_df
+map_df, shap_df, geoshap_df, mgwr_df, boot_df, fair_df = load_data()
 
-map_df, shap_df, mgwr_df, boot_df, fair_df = load_data()
 
 # --- Sidebar Controls ---
 st.sidebar.title("Controls")
 
 mode = st.sidebar.radio(
     "Select Mode:",
-    ["SHAP", "MGWR/OLS", "Fairness"]
+    ["SHAP", "GeoShapley", "MGWR/OLS", "Fairness"]
 )
 
 view = st.sidebar.radio(
@@ -53,118 +62,111 @@ def load_data():
     ["Point Estimate", "Uncertainty"]
 )
 
-# Determine feature & columns
+# --- Determine which column to map ---
 if mode == "SHAP":
     features = [c.replace('phi_', '') for c in shap_df.columns if c.startswith('phi_')]
-    feature = st.sidebar.selectbox("Feature:", features)
-    col_point = f"phi_{feature}"
+    feature  = st.sidebar.selectbox("SHAP Feature:", features)
+    col_point  = f"phi_{feature}"
     col_uncert = 'std_phi'
     title_point  = f"SHAP Attribution: {feature}"
     title_uncert = f"SHAP Uncertainty (Std Dev): {feature}"
 
+elif mode == "GeoShapley":
+    # GeoShapley components
+    base_opts = ["phi_GEO"]                            # intrinsic location
+    feat_opts = [c for c in geoshap_df.columns if c.startswith('phi_') and not c.startswith(('phi_int','phi_GEO'))]
+    int_opts  = [c for c in geoshap_df.columns if c.startswith('phi_int_')]
+    options   = ["phi_GEO"] + feat_opts + int_opts
+    comp      = st.sidebar.selectbox("GeoShapley Component:", options)
+    col_point   = comp
+    col_uncert  = None
+    title_point = comp.replace('phi_','').replace('_',' ').title()
+    title_uncert= ""
+
 elif mode == "MGWR/OLS":
     features = [c for c in mgwr_df.columns if c != 'GEOID']
-    feature = st.sidebar.selectbox("Coefficient:", features)
-    col_point  = feature
-    col_uncert = None
-    title_point  = f"MGWR/OLS Coefficient: {feature}"
-    title_uncert = ""
+    feature  = st.sidebar.selectbox("Coefficient:", features)
+    col_point   = feature
+    col_uncert  = None
+    title_point = f"MGWR/OLS Coefficient: {feature}"
+    title_uncert= ""
 
 else:  # Fairness
-    fair_labels = {
-        "pct_black":"Black %",
-        "pct_hisp":"Hispanic %",
-        "median_income":"Median Income"
-    }
-    attr = st.sidebar.selectbox(
-        "Sensitive Attribute:",
-        SENSITIVE_ATTRS,
-        format_func=lambda x: fair_labels[x]
-    )
-    feature = attr
-    col_point  = f"{attr}_fairness_score"
-    col_uncert = None
-    title_point  = f"Fairness Score – {fair_labels[attr]}"
-    title_uncert = ""
+    labels = {"pct_black":"Black %","pct_hisp":"Hispanic %","median_income":"Median Income"}
+    attr    = st.sidebar.selectbox("Sensitive Attribute:", SENSITIVE_ATTRS,
+                                   format_func=lambda x: labels[x])
+    col_point   = f"{attr}_fairness_score"
+    col_uncert  = None
+    title_point = f"Fairness Score – {labels[attr]}"
+    title_uncert= ""
 
 # Default to point estimate
-col_to_map = col_point
-title = title_point
-
-# Handle SHAP uncertainty view
-if mode == "SHAP" and view == "Uncertainty":
-    # Get std_phi for selected feature
-    std_row = boot_df.loc[boot_df["feature"] == feature]
-    if not std_row.empty:
-        std_val = float(std_row["std_phi"])
-        # inject into map_df copy
-        map_df["uncertainty"] = std_val
-        col_to_map = "uncertainty"
-        title = title_uncert
+col_to_map, title = col_point, title_point
+
+# Handle SHAP uncertainty
+if mode=="SHAP" and view=="Uncertainty":
+    row = boot_df[boot_df['feature']==feature]
+    if not row.empty:
+        stdv = float(row['std_phi'])
+        map_df['uncertainty'] = stdv
+        col_to_map, title = 'uncertainty', title_uncert
     else:
         st.sidebar.warning("No bootstrap std available for this feature.")
-        col_to_map = col_point
-        title = title_point
 
-# --- Build Map ---
+# --- Render Map ---
 st.subheader(title)
 
-# Prepare DataFrame to map
 plot_df = map_df.copy()
-if mode == "Fairness":
+if mode=="Fairness":
     plot_df = plot_df.merge(fair_df, on="GEOID", how="left")
 
-# Verify column exists
 if col_to_map not in plot_df.columns:
-    st.error(f"🛑 Column '{col_to_map}' not found. Available columns: {plot_df.columns.tolist()}")
+    st.error(f"Column '{col_to_map}' not found. Available: {plot_df.columns.tolist()}")
 else:
-    m = folium.Map(location=[37.8, -96], zoom_start=4, tiles='cartodbpositron')
-
-    choropleth = folium.Choropleth(
-        geo_data=plot_df,
-        data=plot_df,
-        columns=["GEOID", col_to_map],
-        key_on="feature.properties.GEOID",
-        fill_color='YlGnBu' if mode!="Fairness" else 'RdYlBu_r',
-        fill_opacity=0.7,
-        line_opacity=0.2,
-        legend_name=title,
-        nan_fill_color="white"
-    ).add_to(m)
-
-    # Add tooltip
-    choropleth.geojson.add_child(
-        folium.features.GeoJsonTooltip(
+        m = folium.Map(location=[37.8, -96], zoom_start=4, tiles='cartodbpositron')
+
+        choropleth = folium.Choropleth(
+            geo_data=plot_df,
+            data=plot_df,
+            columns=["GEOID", col_to_map],
+            key_on="feature.properties.GEOID",
+            fill_color='YlGnBu' if mode!="Fairness" else 'RdYlBu_r',
+            fill_opacity=0.7,
+            line_opacity=0.2,
+            legend_name=title,
+            nan_fill_color="white"
+        ).add_to(m)
+
+    # Correctly attach tooltips to the GeoJson sub‐layer
+        choropleth.geojson.add_child(
+            folium.features.GeoJsonTooltip(
             fields=["GEOID", col_to_map],
             aliases=["GEOID", title],
             localize=True
-        )
-    )
+            )
+         )
 
-    map_html = m._repr_html_()
-    components.html(map_html, height=500, scrolling=True)
+        components.html(m._repr_html_(), height=550)
 
 
-# --- SHAP Feature Importance ---
-if mode == "SHAP" and view == "Point Estimate":
+# --- SHAP Feature Importance Bar (SHAP only) ---
+if mode=="SHAP" and view=="Point Estimate":
     st.subheader("Global SHAP Feature Importance")
     imp_df = boot_df.copy()
     imp_df['abs_mean'] = imp_df['mean_phi'].abs()
-    top10 = imp_df.sort_values('abs_mean', ascending=False).head(10)
+    top10 = imp_df.nlargest(10, 'abs_mean')
     fig = px.bar(
         top10,
-        x='feature',
-        y='mean_phi',
-        error_y='std_phi',
+        x='feature', y='mean_phi', error_y='std_phi',
         labels={'mean_phi':'Mean SHAP'},
         title='Top 10 SHAP Feature Importances'
     )
     st.plotly_chart(fig, use_container_width=True)
 
-# --- Download Buttons ---
+# --- Downloads ---
 st.markdown("---")
 c1, c2, c3, c4 = st.columns(4)
-c1.download_button("Download SHAP CSV", shap_df.to_csv(index=False), 'shap_explanations.csv')
-c2.download_button("Download MGWR CSV", mgwr_df.to_csv(index=False), 'mgwr_coefficients.csv')
-c3.download_button("Download Bootstrap Stats", boot_df.to_csv(index=False), 'bootstrap_shap_stats.csv')
-c4.download_button("Download Fairness CSV", fair_df.to_csv(index=False), 'fairness_metrics.csv')
+c1.download_button("Download SHAP CSV",    shap_df.to_csv(index=False),    'shap_explanations.csv')
+c2.download_button("Download GeoShapley",  geoshap_df.to_csv(index=False), 'geoshapley_explanations.csv')
+c3.download_button("Download MGWR CSV",    mgwr_df.to_csv(index=False),    'mgwr_coefficients.csv')
+c4.download_button("Download Fairness CSV", fair_df.to_csv(index=False),   'fairness_metrics.csv')

From 341ec25572263ecff0113cae7c7502946d97cee7 Mon Sep 17 00:00:00 2001
From: Abhinav Shukla <67401627+maxprogrammer007@users.noreply.github.com>
Date: Mon, 5 May 2025 12:21:01 +0530
Subject: [PATCH 02/10] Create ci.yml

---
 .github/workflows/ci.yml | 26 ++++++++++++++++++++++++++
 1 file changed, 26 insertions(+)
 create mode 100644 .github/workflows/ci.yml

diff --git a/.github/workflows/ci.yml b/.github/workflows/ci.yml
new file mode 100644
index 0000000..b8e3f8e
--- /dev/null
+++ b/.github/workflows/ci.yml
@@ -0,0 +1,26 @@
+name: CI
+
+on:
+  push:
+    branches: [ main ]
+  pull_request:
+
+jobs:
+  lint-and-test:
+    runs-on: ubuntu-latest
+    steps:
+      - uses: actions/checkout@v4
+      - name: Set up Python
+        uses: actions/setup-python@v5
+        with:
+          python-version: '3.11'  # requirements.txt pins numpy==1.24.4, which does not support Python 3.12
+      - name: Install dependencies
+        run: |
+          pip install -r requirements.txt
+      - name: Lint with flake8
+        run: |
+          pip install flake8
+          flake8 src/ dashboard/
+      - name: Smoke-run Streamlit (health check)
+        run: |
+          timeout 30 streamlit run dashboard/app.py --server.headless true || [ $? -eq 124 ]  # 124 = server still up when timeout fired

From 933feca5585706427cbe1e36466d5c28ecc425ac Mon Sep 17 00:00:00 2001
From: Abhinav Shukla <67401627+maxprogrammer007@users.noreply.github.com>
Date: Mon, 5 May 2025 12:21:07 +0530
Subject: [PATCH 03/10] Update app.py

---
 dashboard/app.py | 212 +++++++++++++++++++++++------------------------
 1 file changed, 103 insertions(+), 109 deletions(-)

diff --git a/dashboard/app.py b/dashboard/app.py
index db5df96..6d18135 100644
--- a/dashboard/app.py
+++ b/dashboard/app.py
@@ -1,3 +1,5 @@
+# dashboard/app.py
+
 import os
 import streamlit as st
 import pandas as pd
@@ -6,116 +8,112 @@
 import streamlit.components.v1 as components
 import folium
 
-# --- Configuration ---
-PROJECT_ROOT  = os.path.abspath(os.path.join(os.path.dirname(__file__), os.pardir))
-DATA_DIR      = os.path.join(PROJECT_ROOT, 'data', 'processed')
-SHAPE_PATH    = os.path.join(PROJECT_ROOT, 'data', 'raw', 'shapefiles',
-                             'cb_2018_us_county_500k.shp')
+# ─── Paths & Config ─────────────────────────────────────────────────────────
+PROJECT_ROOT = os.path.abspath(os.path.join(os.path.dirname(__file__), os.pardir))
+DATA_DIR     = os.path.join(PROJECT_ROOT, "data", "processed")
+SHAPE_PATH   = os.path.join(PROJECT_ROOT, "data", "raw", "shapefiles",
+                            "cb_2018_us_county_500k.shp")
 
-SHAP_CSV      = os.path.join(DATA_DIR, 'shap_explanations.csv')
-GEOSHAP_CSV   = os.path.join(DATA_DIR, 'geoshapley_explanations.csv')
-MGWR_CSV      = os.path.join(DATA_DIR, 'mgwr_coefficients.csv')
-BOOT_CSV      = os.path.join(DATA_DIR, 'bootstrap_shap_stats.csv')
-FAIR_CSV      = os.path.join(DATA_DIR, 'fairness_metrics.csv')
+SHAP_CSV     = os.path.join(DATA_DIR, "shap_explanations.csv")
+GEOSHAP_CSV  = os.path.join(DATA_DIR, "geoshapley_explanations.csv")
+MGWR_CSV     = os.path.join(DATA_DIR, "mgwr_coefficients.csv")
+BOOT_CSV     = os.path.join(DATA_DIR, "bootstrap_shap_stats.csv")
+FAIR_CSV     = os.path.join(DATA_DIR, "fairness_metrics.csv")
 
 SENSITIVE_ATTRS = ["pct_black", "pct_hisp", "median_income"]
 
-# Must be the first Streamlit call
 st.set_page_config(layout="wide", page_title="🗺️ Explainable GeoAI Dashboard")
 
-# --- Load Data ---
-@st.cache_data
+# ─── Sidebar Help ───────────────────────────────────────────────────────────
+st.sidebar.title("Controls")
+st.sidebar.markdown("""
+**Mode Descriptions**  
+- **SHAP:** Exact `phi_…` columns from your SHAP output + bootstrap uncertainty  
+- **GeoShapley:** Decomposed spatial–feature effects  
+- **MGWR/OLS:** Local regression coefficients  
+- **Fairness:** Residual-based fairness gaps  
+""")
+with st.expander("❓ How to use"):
+    st.write("""
+      1. Pick a **Mode**.  
+      2. Pick **View** (Point vs Uncertainty).  
+      3. For SHAP, choose exactly one `phi_…` column from your CSV.  
+      4. Hover on the map or download any CSV below.
+    """)
+
+# ─── Data Loader ─────────────────────────────────────────────────────────────
+@st.cache_data(ttl=86400)
 def load_data():
-    # Base GeoDataFrame
     gdf = gpd.read_file(SHAPE_PATH).to_crs("EPSG:4326")
-    gdf['GEOID'] = gdf['GEOID'].astype(str).str.zfill(5)
+    gdf["GEOID"] = gdf["GEOID"].astype(str).str.zfill(5)
 
-    # Tabular outputs
-    shap_df    = pd.read_csv(SHAP_CSV,    dtype={'GEOID': str})
-    geoshap_df = pd.read_csv(GEOSHAP_CSV, dtype={'GEOID': str})
-    mgwr_df    = pd.read_csv(MGWR_CSV,    dtype={'GEOID': str})
+    shap_df    = pd.read_csv(SHAP_CSV,    dtype={"GEOID": str})
+    geoshap_df = pd.read_csv(GEOSHAP_CSV, dtype={"GEOID": str})
+    mgwr_df    = pd.read_csv(MGWR_CSV,    dtype={"GEOID": str})
     boot_df    = pd.read_csv(BOOT_CSV)
-    fair_df    = pd.read_csv(FAIR_CSV,    dtype={'GEOID': str})
+    fair_df    = pd.read_csv(FAIR_CSV,    dtype={"GEOID": str})
 
-    # Merge all into the GeoDataFrame
-    df = (
+    merged = (
         gdf
-        .merge(shap_df,    on='GEOID', how='left')
-        .merge(geoshap_df, on='GEOID', how='left', suffixes=('_shap','_geoshap'))
-        .merge(mgwr_df,    on='GEOID', how='left', suffixes=('', '_mgwr'))
+        .merge(shap_df,    on="GEOID", how="left")
+        .merge(geoshap_df, on="GEOID", how="left", suffixes=("_shap","_geoshap"))
+        .merge(mgwr_df,    on="GEOID", how="left", suffixes=("", "_mgwr"))
     )
-    return df, shap_df, geoshap_df, mgwr_df, boot_df, fair_df
+    return merged, shap_df, geoshap_df, mgwr_df, boot_df, fair_df
 
 map_df, shap_df, geoshap_df, mgwr_df, boot_df, fair_df = load_data()
 
+# ─── Mode & View ─────────────────────────────────────────────────────────────
+mode = st.sidebar.radio("Select Mode:", ["SHAP", "GeoShapley", "MGWR/OLS", "Fairness"])
+view = st.sidebar.radio("View:", ["Point Estimate", "Uncertainty"])
 
-# --- Sidebar Controls ---
-st.sidebar.title("Controls")
-
-mode = st.sidebar.radio(
-    "Select Mode:",
-    ["SHAP", "GeoShapley", "MGWR/OLS", "Fairness"]
-)
-
-view = st.sidebar.radio(
-    "View:",
-    ["Point Estimate", "Uncertainty"]
-)
-
-# --- Determine which column to map ---
+# ─── Sidebar selectors & titles ──────────────────────────────────────────────
 if mode == "SHAP":
-    features = [c.replace('phi_', '') for c in shap_df.columns if c.startswith('phi_')]
-    feature  = st.sidebar.selectbox("SHAP Feature:", features)
-    col_point  = f"phi_{feature}"
-    col_uncert = 'std_phi'
-    title_point  = f"SHAP Attribution: {feature}"
-    title_uncert = f"SHAP Uncertainty (Std Dev): {feature}"
+    # list all existing phi_ columns
+    phi_cols = [c for c in shap_df.columns if c.startswith("phi_")]
+    feature  = st.sidebar.selectbox("SHAP Column:", sorted(phi_cols))
+    col_point   = feature
+    col_uncert  = "std_phi"
+    title_point = feature
+    title_unc   = f"Bootstrap std of {feature}"
 
 elif mode == "GeoShapley":
-    # GeoShapley components
-    base_opts = ["phi_GEO"]                            # intrinsic location
-    feat_opts = [c for c in geoshap_df.columns if c.startswith('phi_') and not c.startswith(('phi_int','phi_GEO'))]
-    int_opts  = [c for c in geoshap_df.columns if c.startswith('phi_int_')]
-    options   = ["phi_GEO"] + feat_opts + int_opts
-    comp      = st.sidebar.selectbox("GeoShapley Component:", options)
+    geosh_cols = [c for c in geoshap_df.columns if c.startswith("phi_")]
+    comp       = st.sidebar.selectbox("GeoShapley Column:", sorted(geosh_cols))
     col_point   = comp
     col_uncert  = None
-    title_point = comp.replace('phi_','').replace('_',' ').title()
-    title_uncert= ""
+    title_point = comp
+    title_unc   = ""
 
 elif mode == "MGWR/OLS":
-    features = [c for c in mgwr_df.columns if c != 'GEOID']
-    feature  = st.sidebar.selectbox("Coefficient:", features)
-    col_point   = feature
+    mgwr_cols = [c for c in mgwr_df.columns if c != "GEOID"]
+    coef      = st.sidebar.selectbox("MGWR/OLS Column:", sorted(mgwr_cols))
+    col_point   = coef
     col_uncert  = None
-    title_point = f"MGWR/OLS Coefficient: {feature}"
-    title_uncert= ""
+    title_point = coef
+    title_unc   = ""
 
 else:  # Fairness
-    labels = {"pct_black":"Black %","pct_hisp":"Hispanic %","median_income":"Median Income"}
-    attr    = st.sidebar.selectbox("Sensitive Attribute:", SENSITIVE_ATTRS,
-                                   format_func=lambda x: labels[x])
+    fair_labels = {"pct_black":"Black %","pct_hisp":"Hispanic %","median_income":"Median Income"}
+    attr      = st.sidebar.selectbox("Attribute:", SENSITIVE_ATTRS,
+                                     format_func=lambda x: fair_labels[x])
     col_point   = f"{attr}_fairness_score"
     col_uncert  = None
-    title_point = f"Fairness Score – {labels[attr]}"
-    title_uncert= ""
+    title_point = col_point
+    title_unc   = ""
 
-# Default to point estimate
+# ─── Handle SHAP Uncertainty ────────────────────────────────────────────────
 col_to_map, title = col_point, title_point
-
-# Handle SHAP uncertainty
 if mode=="SHAP" and view=="Uncertainty":
-    row = boot_df[boot_df['feature']==feature]
+    row = boot_df.loc[boot_df["feature"]==feature.removeprefix("phi_")]
     if not row.empty:
-        stdv = float(row['std_phi'])
-        map_df['uncertainty'] = stdv
-        col_to_map, title = 'uncertainty', title_uncert
+        map_df["uncertainty"] = float(row["std_phi"].iloc[0])  # scalar std of the first matching row; float(Series) is deprecated
+        col_to_map, title = "uncertainty", title_unc
     else:
-        st.sidebar.warning("No bootstrap std available for this feature.")
+        st.sidebar.warning("No bootstrap std available.")
 
-# --- Render Map ---
+# ─── Render Map ──────────────────────────────────────────────────────────────
 st.subheader(title)
-
 plot_df = map_df.copy()
 if mode=="Fairness":
     plot_df = plot_df.merge(fair_df, on="GEOID", how="left")
@@ -123,50 +121,46 @@ def load_data():
 if col_to_map not in plot_df.columns:
     st.error(f"Column '{col_to_map}' not found. Available: {plot_df.columns.tolist()}")
 else:
-        m = folium.Map(location=[37.8, -96], zoom_start=4, tiles='cartodbpositron')
-
-        choropleth = folium.Choropleth(
-            geo_data=plot_df,
-            data=plot_df,
-            columns=["GEOID", col_to_map],
-            key_on="feature.properties.GEOID",
-            fill_color='YlGnBu' if mode!="Fairness" else 'RdYlBu_r',
-            fill_opacity=0.7,
-            line_opacity=0.2,
-            legend_name=title,
-            nan_fill_color="white"
-        ).add_to(m)
-
-    # Correctly attach tooltips to the GeoJson sub‐layer
-        choropleth.geojson.add_child(
-            folium.features.GeoJsonTooltip(
+    m = folium.Map(location=[37.8,-96], zoom_start=4, tiles="cartodbpositron")
+    chor = folium.Choropleth(
+        geo_data=plot_df,
+        data=plot_df,
+        columns=["GEOID", col_to_map],
+        key_on="feature.properties.GEOID",
+        fill_color=("YlGnBu" if mode!="Fairness" else "RdYlBu_r"),
+        fill_opacity=0.7,
+        line_opacity=0.2,
+        legend_name=title,
+        nan_fill_color="white"
+    ).add_to(m)
+
+    chor.geojson.add_child(
+        folium.features.GeoJsonTooltip(
             fields=["GEOID", col_to_map],
             aliases=["GEOID", title],
             localize=True
-            )
-         )
-
-        components.html(m._repr_html_(), height=550)
+        )
+    )
 
+    components.html(m._repr_html_(), height=550)
 
-# --- SHAP Feature Importance Bar (SHAP only) ---
+# ─── SHAP Global Importance ─────────────────────────────────────────────────
 if mode=="SHAP" and view=="Point Estimate":
-    st.subheader("Global SHAP Feature Importance")
-    imp_df = boot_df.copy()
-    imp_df['abs_mean'] = imp_df['mean_phi'].abs()
-    top10 = imp_df.nlargest(10, 'abs_mean')
+    st.subheader("Global SHAP Importance")
+    imp = boot_df.copy()
+    imp["abs_mean"] = imp["mean_phi"].abs()
+    top10 = imp.nlargest(10, "abs_mean")
     fig = px.bar(
-        top10,
-        x='feature', y='mean_phi', error_y='std_phi',
-        labels={'mean_phi':'Mean SHAP'},
-        title='Top 10 SHAP Feature Importances'
+        top10, x="feature", y="mean_phi", error_y="std_phi",
+        labels={"mean_phi":"Mean SHAP"},
+        title="Top 10 SHAP Features"
     )
     st.plotly_chart(fig, use_container_width=True)
 
-# --- Downloads ---
+# ─── Downloads ───────────────────────────────────────────────────────────────
 st.markdown("---")
 c1, c2, c3, c4 = st.columns(4)
-c1.download_button("Download SHAP CSV",    shap_df.to_csv(index=False),    'shap_explanations.csv')
-c2.download_button("Download GeoShapley",  geoshap_df.to_csv(index=False), 'geoshapley_explanations.csv')
-c3.download_button("Download MGWR CSV",    mgwr_df.to_csv(index=False),    'mgwr_coefficients.csv')
-c4.download_button("Download Fairness CSV", fair_df.to_csv(index=False),   'fairness_metrics.csv')
+c1.download_button("Download SHAP",       shap_df.to_csv(index=False),    "shap_explanations.csv")
+c2.download_button("Download GeoShapley", geoshap_df.to_csv(index=False), "geoshapley_explanations.csv")
+c3.download_button("Download MGWR",       mgwr_df.to_csv(index=False),    "mgwr_coefficients.csv")
+c4.download_button("Download Fairness",   fair_df.to_csv(index=False),    "fairness_metrics.csv")

From d0ca72cd3de78e68c843ca709ce2205f52e82472 Mon Sep 17 00:00:00 2001
From: Abhinav Shukla <67401627+maxprogrammer007@users.noreply.github.com>
Date: Mon, 5 May 2025 12:21:08 +0530
Subject: [PATCH 04/10] Create dockerfile

---
 dockerfile | 13 +++++++++++++
 1 file changed, 13 insertions(+)
 create mode 100644 dockerfile

diff --git a/dockerfile b/dockerfile
new file mode 100644
index 0000000..f6f8067
--- /dev/null
+++ b/dockerfile
@@ -0,0 +1,13 @@
+# Dockerfile (Python 3.11: requirements.txt pins numpy==1.24.4, which does not support Python 3.12)
+FROM python:3.11-slim
+
+WORKDIR /app
+COPY requirements.txt .
+RUN pip install --no-cache-dir -r requirements.txt
+
+# Copy the rest of your repo
+COPY . .
+
+# Tell Streamlit to run your dashboard
+ENV STREAMLIT_SERVER_HEADLESS=true
+ENTRYPOINT ["streamlit", "run", "dashboard/app.py"]

From a75da43eabed0493dc4510ef98e2e7437e5ec897 Mon Sep 17 00:00:00 2001
From: Abhinav Shukla <67401627+maxprogrammer007@users.noreply.github.com>
Date: Mon, 5 May 2025 12:21:09 +0530
Subject: [PATCH 05/10] Update README.md

---
 README.md | 142 ++++++++++++++++++++++++++++++++++++++++++++++++++++++
 1 file changed, 142 insertions(+)

diff --git a/README.md b/README.md
index e69de29..9596857 100644
--- a/README.md
+++ b/README.md
@@ -0,0 +1,142 @@
+
+# Explainable GeoAI: Interpreting Socio-Spatial Patterns
+
+An end-to-end spatial XAI pipeline combining XGBoost, SHAP, GeoShapley, and MGWR to uncover and visualize interpretable spatial effects in U.S. county-level voting data.
+
+---
+
+## 📂 Repository Structure
+
+```
+
+explainable-geoai/
+├── data/
+│   ├── raw/
+│   │   ├── census/                   # raw ACS downloads
+│   │   ├── shapefiles/              # county geometries
+│   │   └── voting_2021.csv           # raw vote share
+│   └── processed/
+│       ├── voting_clean.csv          # cleaned tabular data
+│       ├── voting_features.csv       # with engineered features & spatial lags
+│       ├── xgb_automl_model.pkl       # trained FLAML+XGBoost model
+│       ├── shap_explanations.csv     # SHAP outputs
+│       ├── geoshapley_explanations.csv  # GeoShapley outputs
+│       ├── mgwr_coefficients.csv     # MGWR baseline
+│       ├── bootstrap_shap_stats.csv   # SHAP uncertainty stats
+│       └── fairness_metrics.csv      # spatial fairness gaps
+├── src/
+│   ├── data_loader.py                # load & clean
+│   ├── feature_engineering.py        # spatial lags, GeoDataFrame
+│   ├── model_training.py             # FLAML + XGBoost training
+│   ├── shap_explainer.py             # Kernel SHAP wrapper
+│   ├── geoshapley_explainer.py       # GeoShapley computations
+│   ├── mgwr_comparison.py            # MGWR baseline scripts
+│   ├── bootstrap_uncertainty.py      # bootstrap SHAP stats
+│   ├── spatial_fairness.py           # compute residual-fairness
+│   └── config.py                    # paths & constants
+├── dashboard/
+│   └── app.py                       # Streamlit + Folium dashboard
+├── docs/
+│   ├── implementation_notes.md       # detailed pipeline doc
+│   └── paper_summary.pdf             # summary of Li (2025) chapter
+├── README.md                        # this file
+└── requirements.txt                 # pip dependencies
+
+````
+
+---
+
+## ⚙️ Installation
+
+1. **Clone repo**  
+   ```bash
+   git clone https://github.com/yourusername/explainable-geoai.git
+   cd explainable-geoai
+````
+
+2. **Create & activate** a virtual environment
+
+   ```bash
+   python3 -m venv venv
+   source venv/bin/activate   # macOS/Linux
+   venv\Scripts\activate      # Windows
+   ```
+
+3. **Install dependencies**
+
+   ```bash
+   pip install -r requirements.txt
+   ```
+
+4. **Download raw data**
+
+   * Place `voting_2021.csv` in `data/raw/`
+   * Download ACS and shapefiles via `src/download_census.py` or manually.
+
+---
+
+## 🚀 Quick Start
+
+1. **Data & features**
+
+   ```bash
+   python src/data_loader.py
+   python src/feature_engineering.py
+   ```
+
+2. **Train model**
+
+   ```bash
+   python src/model_training.py
+   ```
+
+3. **Generate explanations**
+
+   ```bash
+   python src/shap_explainer.py
+   python src/geoshapley_explainer.py
+   python src/mgwr_comparison.py
+   python src/bootstrap_uncertainty.py
+   python src/spatial_fairness.py
+   ```
+
+4. **Launch dashboard**
+
+   ```bash
+   cd dashboard
+   streamlit run app.py
+   ```
+
+---
+
+## 📝 Scripts & Modules
+
+* **`data_loader.py`**: cleans raw vote + ACS, saves `voting_clean.csv`.
+* **`feature_engineering.py`**: builds spatial lags, exports `voting_features.csv`.
+* **`model_training.py`**: uses FLAML to find best XGBoost; saves model.
+* **`shap_explainer.py`**: Kernel SHAP over FLAML model → `shap_explanations.csv`.
+* **`geoshapley_explainer.py`**: computes GeoShapley components → `geoshapley_explanations.csv`.
+* **`mgwr_comparison.py`**: fits MGWR baseline → `mgwr_coefficients.csv`.
+* **`bootstrap_uncertainty.py`**: bootstraps SHAP → `bootstrap_shap_stats.csv`.
+* **`spatial_fairness.py`**: calculates fairness gaps → `fairness_metrics.csv`.
+* **`dashboard/app.py`**: interactive Streamlit + Folium map.
+
+---
+
+## 📊 Dashboard Overview
+
+* **SHAP**: county-level attributions, with uncertainty.
+* **GeoShapley**: decomposed intrinsic (GEO), main, and interaction effects.
+* **MGWR/OLS**: local regression coefficients for comparison.
+* **Fairness**: residual differences across demographic groups.
+* **Download** any CSV for offline analysis.
+
+---
+
+## 🧾 Citing
+
+If you use this work, please cite:
+
+> Li, Ziqi (2025). *Explainable AI in Spatial Analysis*. In:
+> *Advances in Spatial Data Science*, Springer.
+

From 10f1749285ded16ce1b6d8c4fc2151ff499ec142 Mon Sep 17 00:00:00 2001
From: Abhinav Shukla <67401627+maxprogrammer007@users.noreply.github.com>
Date: Mon, 5 May 2025 12:21:10 +0530
Subject: [PATCH 06/10] Create requirements.txt

---
 requirements.txt | 13 +++++++++++++
 1 file changed, 13 insertions(+)
 create mode 100644 requirements.txt

diff --git a/requirements.txt b/requirements.txt
new file mode 100644
index 0000000..da876da
--- /dev/null
+++ b/requirements.txt
@@ -0,0 +1,13 @@
+# requirements.txt
+streamlit==1.25.0
+geopandas==0.13.0
+pandas==2.1.0
+plotly==5.17.0
+folium==0.14.0
+branca==0.8.1
+geoshapley==0.1.2
+xgboost==1.7.6
+flaml==1.1.3
+mgwr==2.5.2
+numpy==1.24.4
+scikit-learn==1.5.0

From 9ff8ff77804b69e5f19ed8cb12147f769b1c90db Mon Sep 17 00:00:00 2001
From: Abhinav Shukla <67401627+maxprogrammer007@users.noreply.github.com>
Date: Mon, 5 May 2025 12:23:10 +0530
Subject: [PATCH 07/10] Update README.md

---
 README.md | 4 +---
 1 file changed, 1 insertion(+), 3 deletions(-)

diff --git a/README.md b/README.md
index 9596857..7b73afc 100644
--- a/README.md
+++ b/README.md
@@ -44,8 +44,6 @@ explainable-geoai/
 
 ````
 
----
-
 ## ⚙️ Installation
 
 1. **Clone repo**  
@@ -131,7 +129,7 @@ explainable-geoai/
 * **Fairness**: residual differences across demographic groups.
 * **Download** any CSV for offline analysis.
 
----
+
 
 ## 🧾 Citing
 

From e678fd776209db88add12b5b628ce3c012fc0478 Mon Sep 17 00:00:00 2001
From: Abhinav Shukla <67401627+maxprogrammer007@users.noreply.github.com>
Date: Mon, 5 May 2025 12:24:04 +0530
Subject: [PATCH 08/10] Update README.md

---
 README.md | 5 ++---
 1 file changed, 2 insertions(+), 3 deletions(-)

diff --git a/README.md b/README.md
index 7b73afc..d73b0bc 100644
--- a/README.md
+++ b/README.md
@@ -71,7 +71,7 @@ explainable-geoai/
    * Place `voting_2021.csv` in `data/raw/`
    * Download ACS and shapefiles via `src/download_census.py` or manually.
 
----
+
 
 ## 🚀 Quick Start
 
@@ -105,7 +105,7 @@ explainable-geoai/
    streamlit run app.py
    ```
 
----
+
 
 ## 📝 Scripts & Modules
 
@@ -119,7 +119,6 @@ explainable-geoai/
 * **`spatial_fairness.py`**: calculates fairness gaps → `fairness_metrics.csv`.
 * **`dashboard/app.py`**: interactive Streamlit + Folium map.
 
----
 
 ## 📊 Dashboard Overview
 

From 6e0c9916024e22613d9a886e1f678ea18dc8233b Mon Sep 17 00:00:00 2001
From: Abhinav Shukla <67401627+maxprogrammer007@users.noreply.github.com>
Date: Mon, 5 May 2025 12:25:02 +0530
Subject: [PATCH 09/10] Update README.md

---
 README.md | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/README.md b/README.md
index d73b0bc..2f64398 100644
--- a/README.md
+++ b/README.md
@@ -42,7 +42,7 @@ explainable-geoai/
 ├── README.md                        # this file
 └── requirements.txt                 # pip dependencies
 
-````
+```
 
 ## ⚙️ Installation
 

From ce32b54474db3b98fd9dfbf351403bf3253518bd Mon Sep 17 00:00:00 2001
From: Abhinav Shukla <67401627+maxprogrammer007@users.noreply.github.com>
Date: Mon, 5 May 2025 12:25:38 +0530
Subject: [PATCH 10/10] Update README.md

---
 README.md | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/README.md b/README.md
index 2f64398..10ce911 100644
--- a/README.md
+++ b/README.md
@@ -50,7 +50,7 @@ explainable-geoai/
    ```bash
    git clone https://github.com/yourusername/explainable-geoai.git
    cd explainable-geoai
-````
+```
 
 2. **Create & activate** a virtual environment