diff --git a/.github/workflows/ci.yml b/.github/workflows/ci.yml new file mode 100644 index 0000000..b8e3f8e --- /dev/null +++ b/.github/workflows/ci.yml @@ -0,0 +1,26 @@ +name: CI + +on: + push: + branches: [ main ] + pull_request: + +jobs: + lint-and-test: + runs-on: ubuntu-latest + steps: + - uses: actions/checkout@v4 + - name: Set up Python + uses: actions/setup-python@v5 + with: + python-version: '3.12' + - name: Install dependencies + run: | + pip install -r requirements.txt + - name: Lint with flake8 + run: | + pip install flake8 + flake8 src/ dashboard/ + - name: Smoke-run Streamlit (health check) + run: | + streamlit run dashboard/app.py --server.headless true & sleep 10 && curl --fail http://localhost:8501/_stcore/health diff --git a/README.md b/README.md index e69de29..10ce911 100644 --- a/README.md +++ b/README.md @@ -0,0 +1,139 @@ + +# Explainable GeoAI: Interpreting Socio-Spatial Patterns + +An end-to-end spatial XAI pipeline combining XGBoost, SHAP, GeoShapley, and MGWR to uncover and visualize interpretable spatial effects in U.S. county-level voting data. 
+ +--- + +## πŸ“‚ Repository Structure + +``` + +explainable-geoai/ +β”œβ”€β”€ data/ +β”‚ β”œβ”€β”€ raw/ +β”‚ β”‚ β”œβ”€β”€ census/ # raw ACS downloads +β”‚ β”‚ β”œβ”€β”€ shapefiles/ # county geometries +β”‚ β”‚ └── voting\_2021.csv # raw vote share +β”‚ └── processed/ +β”‚ β”œβ”€β”€ voting\_clean.csv # cleaned tabular data +β”‚ β”œβ”€β”€ voting\_features.csv # with engineered features & spatial lags +β”‚ β”œβ”€β”€ xgb\_automl\_model.pkl # trained FLAML+XGBoost model +β”‚ β”œβ”€β”€ shap\_explanations.csv # SHAP outputs +β”‚ β”œβ”€β”€ geoshapley\_explanations.csv # GeoShapley outputs +β”‚ β”œβ”€β”€ mgwr\_coefficients.csv # MGWR baseline +β”‚ β”œβ”€β”€ bootstrap\_shap\_stats.csv # SHAP uncertainty stats +β”‚ └── fairness\_metrics.csv # spatial fairness gaps +β”œβ”€β”€ src/ +β”‚ β”œβ”€β”€ data\_loader.py # load & clean +β”‚ β”œβ”€β”€ feature\_engineering.py # spatial lags, GeoDataFrame +β”‚ β”œβ”€β”€ model\_training.py # FLAML + XGBoost training +β”‚ β”œβ”€β”€ shap\_explainer.py # Kernel SHAP wrapper +β”‚ β”œβ”€β”€ geoshapley\_explainer.py # GeoShapley computations +β”‚ β”œβ”€β”€ mgwr\_comparison.py # MGWR baseline scripts +β”‚ β”œβ”€β”€ bootstrap\_uncertainty.py # bootstrap SHAP stats +β”‚ β”œβ”€β”€ spatial\_fairness.py # compute residual‐fairness +β”‚ └── config.py # paths & constants +β”œβ”€β”€ dashboard/ +β”‚ └── app.py # Streamlit + Folium dashboard +β”œβ”€β”€ docs/ +β”‚ β”œβ”€β”€ implementation\_notes.md # detailed pipeline doc +β”‚ └── paper\_summary.pdf # summary of Li (2025) chapter +β”œβ”€β”€ README.md # this file +└── requirements.txt # pip dependencies + +``` + +## βš™οΈ Installation + +1. **Clone repo** + ```bash + git clone https://github.com/yourusername/explainable-geoai.git + cd explainable-geoai +``` + +2. **Create & activate** a virtual environment + + ```bash + python3 -m venv venv + source venv/bin/activate # macOS/Linux + venv\Scripts\activate # Windows + ``` + +3. 
**Install dependencies** + + ```bash + pip install -r requirements.txt + ``` + +4. **Download raw data** + + * Place `voting_2021.csv` in `data/raw/` + * Download ACS and shapefiles via `src/download_census.py` or manually. + + + +## πŸš€ Quick Start + +1. **Data & features** + + ```bash + python src/data_loader.py + python src/feature_engineering.py + ``` + +2. **Train model** + + ```bash + python src/model_training.py + ``` + +3. **Generate explanations** + + ```bash + python src/shap_explainer.py + python src/geoshapley_explainer.py + python src/mgwr_comparison.py + python src/bootstrap_uncertainty.py + python src/spatial_fairness.py + ``` + +4. **Launch dashboard** + + ```bash + cd dashboard + streamlit run app.py + ``` + + + +## πŸ“ Scripts & Modules + +* **`data_loader.py`**: cleans raw vote + ACS, saves `voting_clean.csv`. +* **`feature_engineering.py`**: builds spatial lags, exports `voting_features.csv`. +* **`model_training.py`**: uses FLAML to find best XGBoost; saves model. +* **`shap_explainer.py`**: Kernel SHAP over FLAML model β†’ `shap_explanations.csv`. +* **`geoshapley_explainer.py`**: computes GeoShapley components β†’ `geoshapley_explanations.csv`. +* **`mgwr_comparison.py`**: fits MGWR baseline β†’ `mgwr_coefficients.csv`. +* **`bootstrap_uncertainty.py`**: bootstraps SHAP β†’ `bootstrap_shap_stats.csv`. +* **`spatial_fairness.py`**: calculates fairness gaps β†’ `fairness_metrics.csv`. +* **`dashboard/app.py`**: interactive Streamlit + Folium map. + + +## πŸ“Š Dashboard Overview + +* **SHAP**: county-level attributions, with uncertainty. +* **GeoShapley**: decomposed intrinsic (GEO), main, and interaction effects. +* **MGWR/OLS**: local regression coefficients for comparison. +* **Fairness**: residual differences across demographic groups. +* **Download** any CSV for offline analysis. + + + +## 🧾 Citing + +If you use this work, please cite: + +> Li, Ziqi (2025). *Explainable AI in Spatial Analysis*. 
In: +> *Advances in Spatial Data Science*, Springer. + diff --git a/dashboard/app.py b/dashboard/app.py index 116ec46..6d18135 100644 --- a/dashboard/app.py +++ b/dashboard/app.py @@ -1,3 +1,5 @@ +# dashboard/app.py + import os import streamlit as st import pandas as pd @@ -6,134 +8,133 @@ import streamlit.components.v1 as components import folium -# --- Configuration --- -PROJECT_ROOT = os.path.abspath(os.path.join(os.path.dirname(__file__), os.pardir)) -DATA_DIR = os.path.join(PROJECT_ROOT, 'data', 'processed') -SHAPE_PATH = os.path.join(PROJECT_ROOT, 'data', 'raw', 'shapefiles', 'cb_2018_us_county_500k.shp') +# ─── Paths & Config ───────────────────────────────────────────────────────── +PROJECT_ROOT = os.path.abspath(os.path.join(os.path.dirname(__file__), os.pardir)) +DATA_DIR = os.path.join(PROJECT_ROOT, "data", "processed") +SHAPE_PATH = os.path.join(PROJECT_ROOT, "data", "raw", "shapefiles", + "cb_2018_us_county_500k.shp") -SHAP_CSV = os.path.join(DATA_DIR, 'shap_explanations.csv') -MGWR_CSV = os.path.join(DATA_DIR, 'mgwr_coefficients.csv') -BOOT_CSV = os.path.join(DATA_DIR, 'bootstrap_shap_stats.csv') -FAIR_CSV = os.path.join(DATA_DIR, 'fairness_metrics.csv') +SHAP_CSV = os.path.join(DATA_DIR, "shap_explanations.csv") +GEOSHAP_CSV = os.path.join(DATA_DIR, "geoshapley_explanations.csv") +MGWR_CSV = os.path.join(DATA_DIR, "mgwr_coefficients.csv") +BOOT_CSV = os.path.join(DATA_DIR, "bootstrap_shap_stats.csv") +FAIR_CSV = os.path.join(DATA_DIR, "fairness_metrics.csv") SENSITIVE_ATTRS = ["pct_black", "pct_hisp", "median_income"] -# Must be first Streamlit call st.set_page_config(layout="wide", page_title="πŸ—ΊοΈ Explainable GeoAI Dashboard") -# --- Load Data --- -@st.cache_data +# ─── Sidebar Help ─────────────────────────────────────────────────────────── +st.sidebar.title("Controls") +st.sidebar.markdown(""" +**Mode Descriptions** +- **SHAP:** Exact `phi_…` columns from your SHAP output + bootstrap uncertainty +- **GeoShapley:** Decomposed spatial–feature 
effects +- **MGWR/OLS:** Local regression coefficients +- **Fairness:** Residual-based fairness gaps +""") +with st.expander("❓ How to use"): + st.write(""" + 1. Pick a **Mode**. + 2. Pick **View** (Point vs Uncertainty). + 3. For SHAP, choose exactly one `phi_…` column from your CSV. + 4. Hover on the map or download any CSV below. + """) + +# ─── Data Loader ───────────────────────────────────────────────────────────── +@st.cache_data(ttl=86400) def load_data(): gdf = gpd.read_file(SHAPE_PATH).to_crs("EPSG:4326") - gdf['GEOID'] = gdf['GEOID'].astype(str).str.zfill(5) - - shap_df = pd.read_csv(SHAP_CSV, dtype={'GEOID': str}) - mgwr_df = pd.read_csv(MGWR_CSV, dtype={'GEOID': str}) - boot_df = pd.read_csv(BOOT_CSV) - fair_df = pd.read_csv(FAIR_CSV, dtype={'GEOID': str}) - - # Merge shap & mgwr into GeoDataFrame - map_df = gdf.merge(shap_df, on='GEOID', how='left') - map_df = map_df.merge(mgwr_df, on='GEOID', how='left', suffixes=('_shap','_mgwr')) - - return map_df, shap_df, mgwr_df, boot_df, fair_df - -map_df, shap_df, mgwr_df, boot_df, fair_df = load_data() - -# --- Sidebar Controls --- -st.sidebar.title("Controls") + gdf["GEOID"] = gdf["GEOID"].astype(str).str.zfill(5) + + shap_df = pd.read_csv(SHAP_CSV, dtype={"GEOID": str}) + geoshap_df = pd.read_csv(GEOSHAP_CSV, dtype={"GEOID": str}) + mgwr_df = pd.read_csv(MGWR_CSV, dtype={"GEOID": str}) + boot_df = pd.read_csv(BOOT_CSV) + fair_df = pd.read_csv(FAIR_CSV, dtype={"GEOID": str}) + + merged = ( + gdf + .merge(shap_df, on="GEOID", how="left") + .merge(geoshap_df, on="GEOID", how="left", suffixes=("_shap","_geoshap")) + .merge(mgwr_df, on="GEOID", how="left", suffixes=("", "_mgwr")) + ) + return merged, shap_df, geoshap_df, mgwr_df, boot_df, fair_df -mode = st.sidebar.radio( - "Select Mode:", - ["SHAP", "MGWR/OLS", "Fairness"] -) +map_df, shap_df, geoshap_df, mgwr_df, boot_df, fair_df = load_data() -view = st.sidebar.radio( - "View:", - ["Point Estimate", "Uncertainty"] -) +# ─── Mode & View 
───────────────────────────────────────────────────────────── +mode = st.sidebar.radio("Select Mode:", ["SHAP", "GeoShapley", "MGWR/OLS", "Fairness"]) +view = st.sidebar.radio("View:", ["Point Estimate", "Uncertainty"]) -# Determine feature & columns +# ─── Sidebar selectors & titles ────────────────────────────────────────────── if mode == "SHAP": - features = [c.replace('phi_', '') for c in shap_df.columns if c.startswith('phi_')] - feature = st.sidebar.selectbox("Feature:", features) - col_point = f"phi_{feature}" - col_uncert = 'std_phi' - title_point = f"SHAP Attribution: {feature}" - title_uncert = f"SHAP Uncertainty (Std Dev): {feature}" + # list all existing phi_ columns + phi_cols = [c for c in shap_df.columns if c.startswith("phi_")] + feature = st.sidebar.selectbox("SHAP Column:", sorted(phi_cols)) + col_point = feature + col_uncert = "std_phi" + title_point = feature + title_unc = f"Bootstrap std of {feature}" + +elif mode == "GeoShapley": + geosh_cols = [c for c in geoshap_df.columns if c.startswith("phi_")] + comp = st.sidebar.selectbox("GeoShapley Column:", sorted(geosh_cols)) + col_point = comp + col_uncert = None + title_point = comp + title_unc = "" elif mode == "MGWR/OLS": - features = [c for c in mgwr_df.columns if c != 'GEOID'] - feature = st.sidebar.selectbox("Coefficient:", features) - col_point = feature - col_uncert = None - title_point = f"MGWR/OLS Coefficient: {feature}" - title_uncert = "" + mgwr_cols = [c for c in mgwr_df.columns if c != "GEOID"] + coef = st.sidebar.selectbox("MGWR/OLS Column:", sorted(mgwr_cols)) + col_point = coef + col_uncert = None + title_point = coef + title_unc = "" else: # Fairness - fair_labels = { - "pct_black":"Black %", - "pct_hisp":"Hispanic %", - "median_income":"Median Income" - } - attr = st.sidebar.selectbox( - "Sensitive Attribute:", - SENSITIVE_ATTRS, - format_func=lambda x: fair_labels[x] - ) - feature = attr - col_point = f"{attr}_fairness_score" - col_uncert = None - title_point = f"Fairness Score 
– {fair_labels[attr]}" - title_uncert = "" - -# Default to point estimate -col_to_map = col_point -title = title_point - -# Handle SHAP uncertainty view -if mode == "SHAP" and view == "Uncertainty": - # Get std_phi for selected feature - std_row = boot_df.loc[boot_df["feature"] == feature] - if not std_row.empty: - std_val = float(std_row["std_phi"]) - # inject into map_df copy - map_df["uncertainty"] = std_val - col_to_map = "uncertainty" - title = title_uncert + fair_labels = {"pct_black":"Black %","pct_hisp":"Hispanic %","median_income":"Median Income"} + attr = st.sidebar.selectbox("Attribute:", SENSITIVE_ATTRS, + format_func=lambda x: fair_labels[x]) + col_point = f"{attr}_fairness_score" + col_uncert = None + title_point = col_point + title_unc = "" + +# ─── Handle SHAP Uncertainty ──────────────────────────────────────────────── +col_to_map, title = col_point, title_point +if mode=="SHAP" and view=="Uncertainty": + row = boot_df.loc[boot_df["feature"]==feature.removeprefix("phi_")] + if not row.empty: + map_df["uncertainty"] = float(row["std_phi"].iloc[0]) + col_to_map, title = "uncertainty", title_unc else: - st.sidebar.warning("No bootstrap std available for this feature.") - col_to_map = col_point - title = title_point + st.sidebar.warning("No bootstrap std available.") -# --- Build Map --- +# ─── Render Map ────────────────────────────────────────────────────────────── st.subheader(title) - -# Prepare DataFrame to map plot_df = map_df.copy() -if mode == "Fairness": +if mode=="Fairness": plot_df = plot_df.merge(fair_df, on="GEOID", how="left") -# Verify column exists if col_to_map not in plot_df.columns: - st.error(f"πŸ›‘ Column '{col_to_map}' not found. Available columns: {plot_df.columns.tolist()}") + st.error(f"Column '{col_to_map}' not found. 
Available: {plot_df.columns.tolist()}") else: - m = folium.Map(location=[37.8, -96], zoom_start=4, tiles='cartodbpositron') - - choropleth = folium.Choropleth( + m = folium.Map(location=[37.8,-96], zoom_start=4, tiles="cartodbpositron") + chor = folium.Choropleth( geo_data=plot_df, data=plot_df, columns=["GEOID", col_to_map], key_on="feature.properties.GEOID", - fill_color='YlGnBu' if mode!="Fairness" else 'RdYlBu_r', + fill_color=("YlGnBu" if mode!="Fairness" else "RdYlBu_r"), fill_opacity=0.7, line_opacity=0.2, legend_name=title, nan_fill_color="white" ).add_to(m) - # Add tooltip - choropleth.geojson.add_child( + chor.geojson.add_child( folium.features.GeoJsonTooltip( fields=["GEOID", col_to_map], aliases=["GEOID", title], @@ -141,30 +142,25 @@ def load_data(): ) ) - map_html = m._repr_html_() - components.html(map_html, height=500, scrolling=True) - + components.html(m._repr_html_(), height=550) -# --- SHAP Feature Importance --- -if mode == "SHAP" and view == "Point Estimate": - st.subheader("Global SHAP Feature Importance") - imp_df = boot_df.copy() - imp_df['abs_mean'] = imp_df['mean_phi'].abs() - top10 = imp_df.sort_values('abs_mean', ascending=False).head(10) +# ─── SHAP Global Importance ───────────────────────────────────────────────── +if mode=="SHAP" and view=="Point Estimate": + st.subheader("Global SHAP Importance") + imp = boot_df.copy() + imp["abs_mean"] = imp["mean_phi"].abs() + top10 = imp.nlargest(10, "abs_mean") fig = px.bar( - top10, - x='feature', - y='mean_phi', - error_y='std_phi', - labels={'mean_phi':'Mean SHAP'}, - title='Top 10 SHAP Feature Importances' + top10, x="feature", y="mean_phi", error_y="std_phi", + labels={"mean_phi":"Mean SHAP"}, + title="Top 10 SHAP Features" ) st.plotly_chart(fig, use_container_width=True) -# --- Download Buttons --- +# ─── Downloads ─────────────────────────────────────────────────────────────── st.markdown("---") c1, c2, c3, c4 = st.columns(4) -c1.download_button("Download SHAP CSV", 
shap_df.to_csv(index=False), 'shap_explanations.csv') -c2.download_button("Download MGWR CSV", mgwr_df.to_csv(index=False), 'mgwr_coefficients.csv') -c3.download_button("Download Bootstrap Stats", boot_df.to_csv(index=False), 'bootstrap_shap_stats.csv') -c4.download_button("Download Fairness CSV", fair_df.to_csv(index=False), 'fairness_metrics.csv') +c1.download_button("Download SHAP", shap_df.to_csv(index=False), "shap_explanations.csv") +c2.download_button("Download GeoShapley", geoshap_df.to_csv(index=False), "geoshapley_explanations.csv") +c3.download_button("Download MGWR", mgwr_df.to_csv(index=False), "mgwr_coefficients.csv") +c4.download_button("Download Fairness", fair_df.to_csv(index=False), "fairness_metrics.csv") diff --git a/dockerfile b/dockerfile new file mode 100644 index 0000000..f6f8067 --- /dev/null +++ b/dockerfile @@ -0,0 +1,13 @@ +# Dockerfile +FROM python:3.12-slim + +WORKDIR /app +COPY requirements.txt . +RUN pip install --no-cache-dir -r requirements.txt + +# Copy the rest of your repo +COPY . . + +# Tell Streamlit to run your dashboard +ENV STREAMLIT_SERVER_HEADLESS=true +ENTRYPOINT ["streamlit", "run", "dashboard/app.py"] diff --git a/requirements.txt b/requirements.txt new file mode 100644 index 0000000..da876da --- /dev/null +++ b/requirements.txt @@ -0,0 +1,13 @@ +# requirements.txt +streamlit==1.25.0 +geopandas==0.13.0 +pandas==2.1.4 +plotly==5.17.0 +folium==0.14.0 +branca==0.8.1 +geoshapley==0.1.2 +xgboost==2.0.3 +flaml==1.1.3 +mgwr==2.5.2 +numpy==1.26.4 +scikit-learn==1.5.0