From 564c4aaa5ac0f6267551c8c16664a9e91a3b417f Mon Sep 17 00:00:00 2001 From: daryaguettler Date: Wed, 11 Mar 2026 11:33:23 -0400 Subject: [PATCH 1/3] update docs --- .../run-simulations/inputs_and_outputs.md | 644 ++++++++++++++++++ .../run-simulations/simulate_building.md | 37 +- .../run-simulations/simulation_tasking.md | 95 ++- docs/tutorials/visualization/visualization.md | 241 +++++++ mkdocs.yml | 3 + 5 files changed, 974 insertions(+), 46 deletions(-) create mode 100644 docs/tutorials/run-simulations/inputs_and_outputs.md create mode 100644 docs/tutorials/visualization/visualization.md diff --git a/docs/tutorials/run-simulations/inputs_and_outputs.md b/docs/tutorials/run-simulations/inputs_and_outputs.md new file mode 100644 index 0000000..af02938 --- /dev/null +++ b/docs/tutorials/run-simulations/inputs_and_outputs.md @@ -0,0 +1,644 @@ +## Simulation inputs and outputs + +This page documents every input file required to run globi simulations (both single-building and batch/manifest), how to populate them, and the format of the output files produced. + +--- + +### Input files overview + +The table below summarizes all input files. Which files you need depends on whether you are running a single building or a batch via manifest. + +| File | Single building | Batch (manifest) | Description | +| ------------------------ | :-------------: | :--------------: | -------------------------------------------------- | +| `building.yml` | required | -- | single-building specification | +| `manifest.yml` | -- | required | experiment specification | +| `artifacts.yml` | -- | required | file paths for GIS, DB, weather, etc. 
| +| `semantic-fields.yml` | required | required | semantic field definitions and GIS column mappings | +| `component-map.yml` | required | required | maps semantic fields to component selection rules | +| `components-lib.db` | required | required | SQLite component database | +| `buildings.parquet` | -- | required | GIS building footprints (GeoDataFrame) | +| `gis-preprocessor.yml` | -- | optional | geometry validation and defaults | +| `hourly-data-config.yml` | -- | optional | hourly output variable configuration | +| `overheating-config.yml` | -- | optional | overheating analysis thresholds | +| EPW weather file | required | required | EnergyPlus weather data (URL or local path) | + +--- + +### Input file details + +#### `building.yml` -- single building specification + +Used by `make cli-native simulate` (or `make cli simulate`). Defines a single building's geometry, envelope, and the semantic context used to look up components. + +**Required fields**: + +| Field | Type | Description | +| ------------------------ | ----------- | ------------------------------------------------------------- | +| `db_file` | path | path to the component database (SQLite) | +| `semantic_fields_file` | path | path to the semantic fields config | +| `component_map_file` | path | path to the component map config | +| `epwzip_file` | path or URL | EPW weather file | +| `semantic_field_context` | dict | key-value pairs matching semantic field names to their values | + +**Optional fields**: + +| Field | Type | Default | Constraints | Description | +| ----------------------- | ----- | -------- | ----------- | ----------------------------------- | +| `length` | float | 15.0 | >= 3.0 | long edge of the building [m] | +| `width` | float | 15.0 | >= 3.0 | short edge of the building [m] | +| `num_floors` | int | 2 | >= 1 | number of floors | +| `f2f_height` | float | 3.0 | >= 0 | floor-to-floor height [m] | +| `wwr` | float | 0.2 | 0.0 -- 1.0 | window-to-wall ratio | +| `basement` | str | 
`"none"` | see below | basement type | +| `attic` | str | `"none"` | see below | attic type | +| `exposed_basement_frac` | float | 0.25 | 0.0 -- 1.0 | fraction of basement exposed to air | + +Valid values for `basement` and `attic`: `"none"`, `"unoccupied_unconditioned"`, `"unoccupied_conditioned"`, `"occupied_unconditioned"`, `"occupied_conditioned"`. + +If `length < width`, they are automatically swapped so `length` is always the longer edge. + +**Example** (`inputs/building.yml`): + +```yaml +db_file: inputs/components-lib.db +semantic_fields_file: inputs/semantic-fields.yml +component_map_file: inputs/component-map.yml +epwzip_file: "https://climate.onebuilding.org/WMO_Region_4_North_and_Central_America/USA_United_States_of_America/MA_Massachusetts/USA_MA_Boston-Logan.Intl.AP.725090_TMYx.2009-2023.zip" +semantic_field_context: + Region: TestRegion + Typology: Residential + Age_bracket: Post_2000 + Scenario: Baseline + Income: Low +length: 20.0 +width: 15.0 +num_floors: 2 +f2f_height: 3.5 +wwr: 0.3 +basement: none +attic: none +exposed_basement_frac: 0.25 +``` + +--- + +#### `manifest.yml` -- experiment specification + +Used by `make cli-native submit manifest` (or `make cli submit manifest`). Defines a batch experiment over a set of buildings. All referenced config files can be either inline YAML or file paths -- when a path is given, the file is loaded automatically. + +| Field | Type | Required | Description | +| ------------------------- | --------------------- | -------- | ------------------------------------------------------ | +| `name` | str | yes | experiment/region name (used in `run_name`) | +| `scenario` | str | yes | scenario identifier (e.g. 
`Baseline`, `Retrofit`) | +| `file_config` | path or inline | yes | path to `artifacts.yml` or inline file config | +| `gis_preprocessor_config` | path or inline | no | path to `gis-preprocessor.yml` or inline config | +| `hourly_data_config` | path, inline, or null | no | path to `hourly-data-config.yml`, or `null` to disable | +| `overheating_config` | path, inline, or null | no | path to `overheating-config.yml`, or `null` to disable | + +**Example** (`inputs/manifest.yml`): + +```yaml +name: TestRegion +scenario: Baseline +hourly_data_config: null +file_config: inputs/artifacts.yml +gis_preprocessor_config: inputs/gis-preprocessor.yml +``` + +**Example with overheating and hourly data enabled**: + +```yaml +name: TestRegion +scenario: Baseline +hourly_data_config: inputs/hourly-data-config.yml +overheating_config: inputs/overheating-config.yml +file_config: inputs/artifacts.yml +gis_preprocessor_config: inputs/gis-preprocessor.yml +``` + +--- + +#### `artifacts.yml` -- file references + +Points to the data files used during batch simulation. Referenced by `manifest.yml` via the `file_config` field. 
+ +| Field | Type | Description | +| ---------------------- | ----------- | ------------------------------------------------------ | +| `gis_file` | path | path to the buildings GeoDataFrame (parquet) | +| `db_file` | path | path to the component database (SQLite) | +| `semantic_fields_file` | path | path to the semantic fields config | +| `component_map_file` | path | path to the component map config | +| `epwzip_file` | path or URL | EPW weather file (or `null` to use nearest EPW lookup) | + +**Example** (`inputs/artifacts.yml`): + +```yaml +gis_file: inputs/buildings.parquet +db_file: inputs/components-lib.db +semantic_fields_file: inputs/semantic-fields.yml +epwzip_file: "https://climate.onebuilding.org/WMO_Region_4_North_and_Central_America/USA_United_States_of_America/MA_Massachusetts/USA_MA_Boston-Logan.Intl.AP.725090_TMYx.2009-2023.zip" +component_map_file: inputs/component-map.yml +``` + +--- + +#### `semantic-fields.yml` -- semantic field definitions + +Defines the semantic fields (categorical variables) used to look up building components in the database, and maps GIS column names to building attributes. 
+ +| Field | Type | Description | +| ------------------ | --------- | ------------------------------------------------------------------------- | +| `Name` | str | model name | +| `Fields` | list | list of semantic field definitions | +| `Fields[].Name` | str | field name (must match keys in `semantic_field_context` and component DB) | +| `Fields[].Options` | list[str] | allowed values for this field | +| `Height_col` | str | GIS column name for building height | +| `Num_Floors_col` | str | GIS column name for number of floors | +| `Building_ID_col` | str | GIS column name for building ID | +| `GFA_col` | str | GIS column name for gross floor area / footprint area | + +**Example** (`inputs/semantic-fields.yml`): + +```yaml +Name: Test Region Model +Fields: + - Name: Region + Options: + - TestRegion + - Name: Typology + Options: + - Office + - School + - Residential + - Hospital + - Hotel + - Name: Age_bracket + Options: + - Pre_1980 + - 1980_to_2000 + - Post_2000 + - Name: Income + Options: + - Low + - High + - Name: Scenario + Options: + - Baseline + - Retrofit + +Height_col: height +Num_Floors_col: num_floors +Building_ID_col: building_id +GFA_col: footprint_area +``` + +The `Fields` entries define the categorical axes of the component database. Each building is assigned a value for each field (either from the GIS data or from `semantic_field_context`), and those values are used to select the appropriate envelope, HVAC, and other components. + +--- + +#### `component-map.yml` -- component selection rules + +Maps semantic fields to component types. Each component category has a `selector` that specifies which semantic fields are used to look up the matching entry in the component database. 
+ +The top-level structure has two sections: + +- **Envelope**: construction and infiltration components +- **Operations**: space use, HVAC, and DHW components + +**Example** (`inputs/component-map.yml`): + +```yaml +Envelope: + selector: + source_fields: + - Region + - Typology + - Scenario + - Age_bracket + +Operations: + SpaceUse: + selector: + source_fields: + - Region + - Typology + - Income + - Scenario + HVAC: + selector: + source_fields: + - Region + - Typology + - Scenario + - Age_bracket + DHW: + selector: + source_fields: + - Region + - Typology +``` + +Each `source_fields` list names the semantic fields whose values are concatenated to form the lookup key in the component database. For example, an envelope lookup with `Region=TestRegion`, `Typology=Office`, `Scenario=Baseline`, `Age_bracket=Post_2000` would search for a matching entry in the database. + +For more complex models, you can nest sub-components under each category. For example, envelope can be split into `Infiltration`, `Window`, and `Assemblies`, each with their own selector: + +```yaml +Envelope: + Infiltration: + selector: + source_fields: + - Region + - TypologySpaceUse + - Weatherization + suffix: Main + Window: + selector: + source_fields: + - Region + Assemblies: + selector: + source_fields: + - Region + - TypologySpaceUse + - Age_bracket + +Operations: + SpaceUse: + Occupancy: + selector: + source_fields: + - Region + - TypologySpaceUse + Lighting: + selector: + source_fields: + - Region + - TypologySpaceUse + - Lighting + Equipment: + selector: + source_fields: + - Region + - TypologySpaceUse + Thermostat: + selector: + source_fields: + - Region + - TypologySpaceUse + - Thermostat + WaterUse: + selector: + source_fields: + - Region + - TypologySpaceUse + HVAC: + ConditioningSystems: + Heating: + selector: + source_fields: + - Region + - Heating + Cooling: + selector: + source_fields: + - Region + - Cooling + Ventilation: + selector: + source_fields: + - Region + - 
TypologyVentilation + - Weatherization + DHW: + selector: + source_fields: + - Region + - DHW +``` + +--- + +#### `components-lib.db` -- component database + +A SQLite database containing building component definitions (materials, assemblies, glazing, HVAC systems, schedules, etc.). This database is populated separately and is referenced by both single-building and batch simulations. + +The database uses a Prisma-managed schema with tables including: + +- **Envelope**: `ConstructionMaterial`, `ConstructionAssembly`, `ConstructionAssemblyLayer`, `GlazingConstructionSimple`, `Infiltration`, `Envelope`, `EnvelopeAssembly` +- **Operations**: `Occupancy`, `Lighting`, `Equipment`, `Thermostat`, `WaterUse`, `SpaceUse`, `ThermalSystem`, `ConditioningSystems`, `HVAC`, `Ventilation`, `DHW`, `Operations` +- **Schedule**: `Day`, `Week`, `Year` +- **Zone**: `Zone` + +Component records are keyed by concatenated semantic field values (e.g. `TestRegion_Office_Baseline_Post_2000`). + +--- + +#### `buildings.parquet` -- building footprints + +A GeoParquet file containing building footprint geometries and attributes for batch simulations. Each row represents one building. + +**Required columns** (column names are defined in `semantic-fields.yml`): + +| Column (from semantic-fields) | Description | +| ------------------------------------------ | --------------------------------------- | +| building ID column (`Building_ID_col`) | unique building identifier | +| height column (`Height_col`) | building height [m] | +| number of floors column (`Num_Floors_col`) | number of floors | +| GFA column (`GFA_col`) | gross floor area or footprint area [m2] | +| geometry | building footprint polygon | + +Additionally, the parquet must contain columns for each semantic field defined in `semantic-fields.yml` that the GIS preprocessor needs to assign to each building (e.g. `Typology`, `Age_bracket`, etc.). Fields not present in the GIS data can be set via the `scenario` field in the manifest. 

---

#### `gis-preprocessor.yml` -- geometry validation and defaults

Controls how GIS building data is validated and preprocessed before simulation. All fields are optional with sensible defaults.

| Field | Type | Default | Description |
| ------------------------------- | ------------ | ---------------------- | --------------------------------------------- |
| `cart_crs` | str | `EPSG:3857` | cartesian CRS for geometry operations |
| `min_building_area` | float | 10.0 | minimum building footprint area [m2] |
| `min_edge_length` | float | 3.0 | minimum edge length [m] |
| `max_edge_length` | float | 1000.0 | maximum edge length [m] |
| `neighbor_threshold` | float | 100.0 | distance threshold for neighbor detection [m] |
| `f2f_height` | float | 3.0 | floor-to-floor height [m] |
| `min_building_height` | float | 3.0 | minimum building height [m] |
| `max_building_height` | float | 300.0 | maximum building height [m] |
| `min_num_floors` | int | 1 | minimum number of floors |
| `max_num_floors` | int | 125 | maximum number of floors |
| `default_wwr` | float | 0.2 | default window-to-wall ratio |
| `default_num_floors` | int | 2 | default number of floors when missing |
| `default_basement` | str | `"none"` | default basement type |
| `default_attic` | str | `"none"` | default attic type |
| `default_exposed_basement_frac` | float | 0.25 | default exposed basement fraction |
| `epwzip_file` | path or null | null | override EPW file for all buildings |
| `epw_query` | str or null | `"source in ['tmyx']"` | filter for closest EPW lookup |

**Example** (`inputs/gis-preprocessor.yml`):

```yaml
cart_crs: EPSG:3857
min_building_area: 10.0
min_edge_length: 3.0
max_edge_length: 1000.0
neighbor_threshold: 100.0
f2f_height: 3.0
min_building_height: 3.0
max_building_height: 300.0
min_num_floors: 1
max_num_floors: 125
default_wwr: 0.2
default_num_floors: 2
default_basement: none
default_attic: none
default_exposed_basement_frac: 
0.25 +epwzip_file: null +epw_query: source in ['tmyx'] +``` + +--- + +#### `hourly-data-config.yml` -- hourly output configuration + +Configures which hourly EnergyPlus output variables to report. When enabled (by setting `hourly_data_config` in the manifest), the simulation produces additional per-building time series dataframes. + +| Field | Type | Description | +| ------------- | --------- | ------------------------------------------------------------------------- | +| `data` | list[str] | EnergyPlus output variable names to report | +| `output_mode` | str | one of `"dataframes-and-filerefs"`, `"fileref-only"`, `"dataframes-only"` | + +Available hourly variables include (among others): + +- `"Zone Mean Air Temperature"` +- `"Zone Air Relative Humidity"` + +**Example** (`inputs/hourly-data-config.yml`): + +```yaml +data: + - "Zone Mean Air Temperature" + - "Zone Air Relative Humidity" + +output_mode: dataframes-and-filerefs +``` + +--- + +#### `overheating-config.yml` -- overheating analysis configuration + +Configures overheating analysis thresholds. When enabled (by setting `overheating_config` in the manifest), the simulation produces `BasicOverheating.pq` and related dataframes. 
+ +| Field | Type | Description | +| --------------------- | ---- | --------------------------------------------------- | +| `heat_thresholds` | list | temperature thresholds for heat exceedance analysis | +| `cold_thresholds` | list | temperature thresholds for cold exceedance analysis | +| `heat_index_criteria` | dict | heat index hour limits (set to `null` to skip) | +| `thermal_comfort` | dict | thermal comfort parameters (met, clo, v) | + +**Example** (`inputs/overheating-config.yml`): + +```yaml +heat_thresholds: + - threshold: 26.0 + - threshold: 30.0 + - threshold: 35.0 +cold_thresholds: + - threshold: 10.0 + - threshold: 5.0 +heat_index_criteria: + extreme_danger_hours: null + danger_or_worse_hours: null + caution_or_worse_hours: null +thermal_comfort: + met: 1.1 + clo: 0.5 + v: 0.1 +``` + +--- + +### How inputs relate to each other + +For a **single building** simulation, the relationship is straightforward: + +``` +building.yml +├── db_file ──────────────► components-lib.db +├── semantic_fields_file ─► semantic-fields.yml +├── component_map_file ───► component-map.yml +└── epwzip_file ──────────► weather file (URL or local) +``` + +For a **batch** (manifest) simulation, the chain is: + +``` +manifest.yml +├── file_config ──────────────────► artifacts.yml +│ ├── gis_file ──────────────► buildings.parquet +│ ├── db_file ───────────────► components-lib.db +│ ├── semantic_fields_file ──► semantic-fields.yml +│ ├── component_map_file ────► component-map.yml +│ └── epwzip_file ───────────► weather file +├── gis_preprocessor_config ──────► gis-preprocessor.yml +├── hourly_data_config (optional) ► hourly-data-config.yml +└── overheating_config (optional) ► overheating-config.yml +``` + +!!! warning + + all file paths in your configs should be relative to the repository root, or use absolute paths. for dockerized runs (`make cli`), all input files must be located under the `inputs/` directory. 
+ +--- + +### Output files + +#### Single building output + +Running `make cli-native simulate` produces the following directory structure: + +``` +outputs/ +├── ep/ # EnergyPlus working directory +│ └── eplus_simulation/ +│ └── {hash}/ # simulation run (hash of the IDF) +│ ├── Minimal.idf # generated EnergyPlus model +│ ├── *.epw # weather file used +│ ├── eplusout.csv # hourly outputs +│ ├── eplusmtr.csv # meter outputs +│ ├── eplustbl.csv # tabular summary report +│ ├── epluszsz.csv # zone sizing data +│ └── ... # other EnergyPlus artifacts +└── results/ + ├── EnergyAndPeak.parquet # main results (parquet) + ├── EnergyAndPeak.csv # flattened CSV export + └── EnergyAndPeak.xlsx # multi-sheet Excel workbook +``` + +#### Batch simulation output (from S3) + +Running `make cli-native get experiment` downloads results to: + +``` +outputs/ +└── {run_name}/ + └── {version}/ + ├── EnergyAndPeak.pq # main results (parquet) + ├── EnergyAndPeak.csv # CSV export (auto-generated) + └── EnergyAndPeak.xlsx # Excel workbook (auto-generated) +``` + +When hourly data or overheating analysis is enabled, additional dataframes are stored in S3 and can be fetched by specifying `--dataframe-key`: + +- `BasicOverheating` -- overheating hours per building +- `ExceedanceDegreeHours` -- degree hours above each threshold +- `HeatIndexCategories` -- heat index classification hours +- `ConsecutiveExceedances` -- consecutive exceedance periods +- `HourlyData.Zone_Mean_Air_Temperature` -- hourly zone temperatures +- `HourlyData.Zone_Air_Relative_Humidity` -- hourly zone humidity + +--- + +#### `EnergyAndPeak` dataframe format + +This is the primary output. 
It uses a multi-index column structure with four levels:

| Level | Name | Values |
| ----- | ----------- | ------------------------------------------------------------------------------------------------------------------------------ |
| 0 | Measurement | `Energy`, `Peak` |
| 1 | Aggregation | `Raw`, `End Uses`, `Utilities` |
| 2 | Meter | `Lighting`, `Equipment`, `Domestic Hot Water`, `Heating`, `Cooling`, `ChilledWater`, `Coal`, `Electricity`, `NaturalGas`, etc. |
| 3 | Month | `1` through `12` |

**Index** (for batch runs): a multi-index containing `building_id` and all semantic field feature columns (e.g. `feature.semantic.Typology`, `feature.semantic.Age_bracket`).

**Units**:

- Energy values are in **kWh/m2**
- Peak values are in **kW/m2**

**CSV format** (flattened):

The CSV export stacks the Month level into the row index, producing one row per month. The header is three rows deep (one per remaining column level -- Measurement, Aggregation, Meter), with the Month values forming the row index:

```
 Energy Energy Energy ... Peak Peak ...
 Raw Raw Raw ... Raw Raw ...
 Lighting Equipment DHW ... Lighting Equipment ...
Month
1 1.588 1.860 0.432 ... 0.0065 0.0076 ...
2 1.434 1.680 0.390 ... 0.0065 0.0076 ...
...
12 1.416 1.860 0.432 ... 0.0174 0.0076 ...
```

**Excel format**:

The Excel workbook contains one sheet per Measurement+Aggregation combination:

- `Energy_Raw` -- raw energy by meter and month
- `Energy_EndUses` -- energy grouped by end use
- `Energy_Utilities` -- energy grouped by utility/fuel type
- `Peak_Raw` -- raw peak by meter and month
- `Peak_EndUses` -- peak grouped by end use
- `Peak_Utilities` -- peak grouped by utility/fuel type
- `Feature Index` -- building IDs and semantic field values (batch runs only)

---

#### EnergyPlus raw outputs

The `ep/` directory contains the raw EnergyPlus simulation artifacts for each run. 
Key files: + +| File | Description | +| ------------------------- | ---------------------------------------------------------------------- | +| `Minimal.idf` | the generated EnergyPlus input file | +| `*.epw` | the weather file used | +| `eplusout.csv` | all hourly output variables (temperatures, humidity, energy in Joules) | +| `eplusmtr.csv` | meter-level outputs (energy by fuel type, hourly/monthly) | +| `eplustbl.csv` | tabular summary report (annual totals, end-use breakdown) | +| `epluszsz.csv` | zone sizing data (design loads, mass flows) | +| `eplusout.eso` | EnergyPlus standard output (binary) | +| `*.eio`, `*.rdd`, `*.mdd` | variable dictionaries and metadata | + +--- + +### Quick reference + +#### Minimal single-building setup + +``` +inputs/ +├── building.yml +├── components-lib.db +├── semantic-fields.yml +└── component-map.yml +``` + +#### Minimal batch setup + +``` +inputs/ +├── manifest.yml +├── artifacts.yml +├── buildings.parquet +├── components-lib.db +├── semantic-fields.yml +├── component-map.yml +└── gis-preprocessor.yml # optional +``` + +#### Batch with hourly data and overheating + +``` +inputs/ +├── manifest.yml +├── artifacts.yml +├── buildings.parquet +├── components-lib.db +├── semantic-fields.yml +├── component-map.yml +├── gis-preprocessor.yml +├── hourly-data-config.yml +└── overheating-config.yml +``` diff --git a/docs/tutorials/run-simulations/simulate_building.md b/docs/tutorials/run-simulations/simulate_building.md index 83b9420..ed655c3 100644 --- a/docs/tutorials/run-simulations/simulate_building.md +++ b/docs/tutorials/run-simulations/simulate_building.md @@ -16,7 +16,7 @@ It assumes you have already completed the [setup guide](../getting-started/requi !!! note - the commands in this guide are the same for macOS, linux, and windows (using a unix‑like shell such as git bash or wsl). + the commands in this guide are the same for macOS, linux, and windows (using a unix-like shell such as git bash or wsl). 
--- @@ -77,23 +77,31 @@ exposed_basement_frac: 0.25 ### Step 2: Run the simulation -Use the `make cli simulate` command to run a single building simulation: +Use the `make cli-native simulate` command to run a single building simulation locally: ```bash -make cli simulate -- --config inputs/building.yml --output-dir outputs +make cli-native simulate -- --config inputs/building.yml --output-dir outputs ``` -**Command options**: +If you use the default paths, you can simply run: -- `--config {PATH}`: path to your building configuration YAML file (default: `inputs/building.yml`) -- `--output-dir {PATH}`: directory where simulation results will be saved (default: `outputs`) +```bash +make cli-native simulate +``` -If you use the default paths, you can simply run: +Alternatively, run via Docker: ```bash -make cli simulate +make cli simulate -- --config inputs/building.yml --output-dir outputs ``` +**Command options**: + +| Option | Type | Default | Description | +| -------------- | ---- | --------------------- | ------------------------------------------------ | +| `--config` | path | `inputs/building.yml` | path to your building configuration YAML file | +| `--output-dir` | path | `outputs` | directory where simulation results will be saved | + !!! warning **critical**: you must include the two dashes `--` after `simulate` and before any options. this separator is required to pass arguments correctly to the underlying CLI command. if you forget it, the command will fail with an error. 
@@ -183,13 +191,16 @@ outputs/ ```bash # run simulation with default paths (inputs/building.yml -> outputs/) -make cli simulate +make cli-native simulate # run simulation with custom config and output directory -make cli simulate -- --config inputs/my_building.yml --output-dir outputs/my_results +make cli-native simulate -- --config inputs/my_building.yml --output-dir outputs/my_results # run simulation with only custom output directory -make cli simulate -- --output-dir outputs/custom +make cli-native simulate -- --output-dir outputs/custom + +# run via docker (same options) +make cli simulate -- --config inputs/building.yml --output-dir outputs ``` ### Building configuration file structure @@ -210,8 +221,8 @@ width: 15.0 # default: 15.0 num_floors: 3 # default: 2 f2f_height: 3.5 # default: 3.0 wwr: 0.3 # default: 0.2 -basement: none # default: "none" (options: "none", "unoccupied_unconditioned", "unoccupied_conditioned", "occupied_unconditioned", "occupied_conditioned") -attic: none # default: "none" (options: "none", "unoccupied_unconditioned", "unoccupied_conditioned", "occupied_unconditioned", "occupied_conditioned") +basement: none # default: "none" +attic: none # default: "none" exposed_basement_frac: 0.25 # default: 0.25 ``` diff --git a/docs/tutorials/run-simulations/simulation_tasking.md b/docs/tutorials/run-simulations/simulation_tasking.md index c3e9191..1781dcf 100644 --- a/docs/tutorials/run-simulations/simulation_tasking.md +++ b/docs/tutorials/run-simulations/simulation_tasking.md @@ -1,6 +1,6 @@ ## Run simulations with Hatchet and Docker -This guide walks you through running `globi` simulations end‑to‑end using Hatchet and Docker. +This guide walks you through running `globi` simulations end-to-end using Hatchet and Docker. This is the workflow for running batches of buildings across a manifest configuration. 
It assumes you have already completed the [setup guide](../getting-started/requirements.md), including: @@ -27,7 +27,7 @@ The steps below cover: !!! note - the commands in this guide are the same for macOS, linux, and windows (using a unix‑like shell such as git bash or wsl). + the commands in this guide are the same for macOS, linux, and windows (using a unix-like shell such as git bash or wsl). --- @@ -67,7 +67,7 @@ Look for a `hatchet-lite` container with a `running` status. ### Step 2: Create and configure Hatchet environment files -Hatchet uses a client token stored in environment files that are loaded by the `make cli` (dockerized) or `make cli-native` (non-dockerized) target. +Hatchet uses a client token stored in environment files that are loaded by `make cli` (dockerized) or `make cli-native` (non-dockerized). 1. **Generate a Hatchet client token**: @@ -156,7 +156,7 @@ You should see containers for Hatchet and the simulation services with a `runnin make: *** [engine] Error 1 ``` - this is usually transient. re‑run: + this is usually transient. re-run: ```bash make engine @@ -194,7 +194,7 @@ If you do **not** see workers, refer to the troubleshooting section below. ### Step 5: Run a test simulation -Now you can submit a simulation manifest via the `make cli` (dockerized) or `make cli-native` (non-dockerized) target, which wraps the `globi` CLI with the correct environment files. +Now you can submit a simulation manifest via `make cli` (dockerized) or `make cli-native` (non-dockerized), which wraps the `globi` CLI with the correct environment files. !!! 
warning @@ -219,10 +219,10 @@ Now you can submit a simulation manifest via the `make cli` (dockerized) or `mak ```bash # dockerized - make cli submit manifest -- --path inputs/manifest.yml --grid-run --max-tests 100 + make cli submit manifest -- --path inputs/manifest.yml --grid-run --max-sims 100 # non-dockerized - make cli-native submit manifest -- --path inputs/manifest.yml --grid-run --max-tests 100 + make cli-native submit manifest -- --path inputs/manifest.yml --grid-run --max-sims 100 ``` !!! warning @@ -242,23 +242,25 @@ Now you can submit a simulation manifest via the `make cli` (dockerized) or `mak where: - `{PATH_TO_MANIFEST}` is your manifest file path (for example `inputs/manifest.yml`) - - `--grid-run` enables grid‑style execution over the manifest configuration + - `--grid-run` enables grid-style execution over the manifest configuration (only simulates semantic field combinations) **Optional flags**: - - `--max-tests {NUMBER}`: override the maximum number of tests in a grid run (default: 1000). 
example: `--max-tests 100` - - `--scenario {SCENARIO_NAME}`: override the scenario listed in the manifest file with the provided scenario - - `--skip-model-constructability-check`: skip the model constructability check (flag, no value) - - `--epwzip-file {PATH}`: override the EPWZip file listed in the manifest file with the provided EPWZip file + | Flag | Type | Description | + | ------------------------------------- | ---- | ---------------------------------------------------- | + | `--max-sims {NUMBER}` | int | override the maximum number of simulations to run | + | `--scenario {SCENARIO_NAME}` | str | override the scenario listed in the manifest file | + | `--skip-model-constructability-check` | flag | skip the model constructability check | + | `--epwzip-file {PATH}` | path | override the EPW weather file listed in the manifest | Example with multiple optional flags: ```bash # dockerized - make cli submit manifest -- --path inputs/manifest.yml --grid-run --max-tests 50 --scenario baseline + make cli submit manifest -- --path inputs/manifest.yml --grid-run --max-sims 50 --scenario baseline # non-dockerized - make cli-native submit manifest -- --path inputs/manifest.yml --grid-run --max-tests 50 --scenario baseline + make cli-native submit manifest -- --path inputs/manifest.yml --grid-run --max-sims 50 --scenario baseline ``` 4. **Monitor progress in the Hatchet UI**: @@ -266,13 +268,13 @@ Now you can submit a simulation manifest via the `make cli` (dockerized) or `mak - go to `http://localhost:8888` - navigate to **workflows** or **runs** - locate the workflow corresponding to your manifest submission - - watch status transition from `pending` → `running` → `completed` (or `failed` if there is an error) + - watch status transition from `pending` -> `running` -> `completed` (or `failed` if there is an error) -You can click into the workflow to view task‑level logs and any errors. + You can click into the workflow to view task-level logs and any errors. 5. 
**Note the run_name from the output**: - When the simulation completes, the CLI prints a summary with a `run_name` (for example `TestRegion/dryrun/Baseline`). **save this run_name** — you will need it to fetch results in the next step. + When the simulation completes, the CLI prints a summary with a `run_name` (for example `TestRegion/dryrun/Baseline`). **save this run_name** -- you will need it to fetch results in the next step. !!! note @@ -342,6 +344,30 @@ This command: - prints the exact location where files were saved - automatically generates CSV and Excel files for the `EnergyAndPeak` dataframe +#### Fetch overheating and other dataframes + +If your manifest includes an `overheating_config`, the run produces extra dataframes that are **not** downloaded by the default command. Request them with `--dataframe-key`: + +| dataframe key | description | +| ------------------------ | ---------------------------------------- | +| `EnergyAndPeak` | (default) monthly energy and peak demand | +| `BasicOverheating` | hours above/below temperature thresholds | +| `ExceedanceDegreeHours` | degree-hours above thresholds | +| `HeatIndexCategories` | heat index category hours | +| `ConsecutiveExceedances` | consecutive exceedance streaks (if any) | + +Example: download overheating outputs for a run that used an overheating config: + +```bash +# basic overheating (hours above threshold per building) +make cli-native get experiment -- --run-name "TestRegion_Tutorial/Baseline" --dataframe-key BasicOverheating + +# exceedance degree hours +make cli-native get experiment -- --run-name "TestRegion_Tutorial/Baseline" --dataframe-key ExceedanceDegreeHours +``` + +Files are written to `outputs/{run_name}/{version}/{dataframe_key}.pq`. Overheating dataframes are saved as parquet only (no automatic Excel/CSV). 
+ **Example output structure**: ``` @@ -357,20 +383,20 @@ outputs/ #### Fetch a specific version and output directory -If you have multiple versions of the same run, or you want to control exactly where results are written, include `--version` and `--output_dir`: +If you have multiple versions of the same run, or you want to control exactly where results are written, include `--version` and `--output-dir`: ```bash # dockerized make cli get experiment -- \ --run-name {YOUR_RUN_NAME_HERE} \ --version {VERSION} \ - --output_dir {YOUR_CHOSEN_OUTPUT_DIR} + --output-dir {YOUR_CHOSEN_OUTPUT_DIR} # non-dockerized make cli-native get experiment -- \ --run-name {YOUR_RUN_NAME_HERE} \ --version {VERSION} \ - --output_dir {YOUR_CHOSEN_OUTPUT_DIR} + --output-dir {YOUR_CHOSEN_OUTPUT_DIR} ``` where: @@ -380,8 +406,10 @@ where: **Additional options**: -- `--dataframe-key {KEY}`: specify which dataframe to download (default: `EnergyAndPeak`). if hourly data was configured, each time series is a separate dataframe (e.g. `HourlyData.Zone_Mean_Air_Temperature`, `HourlyData.Zone_Air_Relative_Humidity`) -- `--include-csv`: include CSV export in addition to parquet (CSV is automatically included for `EnergyAndPeak` dataframe) +| Option | Type | Default | Description | +| ----------------------- | ---- | --------------- | -------------------------------------------------------------------------------------------------------------------------------------------------- | +| `--dataframe-key {KEY}` | str | `EnergyAndPeak` | which dataframe to download. if hourly data was configured, each time series is a separate dataframe (e.g. 
`HourlyData.Zone_Mean_Air_Temperature`) | +| `--include-csv` | flag | off | include CSV export in addition to parquet (CSV is automatically included for `EnergyAndPeak`) | **Example with all options**: @@ -390,14 +418,14 @@ where: make cli get experiment -- \ --run-name TestRegion/dryrun/Baseline \ --version 1.0.0 \ - --output_dir outputs/my_analysis \ + --output-dir outputs/my_analysis \ --include-csv # non-dockerized make cli-native get experiment -- \ --run-name TestRegion/dryrun/Baseline \ --version 1.0.0 \ - --output_dir outputs/my_analysis \ + --output-dir outputs/my_analysis \ --include-csv ``` @@ -421,7 +449,7 @@ make down This: -- stops and removes containers from `docker-compose.yml`, `docker-compose.hatchet.yml`, and `docker-compose.aws.yml` +- stops and removes containers from all compose files (including `docker-compose.st.yml` for the visualizer) - keeps docker images on disk so future runs start faster Run `make engine` again the next time you want to use the system. @@ -462,7 +490,7 @@ This section lists common issues and concrete steps to diagnose and fix them. target simulations: failed to solve: image ".../hatchet/globi:latest": already exists ``` - - simply re‑run: + - simply re-run: ```bash make engine @@ -480,7 +508,7 @@ This section lists common issues and concrete steps to diagnose and fix them. - if `hatchet-lite` fails to start because port `8080` is in use: - close any other application using port `8080` - or stop the conflicting container/process - - then re‑run `make hatchet-lite` or `make engine` + - then re-run `make hatchet-lite` or `make engine` --- @@ -515,7 +543,7 @@ This section lists common issues and concrete steps to diagnose and fix them. ``` - check for worker containers in `docker compose ... ps` - - open Hatchet UI → **workers** and verify that they show as healthy + - open Hatchet UI -> **workers** and verify that they show as healthy - if workers crash repeatedly, inspect their logs using `docker compose ... 
logs ` --- @@ -556,7 +584,7 @@ This section lists common issues and concrete steps to diagnose and fix them. - **jobs stuck in `pending`** - - check that workers are running (Hatchet UI → **workers**) + - check that workers are running (Hatchet UI -> **workers**) - confirm worker containers are healthy with `docker compose ... ps` - inspect worker logs for errors (for example configuration or connectivity issues) @@ -576,7 +604,7 @@ This section lists common issues and concrete steps to diagnose and fix them. - **`module not found` or missing dependency** - - re‑sync dependencies: + - re-sync dependencies: ```bash uv sync --all-extras --all-groups @@ -617,10 +645,11 @@ make engine # submit a simulation manifest (note the -- separator is required!) # dockerized -make cli submit manifest -- --path inputs/manifest.yml --grid-run --max-tests 100 +make cli submit manifest -- --path inputs/manifest.yml --grid-run --max-sims 100 # non-dockerized -make cli-native submit manifest -- --path inputs/manifest.yml --grid-run --max-tests 100 +make cli-native submit manifest -- --path inputs/manifest.yml --grid-run --max-sims 100 +# fetch experiment results # dockerized make cli get experiment -- --run-name {YOUR_RUN_NAME_HERE} # non-dockerized @@ -631,7 +660,7 @@ make down # open hatchet ui open http://localhost:8888 # macos -# or manually paste http://localhost:8080 into your browser +# or manually paste http://localhost:8888 into your browser ``` ### Key file locations diff --git a/docs/tutorials/visualization/visualization.md b/docs/tutorials/visualization/visualization.md new file mode 100644 index 0000000..9195c83 --- /dev/null +++ b/docs/tutorials/visualization/visualization.md @@ -0,0 +1,241 @@ +## Visualization engine + +The globi visualization engine is a Streamlit app for exploring and analyzing simulation results. It provides interactive charts, 3D building maps, and purpose-built analysis views for retrofit comparison, overheating assessment, and scenario comparison. 
+ +It assumes you have already completed the [setup guide](../getting-started/requirements.md) and have simulation results available either locally or in S3. + +--- + +### Before you start + +- **simulation results**: you need at least one completed simulation run with output parquet files (see [simulate a building](../run-simulations/simulate_building.md) or [simulation tasking](../run-simulations/simulation_tasking.md)) +- **terminal location**: run commands from the repository root + +--- + +### Starting the visualizer + +=== "Native (local)" + + ```bash + make viz-native + ``` + + This runs `streamlit run src/globi/tools/visualization/main.py` with the required environment files loaded. The app opens in your browser automatically. + +=== "Docker" + + ```bash + make viz + ``` + + This builds and starts the visualizer container via Docker Compose, including the `docker-compose.st.yml` configuration. + +Once running, the app is available at the URL printed in the terminal (typically `http://localhost:8501`). + +--- + +### Data sources + +The sidebar lets you choose between two data sources: + +#### Local + +Point the app at a local directory containing simulation output folders. Each subfolder that contains `.pq` or `.parquet` files is treated as a separate run. + +- **default directory**: `outputs` +- **optional**: place a `buildings.parquet` file in the `inputs/` directory with building location data (latitude, longitude, geometry) to enable 3D map visualizations + +#### S3 + +Connect directly to your S3 experiment storage. The app lists available experiments and lets you pick a run name, version, and dataframe key. + +**Required environment variables** (set in `.env.*.aws` and `.env.scythe.storage`): + +- `SCYTHE_STORAGE_BUCKET`: the S3 bucket name +- `SCYTHE_STORAGE_BUCKET_PREFIX` (optional): prefix within the bucket + +AWS credentials must be configured for S3 access. + +--- + +### Pages + +The app has three pages, accessible from the navigation menu. 
+ +--- + +#### Overview + +The landing page. It describes the available data sources, how to use the app, and what file formats are expected. Use this as a reference when first opening the visualizer. + +--- + +#### Raw Data Visualization + +Explore the output of any individual simulation run. Select a run from the dropdown and the app loads the corresponding parquet file. + +The behavior depends on the file format detected: + +##### Results / EnergyAndPeak format + +For files with the standard multi-index column structure (Measurement, Aggregation, Meter, Month), two tabs are available: + +**Summary tab**: + +| Chart | Description | +| ---------------------- | ----------------------------------------------------------------- | +| EUI histogram | distribution of energy use intensity across buildings | +| Peak demand histogram | distribution of peak demand across buildings | +| End-use pie chart | breakdown of energy by end use (heating, cooling, lighting, etc.) | +| Utilities pie chart | breakdown of energy by fuel type (electricity, gas, etc.) | +| Monthly EUI by end use | stacked bar chart showing monthly energy by end use | +| Monthly EUI by utility | stacked bar chart showing monthly energy by fuel type | + +**Map tab**: + +A 3D pydeck map showing buildings as extruded polygons. Requires building geometry data (either embedded in the parquet or from `inputs/buildings.parquet`). 
Color can be mapped to: + +- EUI (energy use intensity) +- total energy +- peak demand per sqm +- total peak demand + +##### Generic parquet format + +For any other parquet file, the app provides: + +- column selection with automatic numeric/categorical detection +- D3 histograms for numeric columns +- summary statistics grouped by a categorical column +- configurable value and category layers from the column structure + +##### Export + +All charts can be exported as: + +- **CSV**: raw data behind the chart +- **HTML**: interactive standalone chart +- **PNG**: static image (requires Playwright: `playwright install chromium`) + +--- + +#### Use Cases + +Purpose-built analysis views for common workflows. Select the use case type from the sidebar. + +--- + +##### Retrofit analysis + +Compare energy, cost, and emissions across two or more scenarios (e.g. baseline vs. retrofit). Requires at least two runs loaded from the data source. + +**Configuration** (sidebar): + +- select baseline and retrofit scenario(s) +- enter per-scenario energy costs ($/kWh per fuel type) +- enter per-scenario emissions factors (kgCO2/kWh per fuel type) +- enter system costs per sqm ($/m2) for each scenario +- assign display names to each scenario + +**Visualizations**: + +| Chart | Description | +| -------------------- | ---------------------------------------------------------------------- | +| EUI KDE plot | kernel density estimate comparing EUI distributions across scenarios | +| End-use stacked bars | energy breakdown by end use, per scenario | +| Fuel stacked bars | energy breakdown by fuel type, per scenario | +| Cost bar chart | total energy cost by scenario | +| Emissions bar chart | total emissions by scenario | +| 3D building map | buildings colored by selected metric (EUI, peak, percent change, etc.) | + +The map supports switching between metrics and adjusting elevation scale, radius, and view parameters (zoom, pitch, bearing). 
+ +--- + +##### Overheating analysis + +Visualize overheating risk across buildings on a 3D map. Requires that the simulation produced a `BasicOverheating.pq` file (enabled via hourly data configuration in the manifest). + +**Configuration** (sidebar): + +- temperature threshold: 26, 30, or 35 degrees C +- aggregation method: zone-weighted average or worst zone + +**Visualization**: + +- 3D pydeck map with buildings colored by overheating hours above the selected threshold +- configurable elevation scale and view parameters +- hover tooltips showing building-level overheating details + +!!! note + + if the selected run does not contain a `BasicOverheating.pq` file, the overheating use case will not be available. make sure hourly data output is enabled in your simulation configuration. + +--- + +##### Scenario comparison + +A lightweight comparison between two or more scenarios without cost/emissions data. Useful for quickly comparing energy profiles across different simulation configurations. 
+ +**Configuration** (sidebar): + +- select the scenarios to compare +- assign display names + +**Visualizations**: + +| Chart | Description | +| -------------------- | ---------------------------------------- | +| EUI KDE plot | distribution comparison across scenarios | +| End-use stacked bars | energy by end use per scenario | +| Fuel stacked bars | energy by fuel type per scenario | + +--- + +### Supported data formats + +| File | Structure | Used by | +| -------------------------- | ----------------------------------------------------------- | --------------------------------------- | +| `EnergyAndPeak.pq` | multi-index columns: Measurement, Aggregation, Meter, Month | raw data, retrofit, scenario comparison | +| `Results.pq` | same structure as EnergyAndPeak (legacy name) | raw data, retrofit, scenario comparison | +| `BasicOverheating.pq` | overheating hours per building per zone | overheating analysis | +| generic `.pq` / `.parquet` | any flat or index-flattened parquet | raw data (generic mode) | +| `buildings.parquet` | building locations with lat/lon and geometry | 3D map views | + +--- + +### Troubleshooting + +- **no runs found**: ensure your output directory contains subfolders with `.pq` or `.parquet` files. the app scans recursively for these. + +- **map not showing**: 3D maps require building geometry data. either the parquet file must contain `latitude`, `longitude`, and `rotated_rectangle` columns, or you must have an `inputs/buildings.parquet` file with this data that can be joined on `building_id`. + +- **PNG export fails**: PNG export uses Playwright for headless browser rendering. install it with: + + ```bash + playwright install chromium + ``` + +- **S3 connection errors**: verify your AWS credentials are configured and the environment variables `SCYTHE_STORAGE_BUCKET` (and optionally `SCYTHE_STORAGE_BUCKET_PREFIX`) are set in your env files. 
+ +- **streamlit not found**: re-sync dependencies: + + ```bash + uv sync --all-extras --all-groups + ``` + +--- + +### Quick reference + +```bash +# start visualizer locally +make viz-native + +# start visualizer via docker +make viz + +# stop all docker services (including visualizer) +make down +``` diff --git a/mkdocs.yml b/mkdocs.yml index c8663a7..37ec6f6 100644 --- a/mkdocs.yml +++ b/mkdocs.yml @@ -17,6 +17,9 @@ nav: - Run Simulations: - Simulation Tasking: tutorials/run-simulations/simulation_tasking.md - Simulate Building: tutorials/run-simulations/simulate_building.md + - Inputs and Outputs: tutorials/run-simulations/inputs_and_outputs.md + - Visualization: + - Visualization Engine: tutorials/visualization/visualization.md - Reference: - CLI: reference/cli.md From abca5e7464f437a4d7dd7a38739154ee5be521a5 Mon Sep 17 00:00:00 2001 From: daryaguettler Date: Wed, 11 Mar 2026 14:23:01 -0700 Subject: [PATCH 2/3] make minor UI updates to visualizations --- src/globi/tools/visualization/data_sources.py | 86 +++- src/globi/tools/visualization/main.py | 1 + src/globi/tools/visualization/models.py | 14 +- src/globi/tools/visualization/plotting.py | 206 ++++++++ src/globi/tools/visualization/results_data.py | 64 ++- src/globi/tools/visualization/utils.py | 357 +++++++++++-- .../tools/visualization/views/use_cases.py | 473 ++++++++++++++---- 7 files changed, 1048 insertions(+), 153 deletions(-) diff --git a/src/globi/tools/visualization/data_sources.py b/src/globi/tools/visualization/data_sources.py index e68434b..6b416d6 100644 --- a/src/globi/tools/visualization/data_sources.py +++ b/src/globi/tools/visualization/data_sources.py @@ -17,8 +17,12 @@ ) from globi.tools.visualization.utils import ( build_overheating_map_df, + build_overheating_summary_df, find_output_run_dirs, + get_overheating_thresholds, get_pq_file_for_run, + list_overheating_files_for_run, + load_heat_index_summary_for_chart, load_output_table, run_has_overheating, ) @@ -173,16 +177,42 @@ def 
list_runs_with_overheating(self) -> list[str]: """List run ids that have overheating outputs. Override for support.""" return [] + def list_overheating_files(self, run_id: str) -> list[str]: + """List available overheating df keys for a run. Override for support.""" + return [] + def load_overheating_map_data( self, run_id: str, cart_crs: str = "EPSG:3857", heat_threshold_c: float = 26.0, aggregation: str = "Zone Weighted", + data_source_type: str = "BasicOverheating", + heat_index_metric: str = "danger_hours", ) -> pd.DataFrame | None: """Load map-ready overheating data for a run. Override for support.""" return None + def load_overheating_heat_index_summary( + self, + run_id: str, + aggregation: str = "Zone Weighted", + ) -> dict[str, float] | None: + """Load HeatIndexCategories summary for stacked bar. Override for support.""" + return None + + def get_overheating_thresholds(self, run_id: str) -> list[float]: + """Get available heat thresholds for a run. Override for support.""" + return [26.0, 30.0, 35.0] + + def load_overheating_summary( + self, + run_id: str, + aggregation: str = "Zone Weighted", + ) -> pd.DataFrame | None: + """Load summary stats across all buildings for heatmap. 
Override for support.""" + return None + @classmethod def from_config(cls, config: DataSourceConfig) -> DataSource: """Factory method to create appropriate data source.""" @@ -246,18 +276,30 @@ def load_building_locations(self) -> pd.DataFrame | None: return pd.DataFrame(gdf.drop(columns=["geometry"], errors="ignore")) def list_runs_with_overheating(self) -> list[str]: - """List run ids that have BasicOverheating output.""" + """List run ids that have any overheating output.""" self.list_available_runs() return [rid for rid, d in self._run_dirs.items() if run_has_overheating(d)] + def list_overheating_files(self, run_id: str) -> list[str]: + """List available overheating df keys for a run.""" + run_dir = self._run_dirs.get(run_id) + if run_dir is None: + self.list_available_runs() + run_dir = self._run_dirs.get(run_id) + if run_dir is None: + return [] + return list_overheating_files_for_run(run_dir) + def load_overheating_map_data( self, run_id: str, cart_crs: str = "EPSG:3857", heat_threshold_c: float = 26.0, aggregation: str = "Zone Weighted", + data_source_type: str = "BasicOverheating", + heat_index_metric: str = "danger_hours", ) -> pd.DataFrame | None: - """Load map-ready overheating data (geometry + hours above threshold).""" + """Load map-ready overheating data (geometry + selected metric).""" run_dir = self._run_dirs.get(run_id) if run_dir is None: self.list_available_runs() @@ -269,8 +311,48 @@ def load_overheating_map_data( cart_crs=cart_crs, heat_threshold_c=heat_threshold_c, aggregation=aggregation, + data_source_type=data_source_type, + heat_index_metric=heat_index_metric, ) + def load_overheating_heat_index_summary( + self, + run_id: str, + aggregation: str = "Zone Weighted", + ) -> dict[str, float] | None: + """Load HeatIndexCategories summary (hours by category) for stacked bar chart.""" + run_dir = self._run_dirs.get(run_id) + if run_dir is None: + self.list_available_runs() + run_dir = self._run_dirs.get(run_id) + if run_dir is None: + return 
None + return load_heat_index_summary_for_chart(run_dir, aggregation=aggregation) + + def get_overheating_thresholds(self, run_id: str) -> list[float]: + """Get available heat thresholds from the data.""" + run_dir = self._run_dirs.get(run_id) + if run_dir is None: + self.list_available_runs() + run_dir = self._run_dirs.get(run_id) + if run_dir is None: + return [26.0, 30.0, 35.0] + return get_overheating_thresholds(run_dir) + + def load_overheating_summary( + self, + run_id: str, + aggregation: str = "Zone Weighted", + ) -> pd.DataFrame | None: + """Load summary stats across all buildings for heatmap.""" + run_dir = self._run_dirs.get(run_id) + if run_dir is None: + self.list_available_runs() + run_dir = self._run_dirs.get(run_id) + if run_dir is None: + return None + return build_overheating_summary_df(run_dir, aggregation=aggregation) + class S3DataSource(DataSource): """Data source for S3-stored experiment results.""" diff --git a/src/globi/tools/visualization/main.py b/src/globi/tools/visualization/main.py index ce072f2..44ddfb1 100644 --- a/src/globi/tools/visualization/main.py +++ b/src/globi/tools/visualization/main.py @@ -13,6 +13,7 @@ st.set_page_config(page_title="GLOBI Visualization", layout="wide") st.title("GLOBI Visualization") +# update the icon of the webpage to be a globe data_source = render_data_source_sidebar() st.session_state["data_source"] = data_source diff --git a/src/globi/tools/visualization/models.py b/src/globi/tools/visualization/models.py index 1a65b73..6b5234c 100644 --- a/src/globi/tools/visualization/models.py +++ b/src/globi/tools/visualization/models.py @@ -127,19 +127,19 @@ class RetrofitUseCaseConfig(BaseModel): class RetrofitCostParams(BaseModel): - """User-configurable retrofit cost and emissions parameters.""" + """User-configurable retrofit cost and emissions parameters (per scenario).""" - energy_cost_factors: dict[str, float] = Field( + per_scenario_energy_costs: dict[str, dict[str, float]] = Field( default_factory=dict, 
- description="Cost per kWh ($/kWh) by fuel type.", + description="Per-scenario energy cost factors: scenario -> fuel -> $/kWh.", ) - emissions_factors: dict[str, float] = Field( + per_scenario_emissions: dict[str, dict[str, float]] = Field( default_factory=dict, - description="Emissions factor (kg CO2/kWh) by fuel type.", + description="Per-scenario emissions factors: scenario -> fuel -> kg CO2/kWh.", ) - unit_costs: dict[str, float] = Field( + system_costs_per_sqm: dict[str, float] = Field( default_factory=dict, - description="Capital cost per scenario (scenario name -> total $).", + description="System cost per scenario in $/m², applied per building by conditioned area.", ) diff --git a/src/globi/tools/visualization/plotting.py b/src/globi/tools/visualization/plotting.py index 99647c8..5bad819 100644 --- a/src/globi/tools/visualization/plotting.py +++ b/src/globi/tools/visualization/plotting.py @@ -987,6 +987,212 @@ def create_comparison_stacked_bar_d3_html( return dedent(html) +_METRIC_GROUP_INTERPOLATORS = { + "Basic": "interpolateReds", + "EDH": "interpolateYlOrRd", + "HeatIndex": "interpolateOrRd", +} +_METRIC_GROUP_UNITS = { + "Basic": "hr", + "EDH": "degC-hr", + "HeatIndex": "hr", +} +_METRIC_GROUP_CSS_GRADIENTS = { + "Basic": "linear-gradient(to right, #fff5f0, #fb6a4a, #a50f15)", + "EDH": "linear-gradient(to right, #ffffcc, #fd8d3c, #bd0026)", + "HeatIndex": "linear-gradient(to right, #fff7ec, #fc8d59, #7f0000)", +} + + +def _classify_metric_group(col_name: str) -> str: + """Map a column name like 'Basic 25.0C' to its metric group key.""" + if col_name.startswith("Basic"): + return "Basic" + if col_name.startswith("EDH"): + return "EDH" + return "HeatIndex" + + +def create_overheating_heatmap_d3_html( + df: pd.DataFrame, + row_col: str = "statistic", + theme: Theme = "light", +) -> str: + """Build D3 heatmap of summary stats x overheating metrics. 
+ + Each metric group (Basic, EDH, HeatIndex) gets its own color palette and + is independently normalized, since they have different units. + """ + c = _theme_colors(theme) + value_cols = [ + col + for col in df.columns + if col != row_col and pd.api.types.is_numeric_dtype(df[col]) + ] + if not value_cols: + return "
<p>no numeric columns</p>
" + + rows = df[row_col].astype(str).tolist() + values = df[value_cols].fillna(0).values.tolist() + + # per-group normalization: all columns in a group share the same max + col_groups = [_classify_metric_group(vc) for vc in value_cols] + group_maxes: dict[str, float] = {} + for vc, grp in zip(value_cols, col_groups, strict=True): + mx = float(df[vc].max()) + group_maxes[grp] = max(group_maxes.get(grp, 0), mx) + # map each column to its group max + col_maxes = [max(group_maxes.get(grp, 1), 1e-9) for grp in col_groups] + # map each column to its interpolator name + col_interps = [ + _METRIC_GROUP_INTERPOLATORS.get(grp, "d3.interpolateReds") for grp in col_groups + ] + + payload = { + "rows": rows, + "cols": value_cols, + "values": values, + "col_maxes": col_maxes, + "col_interps": col_interps, + } + data_json = json.dumps(payload, ensure_ascii=False) + + # build legend html: one gradient bar per group present + seen_groups = dict.fromkeys(col_groups) + legend_parts: list[str] = [] + for grp in seen_groups: + gradient = _METRIC_GROUP_CSS_GRADIENTS.get( + grp, _METRIC_GROUP_CSS_GRADIENTS["Basic"] + ) + unit = _METRIC_GROUP_UNITS.get(grp, "") + mx = group_maxes.get(grp, 0) + legend_parts.append( + f'
' + f'{grp}' + f'0' + f'
' + f'{mx:,.1f} {unit}' + f"
" + ) + legend_html_str = "".join(legend_parts) + + html = f""" + + + + + Overheating summary heatmap + + + + + +
+
+ {legend_html_str} +
+ + + + """ + return dedent(html) + + def create_comparison_bar_d3_html( data: dict, value_key: str, diff --git a/src/globi/tools/visualization/results_data.py b/src/globi/tools/visualization/results_data.py index aa50eda..a7ccaf8 100644 --- a/src/globi/tools/visualization/results_data.py +++ b/src/globi/tools/visualization/results_data.py @@ -230,6 +230,24 @@ def _get_per_building_utilities(df: pd.DataFrame) -> pd.DataFrame | None: return cast(pd.DataFrame, result) +def _get_total_conditioned_area(df: pd.DataFrame) -> float: + """Sum conditioned area (m²) across all buildings from index.""" + area_name = "feature.geometry.energy_model_conditioned_area" + if area_name not in (df.index.names or []): + return 0.0 + level = list(df.index.names).index(area_name) + areas = df.index.get_level_values(level) + total = 0.0 + for a in areas: + try: + v = float(a) + if v > 0: + total += v + except (TypeError, ValueError): + pass + return total + + def compute_per_building_cost_emissions( df: pd.DataFrame, energy_cost_factors: dict[str, float], @@ -259,7 +277,7 @@ def build_retrofit_map_df( df: pd.DataFrame, energy_cost_factors: dict[str, float], emissions_factors: dict[str, float], - unit_cost: float = 0.0, + system_cost_per_sqm: float = 0.0, cart_crs: str = "EPSG:3857", ) -> pd.DataFrame | None: """Build map-ready df with geometry and retrofit metrics (eui, energy_cost, emissions, etc).""" @@ -297,69 +315,79 @@ def build_retrofit_map_df( geo_df["energy_cost"] = 0.0 geo_df["emissions"] = 0.0 - n = len(geo_df) - geo_df["capital_cost"] = (unit_cost / n) if n > 0 and unit_cost else 0.0 + if "conditioned_area" in geo_df.columns and system_cost_per_sqm: + geo_df["capital_cost"] = geo_df["conditioned_area"] * system_cost_per_sqm + else: + geo_df["capital_cost"] = 0.0 geo_df["total_cost"] = geo_df["energy_cost"] + geo_df["capital_cost"] return geo_df def compute_retrofit_cost_emissions( dfs: dict[str, pd.DataFrame], - energy_cost_factors: dict[str, float], - emissions_factors: 
dict[str, float], - unit_costs: dict[str, float] | None = None, + per_scenario_energy_costs: dict[str, dict[str, float]], + per_scenario_emissions: dict[str, dict[str, float]], + system_costs_per_sqm: dict[str, float] | None = None, ) -> tuple[dict[str, dict[str, float]], dict[str, dict[str, float]], dict[str, float]]: """Compute energy cost and emissions by scenario from Utilities consumption. + Each scenario can have its own energy cost and emissions factors. + Returns: cost_by_fuel: scenario -> fuel -> $ (annual energy cost) emissions_by_fuel: scenario -> fuel -> kg CO2 - capital_costs: scenario -> $ (from unit_costs) + capital_costs: scenario -> $ (system_cost_per_sqm * total area) """ cost_by_fuel: dict[str, dict[str, float]] = {} emissions_by_fuel: dict[str, dict[str, float]] = {} - capital_costs: dict[str, float] = dict(unit_costs or {}) + capital_costs: dict[str, float] = {} for scenario_name, df in dfs.items(): utilities = _get_utilities_kwh_by_fuel(df) + energy_cost_factors = per_scenario_energy_costs.get(scenario_name, {}) + em_factors = per_scenario_emissions.get(scenario_name, {}) cost_by_fuel[scenario_name] = {} emissions_by_fuel[scenario_name] = {} for meter, kwh in utilities.items(): fuel_key = normalize_fuel_name(meter) cost_factor = energy_cost_factors.get(fuel_key, 0.0) - emissions_factor = emissions_factors.get(fuel_key, 0.0) + emissions_factor = em_factors.get(fuel_key, 0.0) cost_by_fuel[scenario_name][meter] = kwh * cost_factor emissions_by_fuel[scenario_name][meter] = kwh * emissions_factor + if system_costs_per_sqm: + cost_per_sqm = system_costs_per_sqm.get(scenario_name, 0.0) + total_area = _get_total_conditioned_area(df) + capital_costs[scenario_name] = cost_per_sqm * total_area + return cost_by_fuel, emissions_by_fuel, capital_costs def extract_retrofit_comparison_data( dfs: dict[str, pd.DataFrame], region_name: str = "", - energy_cost_factors: dict[str, float] | None = None, - emissions_factors: dict[str, float] | None = None, - 
unit_costs: dict[str, float] | None = None, + per_scenario_energy_costs: dict[str, dict[str, float]] | None = None, + per_scenario_emissions: dict[str, dict[str, float]] | None = None, + system_costs_per_sqm: dict[str, float] | None = None, ) -> dict: - """Extract comparison data with optional cost and emissions. + """Extract comparison data with optional per-scenario cost and emissions. Merges extract_comparison_data output with cost_data, emissions_data, cost_by_fuel, emissions_by_fuel when factors are provided. """ base = extract_comparison_data(dfs, region_name) - if energy_cost_factors or emissions_factors: + if per_scenario_energy_costs or per_scenario_emissions: cost_by_fuel, emissions_by_fuel, capital = compute_retrofit_cost_emissions( dfs, - energy_cost_factors or {}, - emissions_factors or {}, - unit_costs, + per_scenario_energy_costs or {}, + per_scenario_emissions or {}, + system_costs_per_sqm, ) base["cost_by_fuel"] = cost_by_fuel base["emissions_by_fuel"] = emissions_by_fuel base["capital_costs"] = capital - # totals for bar chart base["cost_totals"] = { s: sum(cf.values()) + capital.get(s, 0) for s, cf in cost_by_fuel.items() } diff --git a/src/globi/tools/visualization/utils.py b/src/globi/tools/visualization/utils.py index 6812a8f..53dec44 100644 --- a/src/globi/tools/visualization/utils.py +++ b/src/globi/tools/visualization/utils.py @@ -75,17 +75,67 @@ def find_output_run_dirs(base_dir: Path | str) -> list[Path]: return sorted(seen) -OVERHEATING_PQ_NAMES = ("BasicOverheating.pq", "BasicOverheating.parquet") +OVERHEATING_DF_KEYS = ( + "BasicOverheating", + "ExceedanceDegreeHours", + "HeatIndexCategories", +) +OVERHEATING_FILE_MAP = { + "BasicOverheating": ("BasicOverheating.pq", "BasicOverheating.parquet"), + "ExceedanceDegreeHours": ( + "ExceedanceDegreeHours.pq", + "ExceedanceDegreeHours.parquet", + ), + "HeatIndexCategories": ("HeatIndexCategories.pq", "HeatIndexCategories.parquet"), +} def run_has_overheating(run_dir: Path) -> bool: - """True 
if run directory contains overheating output (BasicOverheating).""" - return any((run_dir / name).is_file() for name in OVERHEATING_PQ_NAMES) + """True if run directory contains any overheating output.""" + return any( + (run_dir / name).is_file() + for names in OVERHEATING_FILE_MAP.values() + for name in names + ) + +def list_overheating_files_for_run(run_dir: Path) -> list[str]: + """Return list of available overheating df keys (e.g. BasicOverheating, ExceedanceDegreeHours).""" + available: list[str] = [] + for key, names in OVERHEATING_FILE_MAP.items(): + if any((run_dir / n).is_file() for n in names): + available.append(key) + return available -def get_overheating_file_for_run(run_dir: Path) -> Path | None: - """Return BasicOverheating file path if present.""" - for name in OVERHEATING_PQ_NAMES: + +def get_overheating_thresholds(run_dir: Path) -> list[float]: + """Read available heat thresholds from BasicOverheating (or ExceedanceDegreeHours).""" + for key in ("BasicOverheating", "ExceedanceDegreeHours"): + oh_path = get_overheating_file_for_run(run_dir, key) + if oh_path is None: + continue + df = load_output_table(oh_path) + flat = df.reset_index() + thresh_col = _find_col(flat, "Threshold [degC]") + polarity_col = _find_col(flat, "Polarity") + if thresh_col is None: + continue + if polarity_col is not None: + flat = flat[flat[polarity_col] == "Overheat"] + vals = sorted(pd.Series(flat[thresh_col]).dropna().unique().tolist()) + if vals: + return vals + return [26.0, 30.0, 35.0] + + +def get_overheating_file_for_run( + run_dir: Path, df_key: str = "BasicOverheating" +) -> Path | None: + """Return overheating file path for given df_key if present.""" + names = OVERHEATING_FILE_MAP.get( + df_key, ("BasicOverheating.pq", "BasicOverheating.parquet") + ) + for name in names: p = run_dir / name if p.is_file(): return p @@ -429,6 +479,7 @@ def build_map_df_from_output( # noqa: C901 LON_COL: float(lon), ROTATED_RECTANGLE_COL: wkt, "height": height_m, + 
"conditioned_area": area, "eui": eui, "peak_per_sqm": peak_per_sqm, "total_energy": total_energy, @@ -451,24 +502,117 @@ def build_map_df_from_output( # noqa: C901 return out +def _extract_basic_overheating( + oh_flat: pd.DataFrame, + bid_col, + heat_threshold_c: float, + aggregation: str, +) -> pd.DataFrame | None: + """Extract building-level overheating hours from BasicOverheating flat df.""" + polarity_col = _find_col(oh_flat, "Polarity") + thresh_col = _find_col(oh_flat, "Threshold [degC]") + agg_col = _find_col(oh_flat, "Aggregation Unit") + group_col = _find_col(oh_flat, "Group") + val_col = "Total Hours [hr]" if "Total Hours [hr]" in oh_flat.columns else None + if not all([polarity_col, thresh_col, agg_col, group_col, val_col]): + return None + mask = ( + (oh_flat[polarity_col] == "Overheat") + & (oh_flat[thresh_col] == heat_threshold_c) + & (oh_flat[agg_col] == "Building") + & (oh_flat[group_col] == aggregation) + ) + oh_sub = oh_flat.loc[mask, [bid_col, val_col]].drop_duplicates(subset=[bid_col]) + oh_sub = oh_sub.rename(columns={val_col: "map_value"}) + return oh_sub + + +def _extract_exceedance_degree_hours( + oh_flat: pd.DataFrame, + bid_col, + heat_threshold_c: float, + aggregation: str, +) -> pd.DataFrame | None: + """Extract building-level EDH from ExceedanceDegreeHours flat df.""" + polarity_col = _find_col(oh_flat, "Polarity") + thresh_col = _find_col(oh_flat, "Threshold [degC]") + agg_col = _find_col(oh_flat, "Aggregation Unit") + group_col = _find_col(oh_flat, "Group") + val_col = "EDH [degC-hr]" if "EDH [degC-hr]" in oh_flat.columns else None + if not all([polarity_col, thresh_col, agg_col, group_col, val_col]): + return None + mask = ( + (oh_flat[polarity_col] == "Overheat") + & (oh_flat[thresh_col] == heat_threshold_c) + & (oh_flat[agg_col] == "Building") + & (oh_flat[group_col] == aggregation) + ) + oh_sub = oh_flat.loc[mask, [bid_col, val_col]].drop_duplicates(subset=[bid_col]) + oh_sub = oh_sub.rename(columns={val_col: "map_value"}) + 
return oh_sub + + +def _extract_heat_index_categories( + oh_flat: pd.DataFrame, + bid_col, + aggregation: str, + metric: str, +) -> pd.DataFrame | None: + """Extract building-level heat index metric from HeatIndexCategories flat df.""" + agg_col = _find_col(oh_flat, "Aggregation Unit") + group_col = _find_col(oh_flat, "Group") + if agg_col is None or group_col is None: + return None + # map UI aggregation to HeatIndex Group values + group_map = {"Zone Weighted": "Zone Weighted", "Worst Zone": "Worst per Timestep"} + group_val = group_map.get(aggregation, aggregation) + mask = (oh_flat[agg_col] == "Building") & (oh_flat[group_col] == group_val) + hi_sub = oh_flat.loc[mask].copy() + if hi_sub.empty: + return None + danger_cols = [ + c + for c in hi_sub.columns + if c + in ( + "Extreme Danger [hr]", + "Danger [hr]", + "Extreme Caution [hr]", + "Caution [hr]", + ) + ] + if metric == "danger_hours" and danger_cols: + hi_sub["map_value"] = hi_sub[danger_cols].sum(axis=1) + elif metric in hi_sub.columns: + hi_sub["map_value"] = hi_sub[metric] + else: + return None + oh_sub = hi_sub[[bid_col, "map_value"]].drop_duplicates(subset=[bid_col]) + return oh_sub + + def build_overheating_map_df( run_dir: Path, cart_crs: str = "EPSG:3857", heat_threshold_c: float = 26.0, aggregation: str = "Zone Weighted", + data_source_type: str = "BasicOverheating", + heat_index_metric: str = "danger_hours", ) -> pd.DataFrame | None: - """Build map-ready df with overheating hours per building. + """Build map-ready df with overheating metric per building. - Merges BasicOverheating (hours above threshold) with EnergyAndPeak geometry. - Returns df with lat, lon, rotated_rectangle, height, overheating_hours. + Merges overheating data with EnergyAndPeak geometry. Returns df with lat, lon, + rotated_rectangle, height, map_value. Args: - run_dir: Run directory containing BasicOverheating and EnergyAndPeak. + run_dir: Run directory containing overheating and EnergyAndPeak files. 
cart_crs: CRS for rotated_rectangle. - heat_threshold_c: Overheating threshold (default 26C). - aggregation: "Zone Weighted" or "Worst Zone". + heat_threshold_c: Overheating threshold (BasicOverheating, ExceedanceDegreeHours). + aggregation: Zone Weighted, Worst Zone, etc. + data_source_type: BasicOverheating, ExceedanceDegreeHours, or HeatIndexCategories. + heat_index_metric: For HeatIndexCategories: danger_hours or column name. """ - oh_path = get_overheating_file_for_run(run_dir) + oh_path = get_overheating_file_for_run(run_dir, data_source_type) energy_path = get_pq_file_for_run(run_dir) if oh_path is None or energy_path is None: return None @@ -485,29 +629,186 @@ def build_overheating_map_df( if bid_col is None: return None - polarity_col = _find_col(oh_flat, "Polarity") - thresh_col = _find_col(oh_flat, "Threshold [degC]") - agg_col = _find_col(oh_flat, "Aggregation Unit") - group_col = _find_col(oh_flat, "Group") - val_col = "Total Hours [hr]" if "Total Hours [hr]" in oh_flat.columns else None - if not all([polarity_col, thresh_col, agg_col, group_col, val_col]): + if data_source_type == "BasicOverheating": + oh_sub = _extract_basic_overheating( + oh_flat, bid_col, heat_threshold_c, aggregation + ) + elif data_source_type == "ExceedanceDegreeHours": + oh_sub = _extract_exceedance_degree_hours( + oh_flat, bid_col, heat_threshold_c, aggregation + ) + elif data_source_type == "HeatIndexCategories": + oh_sub = _extract_heat_index_categories( + oh_flat, bid_col, aggregation, heat_index_metric + ) + else: return None - mask = ( - (oh_flat[polarity_col] == "Overheat") - & (oh_flat[thresh_col] == heat_threshold_c) - & (oh_flat[agg_col] == "Building") - & (oh_flat[group_col] == aggregation) - ) - oh_sub = oh_flat.loc[mask, [bid_col, val_col]].drop_duplicates(subset=[bid_col]) - oh_sub = oh_sub.rename(columns={val_col: "overheating_hours"}) - oh_sub[bid_col] = oh_sub[bid_col].astype(str) + if oh_sub is None or oh_sub.empty: + return None + oh_sub[bid_col] = 
oh_sub[bid_col].astype(str) geo_df[BUILDING_ID_COL] = geo_df[BUILDING_ID_COL].astype(str) merged = geo_df.merge(oh_sub, on=BUILDING_ID_COL, how="inner") return merged if not merged.empty else None +def _load_one_overheating_metric( + run_dir: Path, + df_key: str, + bid_col: str, + heat_threshold_c: float, + aggregation: str, +) -> pd.DataFrame | None: + """Load one overheating metric as building_id + value df.""" + oh_path = get_overheating_file_for_run(run_dir, df_key) + if not oh_path: + return None + df = load_output_table(oh_path) + flat = df.reset_index() + bid = _find_col(flat, bid_col) + if not bid: + return None + if df_key == "BasicOverheating": + sub = _extract_basic_overheating(flat, bid, heat_threshold_c, aggregation) + col_name = "BasicOverheating_hr" + elif df_key == "ExceedanceDegreeHours": + sub = _extract_exceedance_degree_hours(flat, bid, heat_threshold_c, aggregation) + col_name = "ExceedanceDegreeHours" + elif df_key == "HeatIndexCategories": + sub = _extract_heat_index_categories(flat, bid, aggregation, "danger_hours") + col_name = "HeatIndex_danger_hr" + else: + return None + if sub is None: + return None + sub = sub.rename(columns={"map_value": col_name}) + sub[bid] = sub[bid].astype(str) + return sub + + +def _summarize_values(vals) -> dict[str, float]: + """Compute mean, median, p95, max from a numeric array.""" + import numpy as np + + return { + "mean": float(np.mean(vals)), + "median": float(np.median(vals)), + "p95": float(np.percentile(vals, 95)), + "max": float(np.max(vals)), + } + + +def _build_basic_edh_records( + run_dir: Path, + available: list[str], + thresholds: list[float], + aggregation: str, +) -> dict[str, dict[str, float]]: + records: dict[str, dict[str, float]] = {} + for df_key in ("BasicOverheating", "ExceedanceDegreeHours"): + if df_key not in available: + continue + label_prefix = "Basic" if df_key == "BasicOverheating" else "EDH" + for thresh in thresholds: + sub = _load_one_overheating_metric( + run_dir, df_key, 
BUILDING_ID_COL, thresh, aggregation + ) + if sub is None or sub.empty: + continue + vals = sub.iloc[:, -1].dropna().values + if len(vals) > 0: + records[f"{label_prefix} {thresh}C"] = _summarize_values(vals) + return records + + +def _build_heat_index_record( + run_dir: Path, + available: list[str], + aggregation: str, +) -> dict[str, dict[str, float]] | None: + if "HeatIndexCategories" not in available: + return None + sub = _load_one_overheating_metric( + run_dir, "HeatIndexCategories", BUILDING_ID_COL, 0.0, aggregation + ) + if sub is None or sub.empty: + return None + vals = sub.iloc[:, -1].dropna().values + if len(vals) == 0: + return None + return {"HeatIndex discomfort": _summarize_values(vals)} + + +def build_overheating_summary_df( + run_dir: Path, + aggregation: str = "Zone Weighted", +) -> pd.DataFrame | None: + """Build summary stats (mean, median, max, p95) per metric and threshold. + + Rows = stat names, columns = metric/threshold combos. Suitable for heatmap. + """ + available = list_overheating_files_for_run(run_dir) + if not available: + return None + + thresholds = get_overheating_thresholds(run_dir) + records: dict[str, dict[str, float]] = {} + records.update( + _build_basic_edh_records(run_dir, available, thresholds, aggregation) + ) + hi_rec = _build_heat_index_record(run_dir, available, aggregation) + if hi_rec: + records.update(hi_rec) + + if not records: + return None + + df = pd.DataFrame(records) + df.index.name = "statistic" + return df.reset_index() + + +def load_heat_index_summary_for_chart( + run_dir: Path, + aggregation: str = "Zone Weighted", +) -> dict[str, float] | None: + """Load HeatIndexCategories and return summed hours by category for stacked bar. + + Returns dict like {"Extreme Danger [hr]": 0, "Danger [hr]": 10, ...}. 
+ """ + oh_path = get_overheating_file_for_run(run_dir, "HeatIndexCategories") + if oh_path is None: + return None + oh_df = load_output_table(oh_path) + oh_flat = oh_df.reset_index() + agg_col = _find_col(oh_flat, "Aggregation Unit") + group_col = _find_col(oh_flat, "Group") + if agg_col is None or group_col is None: + return None + group_map = {"Zone Weighted": "Zone Weighted", "Worst Zone": "Worst per Timestep"} + group_val = group_map.get(aggregation, aggregation) + mask = (oh_flat[agg_col] == "Building") & (oh_flat[group_col] == group_val) + hi_sub = oh_flat.loc[mask] + if hi_sub.empty: + return None + cat_cols = [ + c + for c in hi_sub.columns + if c + in ( + "Extreme Danger [hr]", + "Danger [hr]", + "Extreme Caution [hr]", + "Caution [hr]", + "Normal [hr]", + ) + ] + if not cat_cols: + return None + return hi_sub[cat_cols].sum().to_dict() + + def merge_with_building_locations( # noqa: C901 df: pd.DataFrame, locations_df: pd.DataFrame, diff --git a/src/globi/tools/visualization/views/use_cases.py b/src/globi/tools/visualization/views/use_cases.py index 0f2b2bc..4d5d118 100644 --- a/src/globi/tools/visualization/views/use_cases.py +++ b/src/globi/tools/visualization/views/use_cases.py @@ -2,6 +2,8 @@ from __future__ import annotations +from typing import cast + import pandas as pd import streamlit as st import streamlit.components.v1 as components @@ -9,9 +11,12 @@ from globi.tools.visualization.data_sources import DataSource from globi.tools.visualization.models import UseCaseType from globi.tools.visualization.plotting import ( + Theme, create_comparison_bar_d3_html, create_comparison_kde_d3_html, create_comparison_stacked_bar_d3_html, + create_histogram_d3_html, + create_overheating_heatmap_d3_html, ) from globi.tools.visualization.results_data import ( apply_scenario_display_names, @@ -20,6 +25,7 @@ is_results_format, normalize_fuel_name, ) +from globi.tools.visualization.views.raw_data import _chart_download, _streamlit_theme def 
render_use_cases_page(data_source: DataSource) -> None: @@ -46,55 +52,141 @@ def render_use_cases_page(data_source: DataSource) -> None: _DEFAULT_EMISSIONS = (0.4, 0.2, 0.27, 0.23) +def _build_eui_csv(comparison_data: dict) -> pd.DataFrame: + """Build csv-friendly dataframe from eui distribution data.""" + eui = comparison_data.get("eui_data", {}) + if not eui: + return pd.DataFrame() + max_len = max((len(v) for v in eui.values()), default=0) + return pd.DataFrame({k: v + [None] * (max_len - len(v)) for k, v in eui.items()}) + + +def _build_stacked_csv(comparison_data: dict, data_key: str) -> pd.DataFrame: + """Build csv-friendly dataframe from stacked bar data.""" + data = comparison_data.get(data_key, {}) + if not data: + return pd.DataFrame() + df = pd.DataFrame(data).T.fillna(0) + df.index.name = "scenario" + return df.reset_index() + + +def _build_totals_csv( + comparison_data: dict, + value_key: str, + label: str = "value", +) -> pd.DataFrame: + """Build csv-friendly dataframe from scenario totals.""" + scenarios = comparison_data.get("scenarios", []) + values = comparison_data.get(value_key, {}) + if not values: + return pd.DataFrame() + return pd.DataFrame({ + "scenario": [s for s in scenarios if s in values], + label: [values[s] for s in scenarios if s in values], + }) + + +def _uniquify_display_names( + run_ids, + raw_names: dict[str, str], +) -> dict[str, str]: + """Build run_id -> unique display name mapping, appending (n) on collisions.""" + seen: dict[str, int] = {} + out: dict[str, str] = {} + for rid in run_ids: + d = (raw_names.get(rid, rid) or "").strip() or rid + if d in seen: + seen[d] += 1 + d = f"{d} ({seen[d]})" + else: + seen[d] = 1 + out[rid] = d + return out + + def _retrofit_params_form( selected_runs: list[str], -) -> tuple[dict, dict, dict | None, bool]: - """Render retrofit cost/emissions form, return factors and unit costs.""" - st.markdown("**Energy cost factors** ($/kWh by fuel type)") - energy_cost_factors = {} - emissions_factors 
= {} - ec_cols = st.columns(4) - for i, (label, default) in enumerate( - zip(_FUEL_LABELS, _DEFAULT_ENERGY_COSTS, strict=True) - ): - with ec_cols[i % 4]: - key = normalize_fuel_name(label) - energy_cost_factors[key] = st.number_input( - label, - min_value=0.0, - value=default, - format="%.3f", - key=f"ec_{key}", +) -> tuple[ + dict[str, dict[str, float]], + dict[str, dict[str, float]], + dict[str, float], + dict[str, str], +]: + """Render per-scenario cost/emissions/system-cost form. + + Returns (per_scenario_energy_costs, per_scenario_emissions, + system_costs_per_sqm, display_names). All dicts keyed by run_id. + """ + per_scenario_energy_costs: dict[str, dict[str, float]] = {} + per_scenario_emissions: dict[str, dict[str, float]] = {} + system_costs_per_sqm: dict[str, float] = {} + display_names: dict[str, str] = {} + + for run_id in selected_runs: + with st.expander( + f"Parameters: {run_id}", + expanded=(run_id == selected_runs[0]), + ): + val = st.text_input( + "Display name", + value=run_id, + key=f"retrofit_display_{run_id}", + placeholder=run_id, + ) + display_names[run_id] = (val.strip() or run_id) if val else run_id + + st.markdown("**Energy cost factors** ($/kWh by fuel type)") + ec_factors: dict[str, float] = {} + ec_cols = st.columns(4) + for i, (label, default) in enumerate( + zip(_FUEL_LABELS, _DEFAULT_ENERGY_COSTS, strict=True) + ): + with ec_cols[i % 4]: + key = normalize_fuel_name(label) + ec_factors[key] = st.number_input( + label, + min_value=0.0, + value=default, + format="%.3f", + key=f"ec_{key}_{run_id}", + ) + per_scenario_energy_costs[run_id] = ec_factors + + st.markdown("**Emissions factors** (kg CO2/kWh by fuel type)") + em_factors: dict[str, float] = {} + em_cols = st.columns(4) + for i, (label, default) in enumerate( + zip(_FUEL_LABELS, _DEFAULT_EMISSIONS, strict=True) + ): + with em_cols[i % 4]: + key = normalize_fuel_name(label) + em_factors[key] = st.number_input( + label, + min_value=0.0, + value=default, + format="%.3f", + 
key=f"em_{key}_{run_id}", + ) + per_scenario_emissions[run_id] = em_factors + + st.markdown( + "**System cost** ($/m² applied per building by conditioned area)" ) - st.markdown("**Emissions factors** (kg CO2/kWh by fuel type)") - em_cols = st.columns(4) - for i, (label, default) in enumerate( - zip(_FUEL_LABELS, _DEFAULT_EMISSIONS, strict=True) - ): - with em_cols[i % 4]: - key = normalize_fuel_name(label) - emissions_factors[key] = st.number_input( - label, + system_costs_per_sqm[run_id] = st.number_input( + "System cost ($/m²)", min_value=0.0, - value=default, - format="%.3f", - key=f"em_{key}", + value=0.0, + format="%.2f", + key=f"syscost_{run_id}", ) - st.markdown("**Unit costs** (capital cost $ per scenario, optional)") - use_unit_costs = st.checkbox("Include capital costs per scenario", value=False) - unit_costs: dict[str, float] = {} - if use_unit_costs: - uc_cols = st.columns(min(4, len(selected_runs))) - for i, run_id in enumerate(selected_runs): - with uc_cols[i % 4]: - unit_costs[run_id] = st.number_input( - run_id, - min_value=0.0, - value=0.0, - format="%.0f", - key=f"uc_{run_id}", - ) - return energy_cost_factors, emissions_factors, unit_costs or None, use_unit_costs + + return ( + per_scenario_energy_costs, + per_scenario_emissions, + system_costs_per_sqm, + display_names, + ) def _render_retrofit_use_case(data_source: DataSource) -> None: @@ -102,7 +194,8 @@ def _render_retrofit_use_case(data_source: DataSource) -> None: st.markdown("### Retrofit Analysis") st.markdown( "Compare retrofit scenarios with energy savings, costs, and emissions. " - "Enter unit costs, emissions factors, and energy cost factors to see cost/emissions comparison." + "Each scenario has its own energy cost factors, emissions factors, and " + "system cost ($/m², applied per building by conditioned area)." 
) available_runs = data_source.list_available_runs() @@ -118,9 +211,12 @@ def _render_retrofit_use_case(data_source: DataSource) -> None: ) with st.expander("Retrofit cost and emissions parameters", expanded=True): - energy_cost_factors, emissions_factors, unit_costs, use_unit_costs = ( - _retrofit_params_form(selected_runs) - ) + ( + per_scenario_energy_costs, + per_scenario_emissions, + system_costs_per_sqm, + display_names, + ) = _retrofit_params_form(selected_runs) if len(selected_runs) < 2: st.info("Select at least 2 scenarios to generate a comparison.") @@ -146,34 +242,55 @@ def _render_retrofit_use_case(data_source: DataSource) -> None: st.error("Could not load enough valid scenarios for comparison.") return + # build unique display-name mapping (disambiguate duplicates) + name_map = _uniquify_display_names(dfs.keys(), display_names) + + # remap all dicts to display names so charts/map use them throughout + dfs = {name_map[k]: v for k, v in dfs.items()} + per_scenario_energy_costs = { + name_map.get(k, k): v for k, v in per_scenario_energy_costs.items() + } + per_scenario_emissions = { + name_map.get(k, k): v for k, v in per_scenario_emissions.items() + } + system_costs_per_sqm = { + name_map.get(k, k): v for k, v in system_costs_per_sqm.items() + } + with st.spinner("Building comparison dashboard..."): comparison_data = extract_retrofit_comparison_data( dfs, region_name="", - energy_cost_factors=energy_cost_factors, - emissions_factors=emissions_factors, - unit_costs=unit_costs if use_unit_costs else None, + per_scenario_energy_costs=per_scenario_energy_costs, + per_scenario_emissions=per_scenario_emissions, + system_costs_per_sqm=system_costs_per_sqm, ) _render_retrofit_charts( comparison_data, dfs=dfs, - energy_cost_factors=energy_cost_factors, - emissions_factors=emissions_factors, - unit_costs=unit_costs if use_unit_costs else None, + per_scenario_energy_costs=per_scenario_energy_costs, + per_scenario_emissions=per_scenario_emissions, + 
system_costs_per_sqm=system_costs_per_sqm, ) def _render_retrofit_charts( comparison_data: dict, dfs: dict[str, pd.DataFrame] | None = None, - energy_cost_factors: dict[str, float] | None = None, - emissions_factors: dict[str, float] | None = None, - unit_costs: dict[str, float] | None = None, + per_scenario_energy_costs: dict[str, dict[str, float]] | None = None, + per_scenario_emissions: dict[str, dict[str, float]] | None = None, + system_costs_per_sqm: dict[str, float] | None = None, ) -> None: """Render retrofit comparison charts (EUI, end uses, fuel, cost, emissions) and map.""" st.markdown("#### EUI distribution comparison") kde_html = create_comparison_kde_d3_html(comparison_data) components.html(kde_html, height=360, scrolling=False) + _chart_download( + "retro_kde", + _build_eui_csv(comparison_data).to_csv(index=False), + kde_html, + "eui_distribution", + ) col_left, col_right = st.columns(2) with col_left: @@ -185,6 +302,12 @@ def _render_retrofit_charts( title="end uses comparison", ) components.html(eu_html, height=360, scrolling=False) + _chart_download( + "retro_eu", + _build_stacked_csv(comparison_data, "end_uses_data").to_csv(index=False), + eu_html, + "end_uses", + ) with col_right: st.markdown("#### Fuel/utilities comparison") fuel_html = create_comparison_stacked_bar_d3_html( @@ -194,9 +317,15 @@ def _render_retrofit_charts( title="fuel/utilities comparison", ) components.html(fuel_html, height=360, scrolling=False) + _chart_download( + "retro_fuel", + _build_stacked_csv(comparison_data, "utilities_data").to_csv(index=False), + fuel_html, + "fuel_utilities", + ) if comparison_data.get("cost_totals"): - st.markdown("#### Total cost comparison (energy + capital)") + st.markdown("#### Total cost comparison (energy + system)") cost_data = { "scenarios": comparison_data["scenarios"], "cost_totals": comparison_data["cost_totals"], @@ -208,6 +337,14 @@ def _render_retrofit_charts( value_label="annual cost ($)", ) components.html(cost_html, height=200, 
scrolling=False) + _chart_download( + "retro_cost", + _build_totals_csv(comparison_data, "cost_totals", "annual_cost_usd").to_csv( + index=False + ), + cost_html, + "total_cost", + ) if comparison_data.get("emissions_totals"): st.markdown("#### Total emissions comparison") @@ -222,21 +359,29 @@ def _render_retrofit_charts( value_label="kg CO2/year", ) components.html(em_html, height=200, scrolling=False) + _chart_download( + "retro_em", + _build_totals_csv( + comparison_data, "emissions_totals", "kg_co2_per_year" + ).to_csv(index=False), + em_html, + "emissions", + ) - if dfs and energy_cost_factors is not None and emissions_factors is not None: + if dfs and per_scenario_energy_costs and per_scenario_emissions: _render_retrofit_map( dfs, - energy_cost_factors, - emissions_factors, - unit_costs or {}, + per_scenario_energy_costs, + per_scenario_emissions, + system_costs_per_sqm or {}, ) def _render_retrofit_map( dfs: dict[str, pd.DataFrame], - energy_cost_factors: dict[str, float], - emissions_factors: dict[str, float], - unit_costs: dict[str, float], + per_scenario_energy_costs: dict[str, dict[str, float]], + per_scenario_emissions: dict[str, dict[str, float]], + system_costs_per_sqm: dict[str, float], ) -> None: """Render pydeck map with selectable metric and colormap.""" from globi.tools.visualization.plotting import create_building_map_deck @@ -260,7 +405,7 @@ def _render_retrofit_map( ("total_energy", "viridis", "Total energy (kWh)"), ("energy_cost", "reds", "Energy cost ($)"), ("emissions", "reds", "Emissions (kg CO2)"), - ("capital_cost", "plasma", "Capital cost ($)"), + ("capital_cost", "plasma", "System cost ($)"), ("total_cost", "reds", "Total cost ($)"), ("peak_per_sqm", "reds", "Peak per sqm (kW/m²)"), ("total_peak", "plasma", "Total peak (kW)"), @@ -284,12 +429,14 @@ def _render_retrofit_map( key="retrofit_map_crs", ) - unit_cost = unit_costs.get(scenario, 0.0) + scenario_ec = per_scenario_energy_costs.get(scenario, {}) + scenario_em = 
per_scenario_emissions.get(scenario, {}) + scenario_syscost = system_costs_per_sqm.get(scenario, 0.0) map_df = build_retrofit_map_df( dfs[scenario], - energy_cost_factors, - emissions_factors, - unit_cost=unit_cost, + scenario_ec, + scenario_em, + system_cost_per_sqm=scenario_syscost, cart_crs=cart_crs, ) if map_df is None or map_df.empty: @@ -321,35 +468,68 @@ def _render_retrofit_map( _render_colormap_legend(metric_label, value_stats, cmap) -def _render_overheating_use_case(data_source: DataSource) -> None: - """Render overheating map: pydeck reds map highlighting buildings with higher overheating.""" - st.markdown("### Overheating Analysis") - st.markdown( - "Identify buildings at risk of overheating. Requires runs with overheating " - "outputs (manifest with calculate_overheating: true)." - ) +_HEAT_INDEX_METRICS = [ + ("danger_hours", "Total discomfort hours (Danger + Caution + etc)"), + ("Extreme Danger [hr]", "Extreme Danger [hr]"), + ("Danger [hr]", "Danger [hr]"), + ("Extreme Caution [hr]", "Extreme Caution [hr]"), + ("Caution [hr]", "Caution [hr]"), + ("Normal [hr]", "Normal [hr]"), +] - runs_with_oh = data_source.list_runs_with_overheating() - if not runs_with_oh: - st.warning( - "No runs with overheating outputs found. Enable overheating in your " - "manifest (calculate_overheating: true) and re-run simulations." 
- ) - return - selected_run = st.selectbox( - "Select Run", - options=runs_with_oh, - key="overheating_run", +def _render_overheating_summary_charts( + map_df: pd.DataFrame, + data_source_type: str, + heat_threshold: float, + heat_index_metric: str, + theme: Theme, +) -> None: + """Render histogram with download.""" + map_values = map_df["map_value"].dropna().tolist() + if not map_values: + return + if data_source_type == "BasicOverheating": + x_label = f"Hours above {heat_threshold}C" + elif data_source_type == "ExceedanceDegreeHours": + x_label = f"Degree-hours above {heat_threshold}C" + else: + x_label = heat_index_metric.replace("_", " ").title() + hist_html = create_histogram_d3_html( + map_values, title="Distribution", x_label=x_label, theme=theme + ) + components.html(hist_html, height=320, scrolling=False) + hist_df = pd.DataFrame({ + "building_id": map_df["building_id"], + "value": map_df["map_value"], + }) + _chart_download( + "oh_hist", + hist_df.to_csv(index=False), + hist_html, + "overheating_distribution", ) + +def _overheating_form_controls( + available_files: list[str], + thresholds: list[float], +) -> tuple[str, float, str, str, str] | None: + """Render overheating form, return (data_source_type, threshold, aggregation, metric, crs) or None.""" + data_source_type = st.selectbox( + "Data source", + options=available_files, + format_func=lambda x: x.replace("_", " "), + key="overheating_data_source", + ) col1, col2 = st.columns(2) with col1: heat_threshold = st.selectbox( "Temperature threshold (C)", - options=[26.0, 30.0, 35.0], + options=thresholds, index=0, key="overheating_threshold", + disabled=(data_source_type == "HeatIndexCategories"), ) with col2: aggregation = st.selectbox( @@ -358,23 +538,86 @@ def _render_overheating_use_case(data_source: DataSource) -> None: index=0, key="overheating_aggregation", ) - + heat_index_metric = "danger_hours" + if data_source_type == "HeatIndexCategories": + heat_index_metric = st.selectbox( + "Metric", + 
options=[m[0] for m in _HEAT_INDEX_METRICS], + format_func=lambda x: next( + (m[1] for m in _HEAT_INDEX_METRICS if m[0] == x), x + ), + index=0, + key="overheating_heat_index_metric", + ) cart_crs = st.selectbox( "Polygon CRS (rotated_rectangle)", options=["EPSG:3857", "EPSG:32633", "EPSG:32632", "EPSG:4326"], index=0, key="overheating_crs", ) - if not st.button("Show Overheating Map", key="overheating_map_btn"): + return None + return data_source_type, heat_threshold, aggregation, heat_index_metric, cart_crs + + +def _render_overheating_use_case(data_source: DataSource) -> None: + """Render overheating analysis: map, summary stats, and D3 charts.""" + st.markdown("### Overheating Analysis") + st.markdown( + "Identify buildings at risk of overheating. Supports BasicOverheating, " + "ExceedanceDegreeHours, and HeatIndexCategories outputs." + ) + + runs_with_oh = data_source.list_runs_with_overheating() + if not runs_with_oh: + st.warning( + "No runs with overheating outputs found. Enable overheating in your " + "manifest (overheating_config) and re-run simulations." + ) return + selected_run = st.selectbox( + "Select Run", + options=runs_with_oh, + key="overheating_run", + ) + available_files = data_source.list_overheating_files(selected_run) + if not available_files: + st.warning("No overheating parquet files found for this run.") + return + + # summary heatmap: aggregate stats across all buildings and thresholds + theme = cast(Theme, _streamlit_theme()) + summary_df = data_source.load_overheating_summary( + selected_run, aggregation="Zone Weighted" + ) + if summary_df is not None and not summary_df.empty: + st.markdown("#### Overheating summary across metrics and thresholds") + st.caption( + "Summary statistics (mean, median, p95, max) across all buildings. " + "Each column is independently color-scaled." 
+ ) + heatmap_html = create_overheating_heatmap_d3_html(summary_df, theme=theme) + components.html(heatmap_html, height=480, scrolling=False) + heatmap_csv = summary_df.to_csv(index=False) + _chart_download("oh_heatmap", heatmap_csv, heatmap_html, "overheating_summary") + + thresholds = data_source.get_overheating_thresholds(selected_run) + form_result = _overheating_form_controls(available_files, thresholds) + if form_result is None: + return + data_source_type, heat_threshold, aggregation, heat_index_metric, cart_crs = ( + form_result + ) + with st.spinner("Loading overheating data..."): map_df = data_source.load_overheating_map_data( selected_run, cart_crs=cart_crs, heat_threshold_c=heat_threshold, aggregation=aggregation, + data_source_type=data_source_type, + heat_index_metric=heat_index_metric, ) if map_df is None or map_df.empty: @@ -384,10 +627,27 @@ def _render_overheating_use_case(data_source: DataSource) -> None: from globi.tools.visualization.plotting import create_building_map_deck from globi.tools.visualization.views.raw_data import _render_colormap_legend + theme = cast(Theme, _streamlit_theme()) + + st.markdown("#### Summary statistics") + _render_overheating_summary_charts( + map_df, data_source_type, heat_threshold, heat_index_metric, theme + ) + + # map + metric_label = "map value" + if data_source_type == "BasicOverheating": + metric_label = f"Hours above {heat_threshold}C" + elif data_source_type == "ExceedanceDegreeHours": + metric_label = f"Degree-hours above {heat_threshold}C" + else: + metric_label = heat_index_metric.replace("_", " ").title() + + st.markdown("#### Building map") result = create_building_map_deck( map_df, cart_crs=cart_crs, - value_col="overheating_hours", + value_col="map_value", cmap="reds", ) if result is None: @@ -395,16 +655,11 @@ def _render_overheating_use_case(data_source: DataSource) -> None: return deck, n_features, value_stats = result - st.markdown("#### Overheating hours above threshold") st.pydeck_chart(deck) 
st.caption(f"{n_features} buildings displayed") if value_stats: - _render_colormap_legend( - f"Hours above {heat_threshold}C", - value_stats, - "reds", - ) + _render_colormap_legend(metric_label, value_stats, "reds") def _render_scenario_comparison(data_source: DataSource) -> None: @@ -472,6 +727,12 @@ def _render_scenario_comparison(data_source: DataSource) -> None: st.markdown("#### EUI distribution comparison") kde_html = create_comparison_kde_d3_html(comparison_data) components.html(kde_html, height=360, scrolling=False) + _chart_download( + "sc_kde", + _build_eui_csv(comparison_data).to_csv(index=False), + kde_html, + "eui_distribution", + ) # end uses and utilities side by side col_left, col_right = st.columns(2) @@ -484,6 +745,14 @@ def _render_scenario_comparison(data_source: DataSource) -> None: title="end uses comparison", ) components.html(eu_html, height=360, scrolling=False) + _chart_download( + "sc_eu", + _build_stacked_csv(comparison_data, "end_uses_data").to_csv( + index=False + ), + eu_html, + "end_uses", + ) with col_right: st.markdown("#### Fuel/utilities comparison") fuel_html = create_comparison_stacked_bar_d3_html( @@ -493,3 +762,11 @@ def _render_scenario_comparison(data_source: DataSource) -> None: title="fuel/utilities comparison", ) components.html(fuel_html, height=360, scrolling=False) + _chart_download( + "sc_fuel", + _build_stacked_csv(comparison_data, "utilities_data").to_csv( + index=False + ), + fuel_html, + "fuel_utilities", + ) From 8893d9775ad88b93ab087b49bc88177b7840e0c3 Mon Sep 17 00:00:00 2001 From: daryaguettler Date: Thu, 12 Mar 2026 21:04:09 +0900 Subject: [PATCH 3/3] add caching --- src/globi/tools/visualization/plotting.py | 94 ++++-- src/globi/tools/visualization/utils.py | 274 ++++++++++++++---- .../tools/visualization/views/raw_data.py | 38 ++- .../tools/visualization/views/use_cases.py | 38 ++- 4 files changed, 352 insertions(+), 92 deletions(-) diff --git a/src/globi/tools/visualization/plotting.py 
b/src/globi/tools/visualization/plotting.py index 5bad819..1e77b70 100644 --- a/src/globi/tools/visualization/plotting.py +++ b/src/globi/tools/visualization/plotting.py @@ -2,6 +2,7 @@ from __future__ import annotations +import contextlib import json import math from itertools import pairwise @@ -1626,10 +1627,43 @@ def create_building_map_deck( ) if features is None: return None + return _deck_from_features(features, config, cmap) + + +def create_building_map_deck_from_cache( + geometry: list[dict], + map_df: pd.DataFrame, + value_col: str | None, + cmap: str = "viridis", + config: Building3DConfig | None = None, +) -> tuple[pdk.Deck, int, dict | None] | None: + """Build pydeck deck from cached geometry and map_df. No WKT parsing. + + Use when geometry and map_df are already computed (e.g. from prior run/CRS + selection). Only adds the selected metric for coloring. + """ + if len(geometry) != len(map_df): + return None + features = [] + for i, feat in enumerate(geometry): + f = {"polygon": feat["polygon"], "height": feat["height"]} + if value_col and value_col in map_df.columns: + v = map_df.iloc[i][value_col] + if v == v and v is not None: + with contextlib.suppress(TypeError, ValueError): + f["value"] = float(v) + features.append(f) + return _deck_from_features(features, config, cmap) + + +def _deck_from_features( + features: list[dict], + config: Building3DConfig | None, + cmap: str, +) -> tuple[pdk.Deck, int, dict | None]: + """Create deck and stats from features (polygon, height, value).""" vals = [f["value"] for f in features if "value" in f and f["value"] is not None] - value_stats = None - if vals: - value_stats = {"min": min(vals), "max": max(vals)} + value_stats = {"min": min(vals), "max": max(vals)} if vals else None config = config or Building3DConfig(elevation_scale=1.0) deck = create_polygon_layer_chart( features, @@ -1665,17 +1699,28 @@ def create_polygon_layer_chart( v_min = min(vals) if vals else 0.0 v_max = max(vals) if vals else 1.0 span = 
v_max - v_min if v_max > v_min else 1.0 + default_color = [*list(config.fill_color[:3]), 160] + # build minimal layer data: polygon, height, color, value (for tooltip only) + layer_data: list[dict[str, Any]] = [] for f in features: if value_key in f and f[value_key] is not None: t = (float(f[value_key]) - v_min) / span - f["color"] = _colormap_color(cmap, t) + color = _colormap_color(cmap, t) else: - f["color"] = [*list(config.fill_color[:3]), 160] + color = default_color + row: dict[str, Any] = { + "polygon": f["polygon"], + "height": f["height"], + "color": color, + } + if value_key in f and f[value_key] is not None: + row["value"] = f[value_key] + layer_data.append(row) layer = pdk.Layer( "PolygonLayer", - data=features, + data=layer_data, get_polygon="polygon", get_elevation="height", elevation_scale=config.elevation_scale, @@ -1687,30 +1732,15 @@ def create_polygon_layer_chart( ) # derive a reasonable center/zoom from feature polygons - lons: list[float] = [] - lats: list[float] = [] - for f in features: - for x, y in f["polygon"]: - lons.append(float(x)) - lats.append(float(y)) - - if lons and lats: + all_coords = [(float(x), float(y)) for f in layer_data for x, y in f["polygon"]] + if all_coords: + lons, lats = zip(*all_coords, strict=True) lon_center = sum(lons) / len(lons) lat_center = sum(lats) / len(lats) - lon_span = max(lons) - min(lons) - lat_span = max(lats) - min(lats) - span = max(lon_span, lat_span) - if span < 0.005: - zoom = 15 - elif span < 0.02: - zoom = 14 - elif span < 0.05: - zoom = 13 - else: - zoom = 12 + span = max(max(lons) - min(lons), max(lats) - min(lats)) + zoom = 15 if span < 0.005 else 14 if span < 0.02 else 13 if span < 0.05 else 12 else: - lon_center = 0.0 - lat_center = 0.0 + lon_center = lat_center = 0.0 zoom = 0.8 view_state = pdk.ViewState( @@ -1794,15 +1824,23 @@ def extract_building_polygons( msg = f"No height column '{height_col}' found" raise ValueError(msg) + from pyproj import Transformer + rect_series = 
df_reset[ROTATED_RECTANGLE_COL] height_series = df_reset[height_col].astype("float64") + transformer = Transformer.from_crs(cart_crs, "EPSG:4326", always_xy=True) polygons: list[list[list[float]]] = [] heights: list[float] = [] values: list[float | None] = [] for i, wkt_value in enumerate(rect_series): - poly_lonlat = transform_rotated_rectangle_to_latlon(wkt_value, cart_crs) + wkt_str = ( + getattr(wkt_value, "wkt", wkt_value) if wkt_value is not None else None + ) + poly_lonlat = transform_rotated_rectangle_to_latlon( + wkt_str or "", cart_crs, _transformer=transformer + ) if not poly_lonlat: continue diff --git a/src/globi/tools/visualization/utils.py b/src/globi/tools/visualization/utils.py index 53dec44..e3b3daf 100644 --- a/src/globi/tools/visualization/utils.py +++ b/src/globi/tools/visualization/utils.py @@ -243,6 +243,107 @@ def _find_col(df: pd.DataFrame, name: str): HEIGHT_FALLBACK_COLS = ("num_floors", "f2f_height") +def _geom_to_polygon_coords(geom) -> list[list[float]] | None: + """Extract exterior coords from Polygon or MultiPolygon as [[lon, lat], ...].""" + from shapely.geometry import MultiPolygon, Polygon + + if isinstance(geom, Polygon): + coords = list(geom.exterior.coords) + elif isinstance(geom, MultiPolygon): + poly = max(geom.geoms, key=lambda g: g.area) + coords = list(poly.exterior.coords) + else: + return None + if len(coords) < 3: + return None + return [[float(x), float(y)] for x, y in coords] + + +def _compute_heights_vectorized( + sub: pd.DataFrame, + df_flat: pd.DataFrame, + has_height: bool, + default_height_m: float, +) -> pd.Series: + """Compute height series for vectorized features.""" + nf_col = _find_col(df_flat, "num_floors") + f2f_col = _find_col(df_flat, "f2f_height") + f2f_default = 3.0 + if f2f_col is not None and f2f_col in df_flat.columns: + f2f_vals = sub[f2f_col].apply( + lambda v: float(v) if v == v and v is not None else f2f_default + ) + else: + f2f_vals = pd.Series(f2f_default, index=sub.index) + + if has_height: 
+ heights = sub["height"].astype(float, errors="ignore") + heights = heights.where((heights > 0) & heights.notna(), default_height_m) + elif nf_col is not None and nf_col in sub.columns: + heights = (sub[nf_col].astype(float, errors="ignore") * f2f_vals).fillna( + default_height_m + ) + else: + heights = pd.Series(default_height_m, index=sub.index) + return heights.clip(lower=0.01).fillna(default_height_m) # type: ignore[return-value] + + +def _build_map_features_vectorized( + df_flat: pd.DataFrame, + rect_col: str, + cart_crs: str, + default_height_m: float, + has_height: bool, + has_num_floors: bool, + value_col: str | None, +) -> list[dict] | None: + """Vectorized path: batch parse WKT and transform via geopandas.""" + import contextlib + + import geopandas as gpd + + wkt_series = df_flat[rect_col].apply( + lambda v: getattr(v, "wkt", v) if v is not None else None + ) + valid_mask = wkt_series.apply(lambda s: isinstance(s, str)) + if not bool(valid_mask.any()): + return None + + sub = df_flat.loc[valid_mask].copy() + wkt_valid = wkt_series.loc[valid_mask].astype(str) + + try: + gs = gpd.GeoSeries.from_wkt(wkt_valid, crs=cart_crs, on_invalid="ignore") + except Exception: + return None + + gs_wgs = gs.to_crs("EPSG:4326") + valid_geom = ~gs_wgs.is_empty & gs_wgs.geom_type.isin(["Polygon", "MultiPolygon"]) + if not bool(valid_geom.any()): + return None + + sub = sub.loc[valid_geom] + gs_wgs = gs_wgs.loc[valid_geom] + heights = _compute_heights_vectorized(sub, df_flat, has_height, default_height_m) + + features: list[dict] = [] + for idx, geom in zip(sub.index, gs_wgs, strict=True): + poly_lonlat = _geom_to_polygon_coords(geom) + if poly_lonlat is None: + continue + + row = sub.loc[idx] + feat: dict = {"polygon": poly_lonlat, "height": float(heights.loc[idx])} + if value_col and value_col in sub.columns: + v = row[value_col] + if v == v and v is not None: + with contextlib.suppress(TypeError, ValueError): + feat["value"] = float(v) + features.append(feat) + + return 
features if features else None + + def build_map_features_from_df( # noqa: C901 df: pd.DataFrame, cart_crs: str = "EPSG:3857", @@ -253,6 +354,7 @@ def build_map_features_from_df( # noqa: C901 Converts each rotated_rectangle WKT (in cart_crs) to lat/lon polygon, extrudes by height (meters). Works with flat parquet or index-flattened data. + Uses vectorized geopandas path for large datasets. Args: df: DataFrame with rotated_rectangle (or GLOBI_ROTATED_RECTANGLE) and height. @@ -278,6 +380,24 @@ def build_map_features_from_df( # noqa: C901 if not has_height and not has_num_floors: return None + # use vectorized path for 50+ rows + if len(df_flat) >= 50: + result = _build_map_features_vectorized( + df_flat, + rect_col, + cart_crs, + default_height_m, + has_height, + has_num_floors, + value_col, + ) + if result is not None: + return result + + # fallback: row-by-row for small datasets or when vectorized fails + from pyproj import Transformer + + transformer = Transformer.from_crs(cart_crs, "EPSG:4326", always_xy=True) features: list[dict] = [] for i in range(len(df_flat)): wkt_val = df_flat.iloc[i][rect_col] @@ -285,8 +405,10 @@ def build_map_features_from_df( # noqa: C901 if not isinstance(wkt_str, str): continue - poly_lonlat = transform_rotated_rectangle_to_latlon(wkt_str, cart_crs) - if not poly_lonlat: + poly_lonlat = transform_rotated_rectangle_to_latlon( + wkt_str, cart_crs, _transformer=transformer + ) + if poly_lonlat is None: continue row = df_flat.iloc[i] @@ -315,7 +437,7 @@ def build_map_features_from_df( # noqa: C901 except (TypeError, ValueError, KeyError): pass - feat: dict = {"polygon": poly_lonlat, "height": float(h)} + feat = {"polygon": poly_lonlat, "height": float(h)} if value_col and value_col in df_flat.columns: try: v = row[value_col] @@ -331,11 +453,14 @@ def build_map_features_from_df( # noqa: C901 def transform_rotated_rectangle_to_latlon( wkt: str, cart_crs: str = "EPSG:3857", + *, + _transformer=None, ) -> list[list[float]] | None: """Convert 
rotated_rectangle WKT (in cartesian CRS) to lat/lon polygon. Transforms each vertex from cart_crs to EPSG:4326. Returns [[lon, lat], ...] for pydeck polygon layer, or None if invalid. + Pass _transformer to reuse (avoids creating one per call in loops). """ from pyproj import Transformer from shapely import from_wkt @@ -357,11 +482,18 @@ def transform_rotated_rectangle_to_latlon( return None if len(coords) < 3: return None - transformer = Transformer.from_crs(cart_crs, "EPSG:4326", always_xy=True) - result: list[list[float]] = [] - for x, y in coords: - lon, lat = transformer.transform(float(x), float(y)) - result.append([float(lon), float(lat)]) + + trans = _transformer + if trans is None: + trans = Transformer.from_crs(cart_crs, "EPSG:4326", always_xy=True) + + # batch transform all vertices + import numpy as np + + xs = np.array([c[0] for c in coords], dtype=float) + ys = np.array([c[1] for c in coords], dtype=float) + lons, lats = trans.transform(xs, ys) + result = [[float(lon), float(lat)] for lon, lat in zip(lons, lats, strict=True)] except Exception: return None return result @@ -373,14 +505,16 @@ def build_map_df_from_output( # noqa: C901 ) -> pd.DataFrame | None: """Build map-ready dataframe directly from output parquet. - Extracts lat/lon from rotated_rectangle, computes EUI/peak metrics, - no merge with inputs. Returns df with building_id, lat, lon, - rotated_rectangle, height, eui, peak_per_sqm, end-use eui cols. + Extracts lat/lon from rotated_rectangle. Output Energy is kWh/m² and Peak + is kW/m², so eui and peak_per_sqm are used directly; total_energy and + total_peak are eui*area and peak_per_sqm*area. Returns df with building_id, + lat, lon, rotated_rectangle, height, eui, peak_per_sqm, total_energy, + total_peak, end-use eui cols. Uses vectorized geopandas for geometry when + 100+ rows. 
""" import logging - from pyproj import Transformer - from shapely import from_wkt + import geopandas as gpd df_reset = df.reset_index() bid_col = _find_col(df_reset, BUILDING_ID_COL) @@ -410,56 +544,97 @@ def build_map_df_from_output( # noqa: C901 if not energy_cols or not peak_cols: return None - transformer = Transformer.from_crs(cart_crs, "EPSG:4326", always_xy=True) + areas = df.index.get_level_values(area_level) + # output Energy is kWh/m², Peak is kW/m² - use directly as eui and peak_per_sqm + eui_arr = df[energy_cols].sum(axis=1).values + peak_per_sqm_arr = df[peak_cols].max(axis=1).values + + h_col = _find_col(df_reset, "height") + nf_col = _find_col(df_reset, "num_floors") + f2f_col = _find_col(df_reset, "f2f_height") log = logging.getLogger(__name__) - rows: list[dict] = [] - for idx, (_, row) in enumerate(df_reset.iterrows()): - wkt = row.get(rect_col) - if not isinstance(wkt, str): - continue - try: - geom = from_wkt(wkt) - if geom.is_empty: + # vectorized path for 100+ rows: batch parse WKT and transform centroids + use_vectorized = len(df_reset) >= 100 + lon_lat_by_idx: dict[int, tuple[float, float]] = {} + wkt_by_idx: dict[int, str] = {} + + if use_vectorized: + wkt_series = df_reset[rect_col].apply( + lambda v: getattr(v, "wkt", v) if v is not None else None + ) + valid_mask = wkt_series.apply(lambda s: isinstance(s, str)) + if bool(valid_mask.any()): + try: + gs = gpd.GeoSeries.from_wkt( + wkt_series.loc[valid_mask].astype(str), + crs=cart_crs, + on_invalid="ignore", + ) + gs_wgs = gs.to_crs("EPSG:4326") + valid_geom = ~gs_wgs.is_empty + for idx in gs_wgs.loc[valid_geom].index: + geom = gs_wgs.loc[idx] + cx, cy = geom.centroid.x, geom.centroid.y + lon_lat_by_idx[idx] = (float(cy), float(cx)) # lat, lon + wkt_by_idx[idx] = str(wkt_series.loc[idx]) + except Exception as exc: + log.debug("vectorized path failed, falling back: %s", exc) + use_vectorized = False + + if not use_vectorized: + from pyproj import Transformer + from shapely import from_wkt 
+ + transformer = Transformer.from_crs(cart_crs, "EPSG:4326", always_xy=True) + for idx in range(len(df_reset)): + wkt = df_reset.iloc[idx][rect_col] + if not isinstance(wkt, str): + wkt = getattr(wkt, "wkt", None) if wkt is not None else None + if not isinstance(wkt, str): continue - cx, cy = geom.centroid.x, geom.centroid.y - lon, lat = transformer.transform(cx, cy) - bid = str(row[bid_col]) - except Exception as exc: - log.debug("skip row: %s", exc) - continue + try: + geom = from_wkt(wkt) + if geom.is_empty: + continue + cx, cy = geom.centroid.x, geom.centroid.y + lon, lat = transformer.transform(cx, cy) + lon_lat_by_idx[idx] = (float(lat), float(lon)) + wkt_by_idx[idx] = wkt + except Exception as exc: + log.debug("skip row %s: %s", idx, exc) - area_val = df.index.get_level_values(area_level)[idx] + rows: list[dict] = [] + for idx, (lat, lon) in lon_lat_by_idx.items(): + wkt = wkt_by_idx.get(idx, "") try: + area_val = areas[idx] fval = float(area_val) # type: ignore[arg-type] area = fval if fval > 0 else None - except (TypeError, ValueError): + except (TypeError, ValueError, IndexError): area = None if area is None: continue - row_vals = df.iloc[idx] - total_energy = float(row_vals[energy_cols].sum()) - peak = float(row_vals[peak_cols].max()) - eui = total_energy / area - peak_per_sqm = peak / area - - # height from output parquet index (height col, else num_floors * f2f_height) - h_col = _find_col(df_reset, "height") - nf_col = _find_col(df_reset, "num_floors") - f2f_col = _find_col(df_reset, "f2f_height") + eui = float(eui_arr[idx]) + peak_per_sqm = float(peak_per_sqm_arr[idx]) + total_energy = eui * area + total_peak = peak_per_sqm * area + + row = df_reset.iloc[idx] + bid = str(row[bid_col]) height_m = 6.0 - if h_col is not None and h_col in row.index: + if h_col is not None and h_col in df_reset.columns: try: hv = row[h_col] hm = float(hv) - if hm == hm: # not nan + if hm == hm: height_m = hm except (TypeError, ValueError): pass - elif nf_col is not None 
and nf_col in row.index: + elif nf_col is not None and nf_col in df_reset.columns: f2f = 3.0 - if f2f_col is not None and f2f_col in row.index: + if f2f_col is not None and f2f_col in df_reset.columns: try: fv = row[f2f_col] f2f = float(fv) if fv == fv else 3.0 @@ -468,29 +643,30 @@ def build_map_df_from_output( # noqa: C901 try: nv = row[nf_col] nm = float(nv) - if nm == nm: # not nan + if nm == nm: height_m = nm * f2f except (TypeError, ValueError): pass row_dict: dict = { BUILDING_ID_COL: bid, - LAT_COL: float(lat), - LON_COL: float(lon), + LAT_COL: lat, + LON_COL: lon, ROTATED_RECTANGLE_COL: wkt, "height": height_m, "conditioned_area": area, "eui": eui, "peak_per_sqm": peak_per_sqm, "total_energy": total_energy, - "total_peak": peak, + "total_peak": total_peak, } + row_vals = df.iloc[idx] for meter in { str(c[2]) for c in energy_cols if isinstance(c, tuple) and len(c) > 2 }: cols_m = [c for c in energy_cols if c[2] == meter] if cols_m: - meter_eui = float(row_vals[cols_m].sum()) / area + meter_eui = float(row_vals[cols_m].sum()) # already kWh/m² row_dict[f"eui_{meter.lower().replace(' ', '_')}"] = meter_eui rows.append(row_dict) diff --git a/src/globi/tools/visualization/views/raw_data.py b/src/globi/tools/visualization/views/raw_data.py index 448e2b8..94402da 100644 --- a/src/globi/tools/visualization/views/raw_data.py +++ b/src/globi/tools/visualization/views/raw_data.py @@ -11,6 +11,7 @@ from globi.tools.visualization.plotting import ( Theme, create_building_map_deck, + create_building_map_deck_from_cache, create_column_layer_chart, create_histogram_d3_html, create_monthly_timeseries_d3_html, @@ -21,6 +22,8 @@ from globi.tools.visualization.utils import ( LAT_COL, LON_COL, + build_map_df_from_output, + build_map_features_from_df, has_geo_columns, list_categorical_columns, list_numeric_columns, @@ -284,7 +287,8 @@ def _render_results_map( """Render 3D building map from rotated_rectangle and height. 
Converts rotated_rectangle WKT (cartesian CRS) to lat/lon, extrudes by - height (meters). Per geometry.py, rectangles are created in cart_crs. + height (meters). Caches map_df and geometry when run/CRS selected; only + adds the chosen metric when rendering. """ if "dryrun" in run_label.lower(): st.info("You have selected a dryrun which does not have a mapping option") @@ -315,12 +319,32 @@ def _render_results_map( ) value_col, cmap, metric_label = metric_option - result = create_building_map_deck( - df, - cart_crs=cart_crs, - value_col=value_col, - cmap=cmap, - ) + cache_key = f"_map_cache_{run_label}_{cart_crs}" + if cache_key not in st.session_state: + with st.spinner("Building map data (geometry + metrics)..."): + map_df = build_map_df_from_output(df, cart_crs=cart_crs) + if map_df is not None: + geometry = build_map_features_from_df( + map_df, cart_crs=cart_crs, value_col=None + ) + if geometry is not None: + st.session_state[cache_key] = (map_df, geometry) + + if cache_key in st.session_state: + map_df, geometry = st.session_state[cache_key] + result = create_building_map_deck_from_cache( + geometry, + map_df, + value_col=value_col, + cmap=cmap, + ) + else: + result = create_building_map_deck( + df, + cart_crs=cart_crs, + value_col=value_col, + cmap=cmap, + ) if result is None: st.info( "Map unavailable. Output must have rotated_rectangle (or GLOBI_ROTATED_RECTANGLE) " diff --git a/src/globi/tools/visualization/views/use_cases.py b/src/globi/tools/visualization/views/use_cases.py index 4d5d118..4e30259 100644 --- a/src/globi/tools/visualization/views/use_cases.py +++ b/src/globi/tools/visualization/views/use_cases.py @@ -383,9 +383,13 @@ def _render_retrofit_map( per_scenario_emissions: dict[str, dict[str, float]], system_costs_per_sqm: dict[str, float], ) -> None: - """Render pydeck map with selectable metric and colormap.""" - from globi.tools.visualization.plotting import create_building_map_deck + """Render pydeck map with selectable metric and colormap. 
Caches geometry per scenario/CRS.""" + from globi.tools.visualization.plotting import ( + create_building_map_deck, + create_building_map_deck_from_cache, + ) from globi.tools.visualization.results_data import build_retrofit_map_df + from globi.tools.visualization.utils import build_map_features_from_df from globi.tools.visualization.views.raw_data import _render_colormap_legend st.markdown("#### Building map by retrofit metric") @@ -450,12 +454,30 @@ def _render_retrofit_map( st.warning(f"Metric '{value_col}' not available for this scenario.") return - result = create_building_map_deck( - map_df, - cart_crs=cart_crs, - value_col=value_col, - cmap=cmap, - ) + cache_key = f"_retrofit_map_{scenario}_{cart_crs}" + if cache_key not in st.session_state: + with st.spinner("Building map geometry..."): + geometry = build_map_features_from_df( + map_df, cart_crs=cart_crs, value_col=None + ) + if geometry is not None: + st.session_state[cache_key] = geometry + + if cache_key in st.session_state: + geometry = st.session_state[cache_key] + result = create_building_map_deck_from_cache( + geometry, + map_df, + value_col=value_col, + cmap=cmap, + ) + else: + result = create_building_map_deck( + map_df, + cart_crs=cart_crs, + value_col=value_col, + cmap=cmap, + ) if result is None: st.info("Could not build map.") return