From 564c4aaa5ac0f6267551c8c16664a9e91a3b417f Mon Sep 17 00:00:00 2001 From: daryaguettler Date: Wed, 11 Mar 2026 11:33:23 -0400 Subject: [PATCH 1/3] update docs --- .../run-simulations/inputs_and_outputs.md | 644 ++++++++++++++++++ .../run-simulations/simulate_building.md | 37 +- .../run-simulations/simulation_tasking.md | 95 ++- docs/tutorials/visualization/visualization.md | 241 +++++++ mkdocs.yml | 3 + 5 files changed, 974 insertions(+), 46 deletions(-) create mode 100644 docs/tutorials/run-simulations/inputs_and_outputs.md create mode 100644 docs/tutorials/visualization/visualization.md diff --git a/docs/tutorials/run-simulations/inputs_and_outputs.md b/docs/tutorials/run-simulations/inputs_and_outputs.md new file mode 100644 index 0000000..af02938 --- /dev/null +++ b/docs/tutorials/run-simulations/inputs_and_outputs.md @@ -0,0 +1,644 @@ +## Simulation inputs and outputs + +This page documents every input file required to run globi simulations (both single-building and batch/manifest), how to populate them, and the format of the output files produced. + +--- + +### Input files overview + +The table below summarizes all input files. Which files you need depends on whether you are running a single building or a batch via manifest. + +| File | Single building | Batch (manifest) | Description | +| ------------------------ | :-------------: | :--------------: | -------------------------------------------------- | +| `building.yml` | required | -- | single-building specification | +| `manifest.yml` | -- | required | experiment specification | +| `artifacts.yml` | -- | required | file paths for GIS, DB, weather, etc. 
| +| `semantic-fields.yml` | required | required | semantic field definitions and GIS column mappings | +| `component-map.yml` | required | required | maps semantic fields to component selection rules | +| `components-lib.db` | required | required | SQLite component database | +| `buildings.parquet` | -- | required | GIS building footprints (GeoDataFrame) | +| `gis-preprocessor.yml` | -- | optional | geometry validation and defaults | +| `hourly-data-config.yml` | -- | optional | hourly output variable configuration | +| `overheating-config.yml` | -- | optional | overheating analysis thresholds | +| EPW weather file | required | required | EnergyPlus weather data (URL or local path) | + +--- + +### Input file details + +#### `building.yml` -- single building specification + +Used by `make cli-native simulate` (or `make cli simulate`). Defines a single building's geometry, envelope, and the semantic context used to look up components. + +**Required fields**: + +| Field | Type | Description | +| ------------------------ | ----------- | ------------------------------------------------------------- | +| `db_file` | path | path to the component database (SQLite) | +| `semantic_fields_file` | path | path to the semantic fields config | +| `component_map_file` | path | path to the component map config | +| `epwzip_file` | path or URL | EPW weather file | +| `semantic_field_context` | dict | key-value pairs matching semantic field names to their values | + +**Optional fields**: + +| Field | Type | Default | Constraints | Description | +| ----------------------- | ----- | -------- | ----------- | ----------------------------------- | +| `length` | float | 15.0 | >= 3.0 | long edge of the building [m] | +| `width` | float | 15.0 | >= 3.0 | short edge of the building [m] | +| `num_floors` | int | 2 | >= 1 | number of floors | +| `f2f_height` | float | 3.0 | >= 0 | floor-to-floor height [m] | +| `wwr` | float | 0.2 | 0.0 -- 1.0 | window-to-wall ratio | +| `basement` | str | 
`"none"` | see below | basement type | +| `attic` | str | `"none"` | see below | attic type | +| `exposed_basement_frac` | float | 0.25 | 0.0 -- 1.0 | fraction of basement exposed to air | + +Valid values for `basement` and `attic`: `"none"`, `"unoccupied_unconditioned"`, `"unoccupied_conditioned"`, `"occupied_unconditioned"`, `"occupied_conditioned"`. + +If `length < width`, they are automatically swapped so `length` is always the longer edge. + +**Example** (`inputs/building.yml`): + +```yaml +db_file: inputs/components-lib.db +semantic_fields_file: inputs/semantic-fields.yml +component_map_file: inputs/component-map.yml +epwzip_file: "https://climate.onebuilding.org/WMO_Region_4_North_and_Central_America/USA_United_States_of_America/MA_Massachusetts/USA_MA_Boston-Logan.Intl.AP.725090_TMYx.2009-2023.zip" +semantic_field_context: + Region: TestRegion + Typology: Residential + Age_bracket: Post_2000 + Scenario: Baseline + Income: Low +length: 20.0 +width: 15.0 +num_floors: 2 +f2f_height: 3.5 +wwr: 0.3 +basement: none +attic: none +exposed_basement_frac: 0.25 +``` + +--- + +#### `manifest.yml` -- experiment specification + +Used by `make cli-native submit manifest` (or `make cli submit manifest`). Defines a batch experiment over a set of buildings. All referenced config files can be either inline YAML or file paths -- when a path is given, the file is loaded automatically. + +| Field | Type | Required | Description | +| ------------------------- | --------------------- | -------- | ------------------------------------------------------ | +| `name` | str | yes | experiment/region name (used in `run_name`) | +| `scenario` | str | yes | scenario identifier (e.g. 
`Baseline`, `Retrofit`) | +| `file_config` | path or inline | yes | path to `artifacts.yml` or inline file config | +| `gis_preprocessor_config` | path or inline | no | path to `gis-preprocessor.yml` or inline config | +| `hourly_data_config` | path, inline, or null | no | path to `hourly-data-config.yml`, or `null` to disable | +| `overheating_config` | path, inline, or null | no | path to `overheating-config.yml`, or `null` to disable | + +**Example** (`inputs/manifest.yml`): + +```yaml +name: TestRegion +scenario: Baseline +hourly_data_config: null +file_config: inputs/artifacts.yml +gis_preprocessor_config: inputs/gis-preprocessor.yml +``` + +**Example with overheating and hourly data enabled**: + +```yaml +name: TestRegion +scenario: Baseline +hourly_data_config: inputs/hourly-data-config.yml +overheating_config: inputs/overheating-config.yml +file_config: inputs/artifacts.yml +gis_preprocessor_config: inputs/gis-preprocessor.yml +``` + +--- + +#### `artifacts.yml` -- file references + +Points to the data files used during batch simulation. Referenced by `manifest.yml` via the `file_config` field. 
+ +| Field | Type | Description | +| ---------------------- | ----------- | ------------------------------------------------------ | +| `gis_file` | path | path to the buildings GeoDataFrame (parquet) | +| `db_file` | path | path to the component database (SQLite) | +| `semantic_fields_file` | path | path to the semantic fields config | +| `component_map_file` | path | path to the component map config | +| `epwzip_file` | path or URL | EPW weather file (or `null` to use nearest EPW lookup) | + +**Example** (`inputs/artifacts.yml`): + +```yaml +gis_file: inputs/buildings.parquet +db_file: inputs/components-lib.db +semantic_fields_file: inputs/semantic-fields.yml +epwzip_file: "https://climate.onebuilding.org/WMO_Region_4_North_and_Central_America/USA_United_States_of_America/MA_Massachusetts/USA_MA_Boston-Logan.Intl.AP.725090_TMYx.2009-2023.zip" +component_map_file: inputs/component-map.yml +``` + +--- + +#### `semantic-fields.yml` -- semantic field definitions + +Defines the semantic fields (categorical variables) used to look up building components in the database, and maps GIS column names to building attributes. 
+ +| Field | Type | Description | +| ------------------ | --------- | ------------------------------------------------------------------------- | +| `Name` | str | model name | +| `Fields` | list | list of semantic field definitions | +| `Fields[].Name` | str | field name (must match keys in `semantic_field_context` and component DB) | +| `Fields[].Options` | list[str] | allowed values for this field | +| `Height_col` | str | GIS column name for building height | +| `Num_Floors_col` | str | GIS column name for number of floors | +| `Building_ID_col` | str | GIS column name for building ID | +| `GFA_col` | str | GIS column name for gross floor area / footprint area | + +**Example** (`inputs/semantic-fields.yml`): + +```yaml +Name: Test Region Model +Fields: + - Name: Region + Options: + - TestRegion + - Name: Typology + Options: + - Office + - School + - Residential + - Hospital + - Hotel + - Name: Age_bracket + Options: + - Pre_1980 + - 1980_to_2000 + - Post_2000 + - Name: Income + Options: + - Low + - High + - Name: Scenario + Options: + - Baseline + - Retrofit + +Height_col: height +Num_Floors_col: num_floors +Building_ID_col: building_id +GFA_col: footprint_area +``` + +The `Fields` entries define the categorical axes of the component database. Each building is assigned a value for each field (either from the GIS data or from `semantic_field_context`), and those values are used to select the appropriate envelope, HVAC, and other components. + +--- + +#### `component-map.yml` -- component selection rules + +Maps semantic fields to component types. Each component category has a `selector` that specifies which semantic fields are used to look up the matching entry in the component database. 
+ +The top-level structure has two sections: + +- **Envelope**: construction and infiltration components +- **Operations**: space use, HVAC, and DHW components + +**Example** (`inputs/component-map.yml`): + +```yaml +Envelope: + selector: + source_fields: + - Region + - Typology + - Scenario + - Age_bracket + +Operations: + SpaceUse: + selector: + source_fields: + - Region + - Typology + - Income + - Scenario + HVAC: + selector: + source_fields: + - Region + - Typology + - Scenario + - Age_bracket + DHW: + selector: + source_fields: + - Region + - Typology +``` + +Each `source_fields` list names the semantic fields whose values are concatenated to form the lookup key in the component database. For example, an envelope lookup with `Region=TestRegion`, `Typology=Office`, `Scenario=Baseline`, `Age_bracket=Post_2000` would search for a matching entry in the database. + +For more complex models, you can nest sub-components under each category. For example, envelope can be split into `Infiltration`, `Window`, and `Assemblies`, each with their own selector: + +```yaml +Envelope: + Infiltration: + selector: + source_fields: + - Region + - TypologySpaceUse + - Weatherization + suffix: Main + Window: + selector: + source_fields: + - Region + Assemblies: + selector: + source_fields: + - Region + - TypologySpaceUse + - Age_bracket + +Operations: + SpaceUse: + Occupancy: + selector: + source_fields: + - Region + - TypologySpaceUse + Lighting: + selector: + source_fields: + - Region + - TypologySpaceUse + - Lighting + Equipment: + selector: + source_fields: + - Region + - TypologySpaceUse + Thermostat: + selector: + source_fields: + - Region + - TypologySpaceUse + - Thermostat + WaterUse: + selector: + source_fields: + - Region + - TypologySpaceUse + HVAC: + ConditioningSystems: + Heating: + selector: + source_fields: + - Region + - Heating + Cooling: + selector: + source_fields: + - Region + - Cooling + Ventilation: + selector: + source_fields: + - Region + - 
TypologyVentilation + - Weatherization + DHW: + selector: + source_fields: + - Region + - DHW +``` + +--- + +#### `components-lib.db` -- component database + +A SQLite database containing building component definitions (materials, assemblies, glazing, HVAC systems, schedules, etc.). This database is populated separately and is referenced by both single-building and batch simulations. + +The database uses a Prisma-managed schema with tables including: + +- **Envelope**: `ConstructionMaterial`, `ConstructionAssembly`, `ConstructionAssemblyLayer`, `GlazingConstructionSimple`, `Infiltration`, `Envelope`, `EnvelopeAssembly` +- **Operations**: `Occupancy`, `Lighting`, `Equipment`, `Thermostat`, `WaterUse`, `SpaceUse`, `ThermalSystem`, `ConditioningSystems`, `HVAC`, `Ventilation`, `DHW`, `Operations` +- **Schedule**: `Day`, `Week`, `Year` +- **Zone**: `Zone` + +Component records are keyed by concatenated semantic field values (e.g. `TestRegion_Office_Baseline_Post_2000`). + +--- + +#### `buildings.parquet` -- building footprints + +A GeoParquet file containing building footprint geometries and attributes for batch simulations. Each row represents one building. + +**Required columns** (column names are defined in `semantic-fields.yml`): + +| Column (from semantic-fields) | Description | +| ------------------------------------------ | --------------------------------------- | +| building ID column (`Building_ID_col`) | unique building identifier | +| height column (`Height_col`) | building height [m] | +| number of floors column (`Num_Floors_col`) | number of floors | +| GFA column (`GFA_col`) | gross floor area or footprint area [m2] | +| geometry | building footprint polygon | + +Additionally, the parquet must contain columns for each semantic field defined in `semantic-fields.yml` that the GIS preprocessor needs to assign to each building (e.g. `Typology`, `Age_bracket`, etc.). Fields not present in the GIS data can be set via the `scenario` field in the manifest. 

---

#### `gis-preprocessor.yml` -- geometry validation and defaults

Controls how GIS building data is validated and preprocessed before simulation. All fields are optional with sensible defaults.

| Field | Type | Default | Description |
| ------------------------------- | ------------ | ---------------------- | --------------------------------------------- |
| `cart_crs` | str | `EPSG:3857` | cartesian CRS for geometry operations |
| `min_building_area` | float | 10.0 | minimum building footprint area [m2] |
| `min_edge_length` | float | 3.0 | minimum edge length [m] |
| `max_edge_length` | float | 1000.0 | maximum edge length [m] |
| `neighbor_threshold` | float | 100.0 | distance threshold for neighbor detection [m] |
| `f2f_height` | float | 3.0 | floor-to-floor height [m] |
| `min_building_height` | float | 3.0 | minimum building height [m] |
| `max_building_height` | float | 300.0 | maximum building height [m] |
| `min_num_floors` | int | 1 | minimum number of floors |
| `max_num_floors` | int | 125 | maximum number of floors |
| `default_wwr` | float | 0.2 | default window-to-wall ratio |
| `default_num_floors` | int | 2 | default number of floors when missing |
| `default_basement` | str | `"none"` | default basement type |
| `default_attic` | str | `"none"` | default attic type |
| `default_exposed_basement_frac` | float | 0.25 | default exposed basement fraction |
| `epwzip_file` | path or null | null | override EPW file for all buildings |
| `epw_query` | str or null | `"source in ['tmyx']"` | filter for closest EPW lookup |

**Example** (`inputs/gis-preprocessor.yml`):

```yaml
cart_crs: EPSG:3857
min_building_area: 10.0
min_edge_length: 3.0
max_edge_length: 1000.0
neighbor_threshold: 100.0
f2f_height: 3.0
min_building_height: 3.0
max_building_height: 300.0
min_num_floors: 1
max_num_floors: 125
default_wwr: 0.2
default_num_floors: 2
default_basement: none
default_attic: none
default_exposed_basement_frac: 
0.25 +epwzip_file: null +epw_query: source in ['tmyx'] +``` + +--- + +#### `hourly-data-config.yml` -- hourly output configuration + +Configures which hourly EnergyPlus output variables to report. When enabled (by setting `hourly_data_config` in the manifest), the simulation produces additional per-building time series dataframes. + +| Field | Type | Description | +| ------------- | --------- | ------------------------------------------------------------------------- | +| `data` | list[str] | EnergyPlus output variable names to report | +| `output_mode` | str | one of `"dataframes-and-filerefs"`, `"fileref-only"`, `"dataframes-only"` | + +Available hourly variables include (among others): + +- `"Zone Mean Air Temperature"` +- `"Zone Air Relative Humidity"` + +**Example** (`inputs/hourly-data-config.yml`): + +```yaml +data: + - "Zone Mean Air Temperature" + - "Zone Air Relative Humidity" + +output_mode: dataframes-and-filerefs +``` + +--- + +#### `overheating-config.yml` -- overheating analysis configuration + +Configures overheating analysis thresholds. When enabled (by setting `overheating_config` in the manifest), the simulation produces `BasicOverheating.pq` and related dataframes. 
+ +| Field | Type | Description | +| --------------------- | ---- | --------------------------------------------------- | +| `heat_thresholds` | list | temperature thresholds for heat exceedance analysis | +| `cold_thresholds` | list | temperature thresholds for cold exceedance analysis | +| `heat_index_criteria` | dict | heat index hour limits (set to `null` to skip) | +| `thermal_comfort` | dict | thermal comfort parameters (met, clo, v) | + +**Example** (`inputs/overheating-config.yml`): + +```yaml +heat_thresholds: + - threshold: 26.0 + - threshold: 30.0 + - threshold: 35.0 +cold_thresholds: + - threshold: 10.0 + - threshold: 5.0 +heat_index_criteria: + extreme_danger_hours: null + danger_or_worse_hours: null + caution_or_worse_hours: null +thermal_comfort: + met: 1.1 + clo: 0.5 + v: 0.1 +``` + +--- + +### How inputs relate to each other + +For a **single building** simulation, the relationship is straightforward: + +``` +building.yml +├── db_file ──────────────► components-lib.db +├── semantic_fields_file ─► semantic-fields.yml +├── component_map_file ───► component-map.yml +└── epwzip_file ──────────► weather file (URL or local) +``` + +For a **batch** (manifest) simulation, the chain is: + +``` +manifest.yml +├── file_config ──────────────────► artifacts.yml +│ ├── gis_file ──────────────► buildings.parquet +│ ├── db_file ───────────────► components-lib.db +│ ├── semantic_fields_file ──► semantic-fields.yml +│ ├── component_map_file ────► component-map.yml +│ └── epwzip_file ───────────► weather file +├── gis_preprocessor_config ──────► gis-preprocessor.yml +├── hourly_data_config (optional) ► hourly-data-config.yml +└── overheating_config (optional) ► overheating-config.yml +``` + +!!! warning + + all file paths in your configs should be relative to the repository root, or use absolute paths. for dockerized runs (`make cli`), all input files must be located under the `inputs/` directory. 
+ +--- + +### Output files + +#### Single building output + +Running `make cli-native simulate` produces the following directory structure: + +``` +outputs/ +├── ep/ # EnergyPlus working directory +│ └── eplus_simulation/ +│ └── {hash}/ # simulation run (hash of the IDF) +│ ├── Minimal.idf # generated EnergyPlus model +│ ├── *.epw # weather file used +│ ├── eplusout.csv # hourly outputs +│ ├── eplusmtr.csv # meter outputs +│ ├── eplustbl.csv # tabular summary report +│ ├── epluszsz.csv # zone sizing data +│ └── ... # other EnergyPlus artifacts +└── results/ + ├── EnergyAndPeak.parquet # main results (parquet) + ├── EnergyAndPeak.csv # flattened CSV export + └── EnergyAndPeak.xlsx # multi-sheet Excel workbook +``` + +#### Batch simulation output (from S3) + +Running `make cli-native get experiment` downloads results to: + +``` +outputs/ +└── {run_name}/ + └── {version}/ + ├── EnergyAndPeak.pq # main results (parquet) + ├── EnergyAndPeak.csv # CSV export (auto-generated) + └── EnergyAndPeak.xlsx # Excel workbook (auto-generated) +``` + +When hourly data or overheating analysis is enabled, additional dataframes are stored in S3 and can be fetched by specifying `--dataframe-key`: + +- `BasicOverheating` -- overheating hours per building +- `ExceedanceDegreeHours` -- degree hours above each threshold +- `HeatIndexCategories` -- heat index classification hours +- `ConsecutiveExceedances` -- consecutive exceedance periods +- `HourlyData.Zone_Mean_Air_Temperature` -- hourly zone temperatures +- `HourlyData.Zone_Air_Relative_Humidity` -- hourly zone humidity + +--- + +#### `EnergyAndPeak` dataframe format + +This is the primary output. 
It uses a multi-index column structure with four levels:

| Level | Name | Values |
| ----- | ----------- | ------------------------------------------------------------------------------------------------------------------------------ |
| 0 | Measurement | `Energy`, `Peak` |
| 1 | Aggregation | `Raw`, `End Uses`, `Utilities` |
| 2 | Meter | `Lighting`, `Equipment`, `Domestic Hot Water`, `Heating`, `Cooling`, `ChilledWater`, `Coal`, `Electricity`, `NaturalGas`, etc. |
| 3 | Month | `1` through `12` |

**Index** (for batch runs): a multi-index containing `building_id` and all semantic field feature columns (e.g. `feature.semantic.Typology`, `feature.semantic.Age_bracket`).

**Units**:

- Energy values are in **kWh/m2**
- Peak values are in **kW/m2**

**CSV format** (flattened):

The CSV export stacks the Month level into the row index, producing one row per month. The header is three rows deep (one per remaining column level -- Measurement, Aggregation, Meter), with the Month values forming the row index:

```
 Energy Energy Energy ... Peak Peak ...
 Raw Raw Raw ... Raw Raw ...
 Lighting Equipment DHW ... Lighting Equipment ...
Month
1 1.588 1.860 0.432 ... 0.0065 0.0076 ...
2 1.434 1.680 0.390 ... 0.0065 0.0076 ...
...
12 1.416 1.860 0.432 ... 0.0174 0.0076 ...
```

**Excel format**:

The Excel workbook contains one sheet per Measurement+Aggregation combination:

- `Energy_Raw` -- raw energy by meter and month
- `Energy_EndUses` -- energy grouped by end use
- `Energy_Utilities` -- energy grouped by utility/fuel type
- `Peak_Raw` -- raw peak by meter and month
- `Peak_EndUses` -- peak grouped by end use
- `Peak_Utilities` -- peak grouped by utility/fuel type
- `Feature Index` -- building IDs and semantic field values (batch runs only)

---

#### EnergyPlus raw outputs

The `ep/` directory contains the raw EnergyPlus simulation artifacts for each run. 
Key files: + +| File | Description | +| ------------------------- | ---------------------------------------------------------------------- | +| `Minimal.idf` | the generated EnergyPlus input file | +| `*.epw` | the weather file used | +| `eplusout.csv` | all hourly output variables (temperatures, humidity, energy in Joules) | +| `eplusmtr.csv` | meter-level outputs (energy by fuel type, hourly/monthly) | +| `eplustbl.csv` | tabular summary report (annual totals, end-use breakdown) | +| `epluszsz.csv` | zone sizing data (design loads, mass flows) | +| `eplusout.eso` | EnergyPlus standard output (binary) | +| `*.eio`, `*.rdd`, `*.mdd` | variable dictionaries and metadata | + +--- + +### Quick reference + +#### Minimal single-building setup + +``` +inputs/ +├── building.yml +├── components-lib.db +├── semantic-fields.yml +└── component-map.yml +``` + +#### Minimal batch setup + +``` +inputs/ +├── manifest.yml +├── artifacts.yml +├── buildings.parquet +├── components-lib.db +├── semantic-fields.yml +├── component-map.yml +└── gis-preprocessor.yml # optional +``` + +#### Batch with hourly data and overheating + +``` +inputs/ +├── manifest.yml +├── artifacts.yml +├── buildings.parquet +├── components-lib.db +├── semantic-fields.yml +├── component-map.yml +├── gis-preprocessor.yml +├── hourly-data-config.yml +└── overheating-config.yml +``` diff --git a/docs/tutorials/run-simulations/simulate_building.md b/docs/tutorials/run-simulations/simulate_building.md index 83b9420..ed655c3 100644 --- a/docs/tutorials/run-simulations/simulate_building.md +++ b/docs/tutorials/run-simulations/simulate_building.md @@ -16,7 +16,7 @@ It assumes you have already completed the [setup guide](../getting-started/requi !!! note - the commands in this guide are the same for macOS, linux, and windows (using a unix‑like shell such as git bash or wsl). + the commands in this guide are the same for macOS, linux, and windows (using a unix-like shell such as git bash or wsl). 
--- @@ -77,23 +77,31 @@ exposed_basement_frac: 0.25 ### Step 2: Run the simulation -Use the `make cli simulate` command to run a single building simulation: +Use the `make cli-native simulate` command to run a single building simulation locally: ```bash -make cli simulate -- --config inputs/building.yml --output-dir outputs +make cli-native simulate -- --config inputs/building.yml --output-dir outputs ``` -**Command options**: +If you use the default paths, you can simply run: -- `--config {PATH}`: path to your building configuration YAML file (default: `inputs/building.yml`) -- `--output-dir {PATH}`: directory where simulation results will be saved (default: `outputs`) +```bash +make cli-native simulate +``` -If you use the default paths, you can simply run: +Alternatively, run via Docker: ```bash -make cli simulate +make cli simulate -- --config inputs/building.yml --output-dir outputs ``` +**Command options**: + +| Option | Type | Default | Description | +| -------------- | ---- | --------------------- | ------------------------------------------------ | +| `--config` | path | `inputs/building.yml` | path to your building configuration YAML file | +| `--output-dir` | path | `outputs` | directory where simulation results will be saved | + !!! warning **critical**: you must include the two dashes `--` after `simulate` and before any options. this separator is required to pass arguments correctly to the underlying CLI command. if you forget it, the command will fail with an error. 
@@ -183,13 +191,16 @@ outputs/ ```bash # run simulation with default paths (inputs/building.yml -> outputs/) -make cli simulate +make cli-native simulate # run simulation with custom config and output directory -make cli simulate -- --config inputs/my_building.yml --output-dir outputs/my_results +make cli-native simulate -- --config inputs/my_building.yml --output-dir outputs/my_results # run simulation with only custom output directory -make cli simulate -- --output-dir outputs/custom +make cli-native simulate -- --output-dir outputs/custom + +# run via docker (same options) +make cli simulate -- --config inputs/building.yml --output-dir outputs ``` ### Building configuration file structure @@ -210,8 +221,8 @@ width: 15.0 # default: 15.0 num_floors: 3 # default: 2 f2f_height: 3.5 # default: 3.0 wwr: 0.3 # default: 0.2 -basement: none # default: "none" (options: "none", "unoccupied_unconditioned", "unoccupied_conditioned", "occupied_unconditioned", "occupied_conditioned") -attic: none # default: "none" (options: "none", "unoccupied_unconditioned", "unoccupied_conditioned", "occupied_unconditioned", "occupied_conditioned") +basement: none # default: "none" +attic: none # default: "none" exposed_basement_frac: 0.25 # default: 0.25 ``` diff --git a/docs/tutorials/run-simulations/simulation_tasking.md b/docs/tutorials/run-simulations/simulation_tasking.md index c3e9191..1781dcf 100644 --- a/docs/tutorials/run-simulations/simulation_tasking.md +++ b/docs/tutorials/run-simulations/simulation_tasking.md @@ -1,6 +1,6 @@ ## Run simulations with Hatchet and Docker -This guide walks you through running `globi` simulations end‑to‑end using Hatchet and Docker. +This guide walks you through running `globi` simulations end-to-end using Hatchet and Docker. This is the workflow for running batches of buildings across a manifest configuration. 
It assumes you have already completed the [setup guide](../getting-started/requirements.md), including: @@ -27,7 +27,7 @@ The steps below cover: !!! note - the commands in this guide are the same for macOS, linux, and windows (using a unix‑like shell such as git bash or wsl). + the commands in this guide are the same for macOS, linux, and windows (using a unix-like shell such as git bash or wsl). --- @@ -67,7 +67,7 @@ Look for a `hatchet-lite` container with a `running` status. ### Step 2: Create and configure Hatchet environment files -Hatchet uses a client token stored in environment files that are loaded by the `make cli` (dockerized) or `make cli-native` (non-dockerized) target. +Hatchet uses a client token stored in environment files that are loaded by `make cli` (dockerized) or `make cli-native` (non-dockerized). 1. **Generate a Hatchet client token**: @@ -156,7 +156,7 @@ You should see containers for Hatchet and the simulation services with a `runnin make: *** [engine] Error 1 ``` - this is usually transient. re‑run: + this is usually transient. re-run: ```bash make engine @@ -194,7 +194,7 @@ If you do **not** see workers, refer to the troubleshooting section below. ### Step 5: Run a test simulation -Now you can submit a simulation manifest via the `make cli` (dockerized) or `make cli-native` (non-dockerized) target, which wraps the `globi` CLI with the correct environment files. +Now you can submit a simulation manifest via `make cli` (dockerized) or `make cli-native` (non-dockerized), which wraps the `globi` CLI with the correct environment files. !!! 
warning @@ -219,10 +219,10 @@ Now you can submit a simulation manifest via the `make cli` (dockerized) or `mak ```bash # dockerized - make cli submit manifest -- --path inputs/manifest.yml --grid-run --max-tests 100 + make cli submit manifest -- --path inputs/manifest.yml --grid-run --max-sims 100 # non-dockerized - make cli-native submit manifest -- --path inputs/manifest.yml --grid-run --max-tests 100 + make cli-native submit manifest -- --path inputs/manifest.yml --grid-run --max-sims 100 ``` !!! warning @@ -242,23 +242,25 @@ Now you can submit a simulation manifest via the `make cli` (dockerized) or `mak where: - `{PATH_TO_MANIFEST}` is your manifest file path (for example `inputs/manifest.yml`) - - `--grid-run` enables grid‑style execution over the manifest configuration + - `--grid-run` enables grid-style execution over the manifest configuration (only simulates semantic field combinations) **Optional flags**: - - `--max-tests {NUMBER}`: override the maximum number of tests in a grid run (default: 1000). 
example: `--max-tests 100` - - `--scenario {SCENARIO_NAME}`: override the scenario listed in the manifest file with the provided scenario - - `--skip-model-constructability-check`: skip the model constructability check (flag, no value) - - `--epwzip-file {PATH}`: override the EPWZip file listed in the manifest file with the provided EPWZip file + | Flag | Type | Description | + | ------------------------------------- | ---- | ---------------------------------------------------- | + | `--max-sims {NUMBER}` | int | override the maximum number of simulations to run | + | `--scenario {SCENARIO_NAME}` | str | override the scenario listed in the manifest file | + | `--skip-model-constructability-check` | flag | skip the model constructability check | + | `--epwzip-file {PATH}` | path | override the EPW weather file listed in the manifest | Example with multiple optional flags: ```bash # dockerized - make cli submit manifest -- --path inputs/manifest.yml --grid-run --max-tests 50 --scenario baseline + make cli submit manifest -- --path inputs/manifest.yml --grid-run --max-sims 50 --scenario baseline # non-dockerized - make cli-native submit manifest -- --path inputs/manifest.yml --grid-run --max-tests 50 --scenario baseline + make cli-native submit manifest -- --path inputs/manifest.yml --grid-run --max-sims 50 --scenario baseline ``` 4. **Monitor progress in the Hatchet UI**: @@ -266,13 +268,13 @@ Now you can submit a simulation manifest via the `make cli` (dockerized) or `mak - go to `http://localhost:8888` - navigate to **workflows** or **runs** - locate the workflow corresponding to your manifest submission - - watch status transition from `pending` → `running` → `completed` (or `failed` if there is an error) + - watch status transition from `pending` -> `running` -> `completed` (or `failed` if there is an error) -You can click into the workflow to view task‑level logs and any errors. + You can click into the workflow to view task-level logs and any errors. 5. 
**Note the run_name from the output**: - When the simulation completes, the CLI prints a summary with a `run_name` (for example `TestRegion/dryrun/Baseline`). **save this run_name** — you will need it to fetch results in the next step. + When the simulation completes, the CLI prints a summary with a `run_name` (for example `TestRegion/dryrun/Baseline`). **save this run_name** -- you will need it to fetch results in the next step. !!! note @@ -342,6 +344,30 @@ This command: - prints the exact location where files were saved - automatically generates CSV and Excel files for the `EnergyAndPeak` dataframe +#### Fetch overheating and other dataframes + +If your manifest includes an `overheating_config`, the run produces extra dataframes that are **not** downloaded by the default command. Request them with `--dataframe-key`: + +| dataframe key | description | +| ------------------------ | ---------------------------------------- | +| `EnergyAndPeak` | (default) monthly energy and peak demand | +| `BasicOverheating` | hours above/below temperature thresholds | +| `ExceedanceDegreeHours` | degree-hours above thresholds | +| `HeatIndexCategories` | heat index category hours | +| `ConsecutiveExceedances` | consecutive exceedance streaks (if any) | + +Example: download overheating outputs for a run that used an overheating config: + +```bash +# basic overheating (hours above threshold per building) +make cli-native get experiment -- --run-name "TestRegion_Tutorial/Baseline" --dataframe-key BasicOverheating + +# exceedance degree hours +make cli-native get experiment -- --run-name "TestRegion_Tutorial/Baseline" --dataframe-key ExceedanceDegreeHours +``` + +Files are written to `outputs/{run_name}/{version}/{dataframe_key}.pq`. Overheating dataframes are saved as parquet only (no automatic Excel/CSV). 
+ **Example output structure**: ``` @@ -357,20 +383,20 @@ outputs/ #### Fetch a specific version and output directory -If you have multiple versions of the same run, or you want to control exactly where results are written, include `--version` and `--output_dir`: +If you have multiple versions of the same run, or you want to control exactly where results are written, include `--version` and `--output-dir`: ```bash # dockerized make cli get experiment -- \ --run-name {YOUR_RUN_NAME_HERE} \ --version {VERSION} \ - --output_dir {YOUR_CHOSEN_OUTPUT_DIR} + --output-dir {YOUR_CHOSEN_OUTPUT_DIR} # non-dockerized make cli-native get experiment -- \ --run-name {YOUR_RUN_NAME_HERE} \ --version {VERSION} \ - --output_dir {YOUR_CHOSEN_OUTPUT_DIR} + --output-dir {YOUR_CHOSEN_OUTPUT_DIR} ``` where: @@ -380,8 +406,10 @@ where: **Additional options**: -- `--dataframe-key {KEY}`: specify which dataframe to download (default: `EnergyAndPeak`). if hourly data was configured, each time series is a separate dataframe (e.g. `HourlyData.Zone_Mean_Air_Temperature`, `HourlyData.Zone_Air_Relative_Humidity`) -- `--include-csv`: include CSV export in addition to parquet (CSV is automatically included for `EnergyAndPeak` dataframe) +| Option | Type | Default | Description | +| ----------------------- | ---- | --------------- | -------------------------------------------------------------------------------------------------------------------------------------------------- | +| `--dataframe-key {KEY}` | str | `EnergyAndPeak` | which dataframe to download. if hourly data was configured, each time series is a separate dataframe (e.g. 
`HourlyData.Zone_Mean_Air_Temperature`) | +| `--include-csv` | flag | off | include CSV export in addition to parquet (CSV is automatically included for `EnergyAndPeak`) | **Example with all options**: @@ -390,14 +418,14 @@ where: make cli get experiment -- \ --run-name TestRegion/dryrun/Baseline \ --version 1.0.0 \ - --output_dir outputs/my_analysis \ + --output-dir outputs/my_analysis \ --include-csv # non-dockerized make cli-native get experiment -- \ --run-name TestRegion/dryrun/Baseline \ --version 1.0.0 \ - --output_dir outputs/my_analysis \ + --output-dir outputs/my_analysis \ --include-csv ``` @@ -421,7 +449,7 @@ make down This: -- stops and removes containers from `docker-compose.yml`, `docker-compose.hatchet.yml`, and `docker-compose.aws.yml` +- stops and removes containers from all compose files (including `docker-compose.st.yml` for the visualizer) - keeps docker images on disk so future runs start faster Run `make engine` again the next time you want to use the system. @@ -462,7 +490,7 @@ This section lists common issues and concrete steps to diagnose and fix them. target simulations: failed to solve: image ".../hatchet/globi:latest": already exists ``` - - simply re‑run: + - simply re-run: ```bash make engine @@ -480,7 +508,7 @@ This section lists common issues and concrete steps to diagnose and fix them. - if `hatchet-lite` fails to start because port `8080` is in use: - close any other application using port `8080` - or stop the conflicting container/process - - then re‑run `make hatchet-lite` or `make engine` + - then re-run `make hatchet-lite` or `make engine` --- @@ -515,7 +543,7 @@ This section lists common issues and concrete steps to diagnose and fix them. ``` - check for worker containers in `docker compose ... ps` - - open Hatchet UI → **workers** and verify that they show as healthy + - open Hatchet UI -> **workers** and verify that they show as healthy - if workers crash repeatedly, inspect their logs using `docker compose ... 
logs ` --- @@ -556,7 +584,7 @@ This section lists common issues and concrete steps to diagnose and fix them. - **jobs stuck in `pending`** - - check that workers are running (Hatchet UI → **workers**) + - check that workers are running (Hatchet UI -> **workers**) - confirm worker containers are healthy with `docker compose ... ps` - inspect worker logs for errors (for example configuration or connectivity issues) @@ -576,7 +604,7 @@ This section lists common issues and concrete steps to diagnose and fix them. - **`module not found` or missing dependency** - - re‑sync dependencies: + - re-sync dependencies: ```bash uv sync --all-extras --all-groups @@ -617,10 +645,11 @@ make engine # submit a simulation manifest (note the -- separator is required!) # dockerized -make cli submit manifest -- --path inputs/manifest.yml --grid-run --max-tests 100 +make cli submit manifest -- --path inputs/manifest.yml --grid-run --max-sims 100 # non-dockerized -make cli-native submit manifest -- --path inputs/manifest.yml --grid-run --max-tests 100 +make cli-native submit manifest -- --path inputs/manifest.yml --grid-run --max-sims 100 +# fetch experiment results # dockerized make cli get experiment -- --run-name {YOUR_RUN_NAME_HERE} # non-dockerized @@ -631,7 +660,7 @@ make down # open hatchet ui open http://localhost:8888 # macos -# or manually paste http://localhost:8080 into your browser +# or manually paste http://localhost:8888 into your browser ``` ### Key file locations diff --git a/docs/tutorials/visualization/visualization.md b/docs/tutorials/visualization/visualization.md new file mode 100644 index 0000000..9195c83 --- /dev/null +++ b/docs/tutorials/visualization/visualization.md @@ -0,0 +1,241 @@ +## Visualization engine + +The globi visualization engine is a Streamlit app for exploring and analyzing simulation results. It provides interactive charts, 3D building maps, and purpose-built analysis views for retrofit comparison, overheating assessment, and scenario comparison. 
+ +It assumes you have already completed the [setup guide](../getting-started/requirements.md) and have simulation results available either locally or in S3. + +--- + +### Before you start + +- **simulation results**: you need at least one completed simulation run with output parquet files (see [simulate a building](../run-simulations/simulate_building.md) or [simulation tasking](../run-simulations/simulation_tasking.md)) +- **terminal location**: run commands from the repository root + +--- + +### Starting the visualizer + +=== "Native (local)" + + ```bash + make viz-native + ``` + + This runs `streamlit run src/globi/tools/visualization/main.py` with the required environment files loaded. The app opens in your browser automatically. + +=== "Docker" + + ```bash + make viz + ``` + + This builds and starts the visualizer container via Docker Compose, including the `docker-compose.st.yml` configuration. + +Once running, the app is available at the URL printed in the terminal (typically `http://localhost:8501`). + +--- + +### Data sources + +The sidebar lets you choose between two data sources: + +#### Local + +Point the app at a local directory containing simulation output folders. Each subfolder that contains `.pq` or `.parquet` files is treated as a separate run. + +- **default directory**: `outputs` +- **optional**: place a `buildings.parquet` file in the `inputs/` directory with building location data (latitude, longitude, geometry) to enable 3D map visualizations + +#### S3 + +Connect directly to your S3 experiment storage. The app lists available experiments and lets you pick a run name, version, and dataframe key. + +**Required environment variables** (set in `.env.*.aws` and `.env.scythe.storage`): + +- `SCYTHE_STORAGE_BUCKET`: the S3 bucket name +- `SCYTHE_STORAGE_BUCKET_PREFIX` (optional): prefix within the bucket + +AWS credentials must be configured for S3 access. + +--- + +### Pages + +The app has three pages, accessible from the navigation menu. 
+ +--- + +#### Overview + +The landing page. It describes the available data sources, how to use the app, and what file formats are expected. Use this as a reference when first opening the visualizer. + +--- + +#### Raw Data Visualization + +Explore the output of any individual simulation run. Select a run from the dropdown and the app loads the corresponding parquet file. + +The behavior depends on the file format detected: + +##### Results / EnergyAndPeak format + +For files with the standard multi-index column structure (Measurement, Aggregation, Meter, Month), two tabs are available: + +**Summary tab**: + +| Chart | Description | +| ---------------------- | ----------------------------------------------------------------- | +| EUI histogram | distribution of energy use intensity across buildings | +| Peak demand histogram | distribution of peak demand across buildings | +| End-use pie chart | breakdown of energy by end use (heating, cooling, lighting, etc.) | +| Utilities pie chart | breakdown of energy by fuel type (electricity, gas, etc.) | +| Monthly EUI by end use | stacked bar chart showing monthly energy by end use | +| Monthly EUI by utility | stacked bar chart showing monthly energy by fuel type | + +**Map tab**: + +A 3D pydeck map showing buildings as extruded polygons. Requires building geometry data (either embedded in the parquet or from `inputs/buildings.parquet`). 
Color can be mapped to: + +- EUI (energy use intensity) +- total energy +- peak demand per sqm +- total peak demand + +##### Generic parquet format + +For any other parquet file, the app provides: + +- column selection with automatic numeric/categorical detection +- D3 histograms for numeric columns +- summary statistics grouped by a categorical column +- configurable value and category layers from the column structure + +##### Export + +All charts can be exported as: + +- **CSV**: raw data behind the chart +- **HTML**: interactive standalone chart +- **PNG**: static image (requires Playwright: `playwright install chromium`) + +--- + +#### Use Cases + +Purpose-built analysis views for common workflows. Select the use case type from the sidebar. + +--- + +##### Retrofit analysis + +Compare energy, cost, and emissions across two or more scenarios (e.g. baseline vs. retrofit). Requires at least two runs loaded from the data source. + +**Configuration** (sidebar): + +- select baseline and retrofit scenario(s) +- enter per-scenario energy costs ($/kWh per fuel type) +- enter per-scenario emissions factors (kgCO2/kWh per fuel type) +- enter system costs per sqm ($/m2) for each scenario +- assign display names to each scenario + +**Visualizations**: + +| Chart | Description | +| -------------------- | ---------------------------------------------------------------------- | +| EUI KDE plot | kernel density estimate comparing EUI distributions across scenarios | +| End-use stacked bars | energy breakdown by end use, per scenario | +| Fuel stacked bars | energy breakdown by fuel type, per scenario | +| Cost bar chart | total energy cost by scenario | +| Emissions bar chart | total emissions by scenario | +| 3D building map | buildings colored by selected metric (EUI, peak, percent change, etc.) | + +The map supports switching between metrics and adjusting elevation scale, radius, and view parameters (zoom, pitch, bearing). 
+ +--- + +##### Overheating analysis + +Visualize overheating risk across buildings on a 3D map. Requires that the simulation produced a `BasicOverheating.pq` file (enabled via hourly data configuration in the manifest). + +**Configuration** (sidebar): + +- temperature threshold: 26, 30, or 35 degrees C +- aggregation method: zone-weighted average or worst zone + +**Visualization**: + +- 3D pydeck map with buildings colored by overheating hours above the selected threshold +- configurable elevation scale and view parameters +- hover tooltips showing building-level overheating details + +!!! note + + if the selected run does not contain a `BasicOverheating.pq` file, the overheating use case will not be available. make sure hourly data output is enabled in your simulation configuration. + +--- + +##### Scenario comparison + +A lightweight comparison between two or more scenarios without cost/emissions data. Useful for quickly comparing energy profiles across different simulation configurations. 
+ +**Configuration** (sidebar): + +- select the scenarios to compare +- assign display names + +**Visualizations**: + +| Chart | Description | +| -------------------- | ---------------------------------------- | +| EUI KDE plot | distribution comparison across scenarios | +| End-use stacked bars | energy by end use per scenario | +| Fuel stacked bars | energy by fuel type per scenario | + +--- + +### Supported data formats + +| File | Structure | Used by | +| -------------------------- | ----------------------------------------------------------- | --------------------------------------- | +| `EnergyAndPeak.pq` | multi-index columns: Measurement, Aggregation, Meter, Month | raw data, retrofit, scenario comparison | +| `Results.pq` | same structure as EnergyAndPeak (legacy name) | raw data, retrofit, scenario comparison | +| `BasicOverheating.pq` | overheating hours per building per zone | overheating analysis | +| generic `.pq` / `.parquet` | any flat or index-flattened parquet | raw data (generic mode) | +| `buildings.parquet` | building locations with lat/lon and geometry | 3D map views | + +--- + +### Troubleshooting + +- **no runs found**: ensure your output directory contains subfolders with `.pq` or `.parquet` files. the app scans recursively for these. + +- **map not showing**: 3D maps require building geometry data. either the parquet file must contain `latitude`, `longitude`, and `rotated_rectangle` columns, or you must have an `inputs/buildings.parquet` file with this data that can be joined on `building_id`. + +- **PNG export fails**: PNG export uses Playwright for headless browser rendering. install it with: + + ```bash + playwright install chromium + ``` + +- **S3 connection errors**: verify your AWS credentials are configured and the environment variables `SCYTHE_STORAGE_BUCKET` (and optionally `SCYTHE_STORAGE_BUCKET_PREFIX`) are set in your env files. 
+ +- **streamlit not found**: re-sync dependencies: + + ```bash + uv sync --all-extras --all-groups + ``` + +--- + +### Quick reference + +```bash +# start visualizer locally +make viz-native + +# start visualizer via docker +make viz + +# stop all docker services (including visualizer) +make down +``` diff --git a/mkdocs.yml b/mkdocs.yml index c8663a7..37ec6f6 100644 --- a/mkdocs.yml +++ b/mkdocs.yml @@ -17,6 +17,9 @@ nav: - Run Simulations: - Simulation Tasking: tutorials/run-simulations/simulation_tasking.md - Simulate Building: tutorials/run-simulations/simulate_building.md + - Inputs and Outputs: tutorials/run-simulations/inputs_and_outputs.md + - Visualization: + - Visualization Engine: tutorials/visualization/visualization.md - Reference: - CLI: reference/cli.md From abca5e7464f437a4d7dd7a38739154ee5be521a5 Mon Sep 17 00:00:00 2001 From: daryaguettler Date: Wed, 11 Mar 2026 14:23:01 -0700 Subject: [PATCH 2/3] make minor UI updates to visualizations --- src/globi/tools/visualization/data_sources.py | 86 +++- src/globi/tools/visualization/main.py | 1 + src/globi/tools/visualization/models.py | 14 +- src/globi/tools/visualization/plotting.py | 206 ++++++++ src/globi/tools/visualization/results_data.py | 64 ++- src/globi/tools/visualization/utils.py | 357 +++++++++++-- .../tools/visualization/views/use_cases.py | 473 ++++++++++++++---- 7 files changed, 1048 insertions(+), 153 deletions(-) diff --git a/src/globi/tools/visualization/data_sources.py b/src/globi/tools/visualization/data_sources.py index e68434b..6b416d6 100644 --- a/src/globi/tools/visualization/data_sources.py +++ b/src/globi/tools/visualization/data_sources.py @@ -17,8 +17,12 @@ ) from globi.tools.visualization.utils import ( build_overheating_map_df, + build_overheating_summary_df, find_output_run_dirs, + get_overheating_thresholds, get_pq_file_for_run, + list_overheating_files_for_run, + load_heat_index_summary_for_chart, load_output_table, run_has_overheating, ) @@ -173,16 +177,42 @@ def 
list_runs_with_overheating(self) -> list[str]: """List run ids that have overheating outputs. Override for support.""" return [] + def list_overheating_files(self, run_id: str) -> list[str]: + """List available overheating df keys for a run. Override for support.""" + return [] + def load_overheating_map_data( self, run_id: str, cart_crs: str = "EPSG:3857", heat_threshold_c: float = 26.0, aggregation: str = "Zone Weighted", + data_source_type: str = "BasicOverheating", + heat_index_metric: str = "danger_hours", ) -> pd.DataFrame | None: """Load map-ready overheating data for a run. Override for support.""" return None + def load_overheating_heat_index_summary( + self, + run_id: str, + aggregation: str = "Zone Weighted", + ) -> dict[str, float] | None: + """Load HeatIndexCategories summary for stacked bar. Override for support.""" + return None + + def get_overheating_thresholds(self, run_id: str) -> list[float]: + """Get available heat thresholds for a run. Override for support.""" + return [26.0, 30.0, 35.0] + + def load_overheating_summary( + self, + run_id: str, + aggregation: str = "Zone Weighted", + ) -> pd.DataFrame | None: + """Load summary stats across all buildings for heatmap. 
Override for support.""" + return None + @classmethod def from_config(cls, config: DataSourceConfig) -> DataSource: """Factory method to create appropriate data source.""" @@ -246,18 +276,30 @@ def load_building_locations(self) -> pd.DataFrame | None: return pd.DataFrame(gdf.drop(columns=["geometry"], errors="ignore")) def list_runs_with_overheating(self) -> list[str]: - """List run ids that have BasicOverheating output.""" + """List run ids that have any overheating output.""" self.list_available_runs() return [rid for rid, d in self._run_dirs.items() if run_has_overheating(d)] + def list_overheating_files(self, run_id: str) -> list[str]: + """List available overheating df keys for a run.""" + run_dir = self._run_dirs.get(run_id) + if run_dir is None: + self.list_available_runs() + run_dir = self._run_dirs.get(run_id) + if run_dir is None: + return [] + return list_overheating_files_for_run(run_dir) + def load_overheating_map_data( self, run_id: str, cart_crs: str = "EPSG:3857", heat_threshold_c: float = 26.0, aggregation: str = "Zone Weighted", + data_source_type: str = "BasicOverheating", + heat_index_metric: str = "danger_hours", ) -> pd.DataFrame | None: - """Load map-ready overheating data (geometry + hours above threshold).""" + """Load map-ready overheating data (geometry + selected metric).""" run_dir = self._run_dirs.get(run_id) if run_dir is None: self.list_available_runs() @@ -269,8 +311,48 @@ def load_overheating_map_data( cart_crs=cart_crs, heat_threshold_c=heat_threshold_c, aggregation=aggregation, + data_source_type=data_source_type, + heat_index_metric=heat_index_metric, ) + def load_overheating_heat_index_summary( + self, + run_id: str, + aggregation: str = "Zone Weighted", + ) -> dict[str, float] | None: + """Load HeatIndexCategories summary (hours by category) for stacked bar chart.""" + run_dir = self._run_dirs.get(run_id) + if run_dir is None: + self.list_available_runs() + run_dir = self._run_dirs.get(run_id) + if run_dir is None: + return 
None + return load_heat_index_summary_for_chart(run_dir, aggregation=aggregation) + + def get_overheating_thresholds(self, run_id: str) -> list[float]: + """Get available heat thresholds from the data.""" + run_dir = self._run_dirs.get(run_id) + if run_dir is None: + self.list_available_runs() + run_dir = self._run_dirs.get(run_id) + if run_dir is None: + return [26.0, 30.0, 35.0] + return get_overheating_thresholds(run_dir) + + def load_overheating_summary( + self, + run_id: str, + aggregation: str = "Zone Weighted", + ) -> pd.DataFrame | None: + """Load summary stats across all buildings for heatmap.""" + run_dir = self._run_dirs.get(run_id) + if run_dir is None: + self.list_available_runs() + run_dir = self._run_dirs.get(run_id) + if run_dir is None: + return None + return build_overheating_summary_df(run_dir, aggregation=aggregation) + class S3DataSource(DataSource): """Data source for S3-stored experiment results.""" diff --git a/src/globi/tools/visualization/main.py b/src/globi/tools/visualization/main.py index ce072f2..44ddfb1 100644 --- a/src/globi/tools/visualization/main.py +++ b/src/globi/tools/visualization/main.py @@ -13,6 +13,7 @@ st.set_page_config(page_title="GLOBI Visualization", layout="wide") st.title("GLOBI Visualization") +# update the icon of the webpage to be a globe data_source = render_data_source_sidebar() st.session_state["data_source"] = data_source diff --git a/src/globi/tools/visualization/models.py b/src/globi/tools/visualization/models.py index 1a65b73..6b5234c 100644 --- a/src/globi/tools/visualization/models.py +++ b/src/globi/tools/visualization/models.py @@ -127,19 +127,19 @@ class RetrofitUseCaseConfig(BaseModel): class RetrofitCostParams(BaseModel): - """User-configurable retrofit cost and emissions parameters.""" + """User-configurable retrofit cost and emissions parameters (per scenario).""" - energy_cost_factors: dict[str, float] = Field( + per_scenario_energy_costs: dict[str, dict[str, float]] = Field( default_factory=dict, 
- description="Cost per kWh ($/kWh) by fuel type.", + description="Per-scenario energy cost factors: scenario -> fuel -> $/kWh.", ) - emissions_factors: dict[str, float] = Field( + per_scenario_emissions: dict[str, dict[str, float]] = Field( default_factory=dict, - description="Emissions factor (kg CO2/kWh) by fuel type.", + description="Per-scenario emissions factors: scenario -> fuel -> kg CO2/kWh.", ) - unit_costs: dict[str, float] = Field( + system_costs_per_sqm: dict[str, float] = Field( default_factory=dict, - description="Capital cost per scenario (scenario name -> total $).", + description="System cost per scenario in $/m², applied per building by conditioned area.", ) diff --git a/src/globi/tools/visualization/plotting.py b/src/globi/tools/visualization/plotting.py index 99647c8..5bad819 100644 --- a/src/globi/tools/visualization/plotting.py +++ b/src/globi/tools/visualization/plotting.py @@ -987,6 +987,212 @@ def create_comparison_stacked_bar_d3_html( return dedent(html) +_METRIC_GROUP_INTERPOLATORS = { + "Basic": "interpolateReds", + "EDH": "interpolateYlOrRd", + "HeatIndex": "interpolateOrRd", +} +_METRIC_GROUP_UNITS = { + "Basic": "hr", + "EDH": "degC-hr", + "HeatIndex": "hr", +} +_METRIC_GROUP_CSS_GRADIENTS = { + "Basic": "linear-gradient(to right, #fff5f0, #fb6a4a, #a50f15)", + "EDH": "linear-gradient(to right, #ffffcc, #fd8d3c, #bd0026)", + "HeatIndex": "linear-gradient(to right, #fff7ec, #fc8d59, #7f0000)", +} + + +def _classify_metric_group(col_name: str) -> str: + """Map a column name like 'Basic 25.0C' to its metric group key.""" + if col_name.startswith("Basic"): + return "Basic" + if col_name.startswith("EDH"): + return "EDH" + return "HeatIndex" + + +def create_overheating_heatmap_d3_html( + df: pd.DataFrame, + row_col: str = "statistic", + theme: Theme = "light", +) -> str: + """Build D3 heatmap of summary stats x overheating metrics. 
+ + Each metric group (Basic, EDH, HeatIndex) gets its own color palette and + is independently normalized, since they have different units. + """ + c = _theme_colors(theme) + value_cols = [ + col + for col in df.columns + if col != row_col and pd.api.types.is_numeric_dtype(df[col]) + ] + if not value_cols: + return "
<p>no numeric columns</p>
" + + rows = df[row_col].astype(str).tolist() + values = df[value_cols].fillna(0).values.tolist() + + # per-group normalization: all columns in a group share the same max + col_groups = [_classify_metric_group(vc) for vc in value_cols] + group_maxes: dict[str, float] = {} + for vc, grp in zip(value_cols, col_groups, strict=True): + mx = float(df[vc].max()) + group_maxes[grp] = max(group_maxes.get(grp, 0), mx) + # map each column to its group max + col_maxes = [max(group_maxes.get(grp, 1), 1e-9) for grp in col_groups] + # map each column to its interpolator name + col_interps = [ + _METRIC_GROUP_INTERPOLATORS.get(grp, "d3.interpolateReds") for grp in col_groups + ] + + payload = { + "rows": rows, + "cols": value_cols, + "values": values, + "col_maxes": col_maxes, + "col_interps": col_interps, + } + data_json = json.dumps(payload, ensure_ascii=False) + + # build legend html: one gradient bar per group present + seen_groups = dict.fromkeys(col_groups) + legend_parts: list[str] = [] + for grp in seen_groups: + gradient = _METRIC_GROUP_CSS_GRADIENTS.get( + grp, _METRIC_GROUP_CSS_GRADIENTS["Basic"] + ) + unit = _METRIC_GROUP_UNITS.get(grp, "") + mx = group_maxes.get(grp, 0) + legend_parts.append( + f'
' + f'{grp}' + f'0' + f'
' + f'{mx:,.1f} {unit}' + f"
" + ) + legend_html_str = "".join(legend_parts) + + html = f""" + + + + + Overheating summary heatmap + + + + + +
+
+ {legend_html_str} +
+ + + + """ + return dedent(html) + + def create_comparison_bar_d3_html( data: dict, value_key: str, diff --git a/src/globi/tools/visualization/results_data.py b/src/globi/tools/visualization/results_data.py index aa50eda..a7ccaf8 100644 --- a/src/globi/tools/visualization/results_data.py +++ b/src/globi/tools/visualization/results_data.py @@ -230,6 +230,24 @@ def _get_per_building_utilities(df: pd.DataFrame) -> pd.DataFrame | None: return cast(pd.DataFrame, result) +def _get_total_conditioned_area(df: pd.DataFrame) -> float: + """Sum conditioned area (m²) across all buildings from index.""" + area_name = "feature.geometry.energy_model_conditioned_area" + if area_name not in (df.index.names or []): + return 0.0 + level = list(df.index.names).index(area_name) + areas = df.index.get_level_values(level) + total = 0.0 + for a in areas: + try: + v = float(a) + if v > 0: + total += v + except (TypeError, ValueError): + pass + return total + + def compute_per_building_cost_emissions( df: pd.DataFrame, energy_cost_factors: dict[str, float], @@ -259,7 +277,7 @@ def build_retrofit_map_df( df: pd.DataFrame, energy_cost_factors: dict[str, float], emissions_factors: dict[str, float], - unit_cost: float = 0.0, + system_cost_per_sqm: float = 0.0, cart_crs: str = "EPSG:3857", ) -> pd.DataFrame | None: """Build map-ready df with geometry and retrofit metrics (eui, energy_cost, emissions, etc).""" @@ -297,69 +315,79 @@ def build_retrofit_map_df( geo_df["energy_cost"] = 0.0 geo_df["emissions"] = 0.0 - n = len(geo_df) - geo_df["capital_cost"] = (unit_cost / n) if n > 0 and unit_cost else 0.0 + if "conditioned_area" in geo_df.columns and system_cost_per_sqm: + geo_df["capital_cost"] = geo_df["conditioned_area"] * system_cost_per_sqm + else: + geo_df["capital_cost"] = 0.0 geo_df["total_cost"] = geo_df["energy_cost"] + geo_df["capital_cost"] return geo_df def compute_retrofit_cost_emissions( dfs: dict[str, pd.DataFrame], - energy_cost_factors: dict[str, float], - emissions_factors: 
dict[str, float], - unit_costs: dict[str, float] | None = None, + per_scenario_energy_costs: dict[str, dict[str, float]], + per_scenario_emissions: dict[str, dict[str, float]], + system_costs_per_sqm: dict[str, float] | None = None, ) -> tuple[dict[str, dict[str, float]], dict[str, dict[str, float]], dict[str, float]]: """Compute energy cost and emissions by scenario from Utilities consumption. + Each scenario can have its own energy cost and emissions factors. + Returns: cost_by_fuel: scenario -> fuel -> $ (annual energy cost) emissions_by_fuel: scenario -> fuel -> kg CO2 - capital_costs: scenario -> $ (from unit_costs) + capital_costs: scenario -> $ (system_cost_per_sqm * total area) """ cost_by_fuel: dict[str, dict[str, float]] = {} emissions_by_fuel: dict[str, dict[str, float]] = {} - capital_costs: dict[str, float] = dict(unit_costs or {}) + capital_costs: dict[str, float] = {} for scenario_name, df in dfs.items(): utilities = _get_utilities_kwh_by_fuel(df) + energy_cost_factors = per_scenario_energy_costs.get(scenario_name, {}) + em_factors = per_scenario_emissions.get(scenario_name, {}) cost_by_fuel[scenario_name] = {} emissions_by_fuel[scenario_name] = {} for meter, kwh in utilities.items(): fuel_key = normalize_fuel_name(meter) cost_factor = energy_cost_factors.get(fuel_key, 0.0) - emissions_factor = emissions_factors.get(fuel_key, 0.0) + emissions_factor = em_factors.get(fuel_key, 0.0) cost_by_fuel[scenario_name][meter] = kwh * cost_factor emissions_by_fuel[scenario_name][meter] = kwh * emissions_factor + if system_costs_per_sqm: + cost_per_sqm = system_costs_per_sqm.get(scenario_name, 0.0) + total_area = _get_total_conditioned_area(df) + capital_costs[scenario_name] = cost_per_sqm * total_area + return cost_by_fuel, emissions_by_fuel, capital_costs def extract_retrofit_comparison_data( dfs: dict[str, pd.DataFrame], region_name: str = "", - energy_cost_factors: dict[str, float] | None = None, - emissions_factors: dict[str, float] | None = None, - 
unit_costs: dict[str, float] | None = None, + per_scenario_energy_costs: dict[str, dict[str, float]] | None = None, + per_scenario_emissions: dict[str, dict[str, float]] | None = None, + system_costs_per_sqm: dict[str, float] | None = None, ) -> dict: - """Extract comparison data with optional cost and emissions. + """Extract comparison data with optional per-scenario cost and emissions. Merges extract_comparison_data output with cost_data, emissions_data, cost_by_fuel, emissions_by_fuel when factors are provided. """ base = extract_comparison_data(dfs, region_name) - if energy_cost_factors or emissions_factors: + if per_scenario_energy_costs or per_scenario_emissions: cost_by_fuel, emissions_by_fuel, capital = compute_retrofit_cost_emissions( dfs, - energy_cost_factors or {}, - emissions_factors or {}, - unit_costs, + per_scenario_energy_costs or {}, + per_scenario_emissions or {}, + system_costs_per_sqm, ) base["cost_by_fuel"] = cost_by_fuel base["emissions_by_fuel"] = emissions_by_fuel base["capital_costs"] = capital - # totals for bar chart base["cost_totals"] = { s: sum(cf.values()) + capital.get(s, 0) for s, cf in cost_by_fuel.items() } diff --git a/src/globi/tools/visualization/utils.py b/src/globi/tools/visualization/utils.py index 6812a8f..53dec44 100644 --- a/src/globi/tools/visualization/utils.py +++ b/src/globi/tools/visualization/utils.py @@ -75,17 +75,67 @@ def find_output_run_dirs(base_dir: Path | str) -> list[Path]: return sorted(seen) -OVERHEATING_PQ_NAMES = ("BasicOverheating.pq", "BasicOverheating.parquet") +OVERHEATING_DF_KEYS = ( + "BasicOverheating", + "ExceedanceDegreeHours", + "HeatIndexCategories", +) +OVERHEATING_FILE_MAP = { + "BasicOverheating": ("BasicOverheating.pq", "BasicOverheating.parquet"), + "ExceedanceDegreeHours": ( + "ExceedanceDegreeHours.pq", + "ExceedanceDegreeHours.parquet", + ), + "HeatIndexCategories": ("HeatIndexCategories.pq", "HeatIndexCategories.parquet"), +} def run_has_overheating(run_dir: Path) -> bool: - """True 
if run directory contains overheating output (BasicOverheating).""" - return any((run_dir / name).is_file() for name in OVERHEATING_PQ_NAMES) + """True if run directory contains any overheating output.""" + return any( + (run_dir / name).is_file() + for names in OVERHEATING_FILE_MAP.values() + for name in names + ) + +def list_overheating_files_for_run(run_dir: Path) -> list[str]: + """Return list of available overheating df keys (e.g. BasicOverheating, ExceedanceDegreeHours).""" + available: list[str] = [] + for key, names in OVERHEATING_FILE_MAP.items(): + if any((run_dir / n).is_file() for n in names): + available.append(key) + return available -def get_overheating_file_for_run(run_dir: Path) -> Path | None: - """Return BasicOverheating file path if present.""" - for name in OVERHEATING_PQ_NAMES: + +def get_overheating_thresholds(run_dir: Path) -> list[float]: + """Read available heat thresholds from BasicOverheating (or ExceedanceDegreeHours).""" + for key in ("BasicOverheating", "ExceedanceDegreeHours"): + oh_path = get_overheating_file_for_run(run_dir, key) + if oh_path is None: + continue + df = load_output_table(oh_path) + flat = df.reset_index() + thresh_col = _find_col(flat, "Threshold [degC]") + polarity_col = _find_col(flat, "Polarity") + if thresh_col is None: + continue + if polarity_col is not None: + flat = flat[flat[polarity_col] == "Overheat"] + vals = sorted(pd.Series(flat[thresh_col]).dropna().unique().tolist()) + if vals: + return vals + return [26.0, 30.0, 35.0] + + +def get_overheating_file_for_run( + run_dir: Path, df_key: str = "BasicOverheating" +) -> Path | None: + """Return overheating file path for given df_key if present.""" + names = OVERHEATING_FILE_MAP.get( + df_key, ("BasicOverheating.pq", "BasicOverheating.parquet") + ) + for name in names: p = run_dir / name if p.is_file(): return p @@ -429,6 +479,7 @@ def build_map_df_from_output( # noqa: C901 LON_COL: float(lon), ROTATED_RECTANGLE_COL: wkt, "height": height_m, + 
"conditioned_area": area, "eui": eui, "peak_per_sqm": peak_per_sqm, "total_energy": total_energy, @@ -451,24 +502,117 @@ def build_map_df_from_output( # noqa: C901 return out +def _extract_basic_overheating( + oh_flat: pd.DataFrame, + bid_col, + heat_threshold_c: float, + aggregation: str, +) -> pd.DataFrame | None: + """Extract building-level overheating hours from BasicOverheating flat df.""" + polarity_col = _find_col(oh_flat, "Polarity") + thresh_col = _find_col(oh_flat, "Threshold [degC]") + agg_col = _find_col(oh_flat, "Aggregation Unit") + group_col = _find_col(oh_flat, "Group") + val_col = "Total Hours [hr]" if "Total Hours [hr]" in oh_flat.columns else None + if not all([polarity_col, thresh_col, agg_col, group_col, val_col]): + return None + mask = ( + (oh_flat[polarity_col] == "Overheat") + & (oh_flat[thresh_col] == heat_threshold_c) + & (oh_flat[agg_col] == "Building") + & (oh_flat[group_col] == aggregation) + ) + oh_sub = oh_flat.loc[mask, [bid_col, val_col]].drop_duplicates(subset=[bid_col]) + oh_sub = oh_sub.rename(columns={val_col: "map_value"}) + return oh_sub + + +def _extract_exceedance_degree_hours( + oh_flat: pd.DataFrame, + bid_col, + heat_threshold_c: float, + aggregation: str, +) -> pd.DataFrame | None: + """Extract building-level EDH from ExceedanceDegreeHours flat df.""" + polarity_col = _find_col(oh_flat, "Polarity") + thresh_col = _find_col(oh_flat, "Threshold [degC]") + agg_col = _find_col(oh_flat, "Aggregation Unit") + group_col = _find_col(oh_flat, "Group") + val_col = "EDH [degC-hr]" if "EDH [degC-hr]" in oh_flat.columns else None + if not all([polarity_col, thresh_col, agg_col, group_col, val_col]): + return None + mask = ( + (oh_flat[polarity_col] == "Overheat") + & (oh_flat[thresh_col] == heat_threshold_c) + & (oh_flat[agg_col] == "Building") + & (oh_flat[group_col] == aggregation) + ) + oh_sub = oh_flat.loc[mask, [bid_col, val_col]].drop_duplicates(subset=[bid_col]) + oh_sub = oh_sub.rename(columns={val_col: "map_value"}) + 
return oh_sub + + +def _extract_heat_index_categories( + oh_flat: pd.DataFrame, + bid_col, + aggregation: str, + metric: str, +) -> pd.DataFrame | None: + """Extract building-level heat index metric from HeatIndexCategories flat df.""" + agg_col = _find_col(oh_flat, "Aggregation Unit") + group_col = _find_col(oh_flat, "Group") + if agg_col is None or group_col is None: + return None + # map UI aggregation to HeatIndex Group values + group_map = {"Zone Weighted": "Zone Weighted", "Worst Zone": "Worst per Timestep"} + group_val = group_map.get(aggregation, aggregation) + mask = (oh_flat[agg_col] == "Building") & (oh_flat[group_col] == group_val) + hi_sub = oh_flat.loc[mask].copy() + if hi_sub.empty: + return None + danger_cols = [ + c + for c in hi_sub.columns + if c + in ( + "Extreme Danger [hr]", + "Danger [hr]", + "Extreme Caution [hr]", + "Caution [hr]", + ) + ] + if metric == "danger_hours" and danger_cols: + hi_sub["map_value"] = hi_sub[danger_cols].sum(axis=1) + elif metric in hi_sub.columns: + hi_sub["map_value"] = hi_sub[metric] + else: + return None + oh_sub = hi_sub[[bid_col, "map_value"]].drop_duplicates(subset=[bid_col]) + return oh_sub + + def build_overheating_map_df( run_dir: Path, cart_crs: str = "EPSG:3857", heat_threshold_c: float = 26.0, aggregation: str = "Zone Weighted", + data_source_type: str = "BasicOverheating", + heat_index_metric: str = "danger_hours", ) -> pd.DataFrame | None: - """Build map-ready df with overheating hours per building. + """Build map-ready df with overheating metric per building. - Merges BasicOverheating (hours above threshold) with EnergyAndPeak geometry. - Returns df with lat, lon, rotated_rectangle, height, overheating_hours. + Merges overheating data with EnergyAndPeak geometry. Returns df with lat, lon, + rotated_rectangle, height, map_value. Args: - run_dir: Run directory containing BasicOverheating and EnergyAndPeak. + run_dir: Run directory containing overheating and EnergyAndPeak files. 
cart_crs: CRS for rotated_rectangle. - heat_threshold_c: Overheating threshold (default 26C). - aggregation: "Zone Weighted" or "Worst Zone". + heat_threshold_c: Overheating threshold (BasicOverheating, ExceedanceDegreeHours). + aggregation: Zone Weighted, Worst Zone, etc. + data_source_type: BasicOverheating, ExceedanceDegreeHours, or HeatIndexCategories. + heat_index_metric: For HeatIndexCategories: danger_hours or column name. """ - oh_path = get_overheating_file_for_run(run_dir) + oh_path = get_overheating_file_for_run(run_dir, data_source_type) energy_path = get_pq_file_for_run(run_dir) if oh_path is None or energy_path is None: return None @@ -485,29 +629,186 @@ def build_overheating_map_df( if bid_col is None: return None - polarity_col = _find_col(oh_flat, "Polarity") - thresh_col = _find_col(oh_flat, "Threshold [degC]") - agg_col = _find_col(oh_flat, "Aggregation Unit") - group_col = _find_col(oh_flat, "Group") - val_col = "Total Hours [hr]" if "Total Hours [hr]" in oh_flat.columns else None - if not all([polarity_col, thresh_col, agg_col, group_col, val_col]): + if data_source_type == "BasicOverheating": + oh_sub = _extract_basic_overheating( + oh_flat, bid_col, heat_threshold_c, aggregation + ) + elif data_source_type == "ExceedanceDegreeHours": + oh_sub = _extract_exceedance_degree_hours( + oh_flat, bid_col, heat_threshold_c, aggregation + ) + elif data_source_type == "HeatIndexCategories": + oh_sub = _extract_heat_index_categories( + oh_flat, bid_col, aggregation, heat_index_metric + ) + else: return None - mask = ( - (oh_flat[polarity_col] == "Overheat") - & (oh_flat[thresh_col] == heat_threshold_c) - & (oh_flat[agg_col] == "Building") - & (oh_flat[group_col] == aggregation) - ) - oh_sub = oh_flat.loc[mask, [bid_col, val_col]].drop_duplicates(subset=[bid_col]) - oh_sub = oh_sub.rename(columns={val_col: "overheating_hours"}) - oh_sub[bid_col] = oh_sub[bid_col].astype(str) + if oh_sub is None or oh_sub.empty: + return None + oh_sub[bid_col] = 
oh_sub[bid_col].astype(str) geo_df[BUILDING_ID_COL] = geo_df[BUILDING_ID_COL].astype(str) merged = geo_df.merge(oh_sub, on=BUILDING_ID_COL, how="inner") return merged if not merged.empty else None +def _load_one_overheating_metric( + run_dir: Path, + df_key: str, + bid_col: str, + heat_threshold_c: float, + aggregation: str, +) -> pd.DataFrame | None: + """Load one overheating metric as building_id + value df.""" + oh_path = get_overheating_file_for_run(run_dir, df_key) + if not oh_path: + return None + df = load_output_table(oh_path) + flat = df.reset_index() + bid = _find_col(flat, bid_col) + if not bid: + return None + if df_key == "BasicOverheating": + sub = _extract_basic_overheating(flat, bid, heat_threshold_c, aggregation) + col_name = "BasicOverheating_hr" + elif df_key == "ExceedanceDegreeHours": + sub = _extract_exceedance_degree_hours(flat, bid, heat_threshold_c, aggregation) + col_name = "ExceedanceDegreeHours" + elif df_key == "HeatIndexCategories": + sub = _extract_heat_index_categories(flat, bid, aggregation, "danger_hours") + col_name = "HeatIndex_danger_hr" + else: + return None + if sub is None: + return None + sub = sub.rename(columns={"map_value": col_name}) + sub[bid] = sub[bid].astype(str) + return sub + + +def _summarize_values(vals) -> dict[str, float]: + """Compute mean, median, p95, max from a numeric array.""" + import numpy as np + + return { + "mean": float(np.mean(vals)), + "median": float(np.median(vals)), + "p95": float(np.percentile(vals, 95)), + "max": float(np.max(vals)), + } + + +def _build_basic_edh_records( + run_dir: Path, + available: list[str], + thresholds: list[float], + aggregation: str, +) -> dict[str, dict[str, float]]: + records: dict[str, dict[str, float]] = {} + for df_key in ("BasicOverheating", "ExceedanceDegreeHours"): + if df_key not in available: + continue + label_prefix = "Basic" if df_key == "BasicOverheating" else "EDH" + for thresh in thresholds: + sub = _load_one_overheating_metric( + run_dir, df_key, 
BUILDING_ID_COL, thresh, aggregation + ) + if sub is None or sub.empty: + continue + vals = sub.iloc[:, -1].dropna().values + if len(vals) > 0: + records[f"{label_prefix} {thresh}C"] = _summarize_values(vals) + return records + + +def _build_heat_index_record( + run_dir: Path, + available: list[str], + aggregation: str, +) -> dict[str, dict[str, float]] | None: + if "HeatIndexCategories" not in available: + return None + sub = _load_one_overheating_metric( + run_dir, "HeatIndexCategories", BUILDING_ID_COL, 0.0, aggregation + ) + if sub is None or sub.empty: + return None + vals = sub.iloc[:, -1].dropna().values + if len(vals) == 0: + return None + return {"HeatIndex discomfort": _summarize_values(vals)} + + +def build_overheating_summary_df( + run_dir: Path, + aggregation: str = "Zone Weighted", +) -> pd.DataFrame | None: + """Build summary stats (mean, median, max, p95) per metric and threshold. + + Rows = stat names, columns = metric/threshold combos. Suitable for heatmap. + """ + available = list_overheating_files_for_run(run_dir) + if not available: + return None + + thresholds = get_overheating_thresholds(run_dir) + records: dict[str, dict[str, float]] = {} + records.update( + _build_basic_edh_records(run_dir, available, thresholds, aggregation) + ) + hi_rec = _build_heat_index_record(run_dir, available, aggregation) + if hi_rec: + records.update(hi_rec) + + if not records: + return None + + df = pd.DataFrame(records) + df.index.name = "statistic" + return df.reset_index() + + +def load_heat_index_summary_for_chart( + run_dir: Path, + aggregation: str = "Zone Weighted", +) -> dict[str, float] | None: + """Load HeatIndexCategories and return summed hours by category for stacked bar. + + Returns dict like {"Extreme Danger [hr]": 0, "Danger [hr]": 10, ...}. 
+ """ + oh_path = get_overheating_file_for_run(run_dir, "HeatIndexCategories") + if oh_path is None: + return None + oh_df = load_output_table(oh_path) + oh_flat = oh_df.reset_index() + agg_col = _find_col(oh_flat, "Aggregation Unit") + group_col = _find_col(oh_flat, "Group") + if agg_col is None or group_col is None: + return None + group_map = {"Zone Weighted": "Zone Weighted", "Worst Zone": "Worst per Timestep"} + group_val = group_map.get(aggregation, aggregation) + mask = (oh_flat[agg_col] == "Building") & (oh_flat[group_col] == group_val) + hi_sub = oh_flat.loc[mask] + if hi_sub.empty: + return None + cat_cols = [ + c + for c in hi_sub.columns + if c + in ( + "Extreme Danger [hr]", + "Danger [hr]", + "Extreme Caution [hr]", + "Caution [hr]", + "Normal [hr]", + ) + ] + if not cat_cols: + return None + return hi_sub[cat_cols].sum().to_dict() + + def merge_with_building_locations( # noqa: C901 df: pd.DataFrame, locations_df: pd.DataFrame, diff --git a/src/globi/tools/visualization/views/use_cases.py b/src/globi/tools/visualization/views/use_cases.py index 0f2b2bc..4d5d118 100644 --- a/src/globi/tools/visualization/views/use_cases.py +++ b/src/globi/tools/visualization/views/use_cases.py @@ -2,6 +2,8 @@ from __future__ import annotations +from typing import cast + import pandas as pd import streamlit as st import streamlit.components.v1 as components @@ -9,9 +11,12 @@ from globi.tools.visualization.data_sources import DataSource from globi.tools.visualization.models import UseCaseType from globi.tools.visualization.plotting import ( + Theme, create_comparison_bar_d3_html, create_comparison_kde_d3_html, create_comparison_stacked_bar_d3_html, + create_histogram_d3_html, + create_overheating_heatmap_d3_html, ) from globi.tools.visualization.results_data import ( apply_scenario_display_names, @@ -20,6 +25,7 @@ is_results_format, normalize_fuel_name, ) +from globi.tools.visualization.views.raw_data import _chart_download, _streamlit_theme def 
render_use_cases_page(data_source: DataSource) -> None: @@ -46,55 +52,141 @@ def render_use_cases_page(data_source: DataSource) -> None: _DEFAULT_EMISSIONS = (0.4, 0.2, 0.27, 0.23) +def _build_eui_csv(comparison_data: dict) -> pd.DataFrame: + """Build csv-friendly dataframe from eui distribution data.""" + eui = comparison_data.get("eui_data", {}) + if not eui: + return pd.DataFrame() + max_len = max((len(v) for v in eui.values()), default=0) + return pd.DataFrame({k: v + [None] * (max_len - len(v)) for k, v in eui.items()}) + + +def _build_stacked_csv(comparison_data: dict, data_key: str) -> pd.DataFrame: + """Build csv-friendly dataframe from stacked bar data.""" + data = comparison_data.get(data_key, {}) + if not data: + return pd.DataFrame() + df = pd.DataFrame(data).T.fillna(0) + df.index.name = "scenario" + return df.reset_index() + + +def _build_totals_csv( + comparison_data: dict, + value_key: str, + label: str = "value", +) -> pd.DataFrame: + """Build csv-friendly dataframe from scenario totals.""" + scenarios = comparison_data.get("scenarios", []) + values = comparison_data.get(value_key, {}) + if not values: + return pd.DataFrame() + return pd.DataFrame({ + "scenario": [s for s in scenarios if s in values], + label: [values[s] for s in scenarios if s in values], + }) + + +def _uniquify_display_names( + run_ids, + raw_names: dict[str, str], +) -> dict[str, str]: + """Build run_id -> unique display name mapping, appending (n) on collisions.""" + seen: dict[str, int] = {} + out: dict[str, str] = {} + for rid in run_ids: + d = (raw_names.get(rid, rid) or "").strip() or rid + if d in seen: + seen[d] += 1 + d = f"{d} ({seen[d]})" + else: + seen[d] = 1 + out[rid] = d + return out + + def _retrofit_params_form( selected_runs: list[str], -) -> tuple[dict, dict, dict | None, bool]: - """Render retrofit cost/emissions form, return factors and unit costs.""" - st.markdown("**Energy cost factors** ($/kWh by fuel type)") - energy_cost_factors = {} - emissions_factors 
= {} - ec_cols = st.columns(4) - for i, (label, default) in enumerate( - zip(_FUEL_LABELS, _DEFAULT_ENERGY_COSTS, strict=True) - ): - with ec_cols[i % 4]: - key = normalize_fuel_name(label) - energy_cost_factors[key] = st.number_input( - label, - min_value=0.0, - value=default, - format="%.3f", - key=f"ec_{key}", +) -> tuple[ + dict[str, dict[str, float]], + dict[str, dict[str, float]], + dict[str, float], + dict[str, str], +]: + """Render per-scenario cost/emissions/system-cost form. + + Returns (per_scenario_energy_costs, per_scenario_emissions, + system_costs_per_sqm, display_names). All dicts keyed by run_id. + """ + per_scenario_energy_costs: dict[str, dict[str, float]] = {} + per_scenario_emissions: dict[str, dict[str, float]] = {} + system_costs_per_sqm: dict[str, float] = {} + display_names: dict[str, str] = {} + + for run_id in selected_runs: + with st.expander( + f"Parameters: {run_id}", + expanded=(run_id == selected_runs[0]), + ): + val = st.text_input( + "Display name", + value=run_id, + key=f"retrofit_display_{run_id}", + placeholder=run_id, + ) + display_names[run_id] = (val.strip() or run_id) if val else run_id + + st.markdown("**Energy cost factors** ($/kWh by fuel type)") + ec_factors: dict[str, float] = {} + ec_cols = st.columns(4) + for i, (label, default) in enumerate( + zip(_FUEL_LABELS, _DEFAULT_ENERGY_COSTS, strict=True) + ): + with ec_cols[i % 4]: + key = normalize_fuel_name(label) + ec_factors[key] = st.number_input( + label, + min_value=0.0, + value=default, + format="%.3f", + key=f"ec_{key}_{run_id}", + ) + per_scenario_energy_costs[run_id] = ec_factors + + st.markdown("**Emissions factors** (kg CO2/kWh by fuel type)") + em_factors: dict[str, float] = {} + em_cols = st.columns(4) + for i, (label, default) in enumerate( + zip(_FUEL_LABELS, _DEFAULT_EMISSIONS, strict=True) + ): + with em_cols[i % 4]: + key = normalize_fuel_name(label) + em_factors[key] = st.number_input( + label, + min_value=0.0, + value=default, + format="%.3f", + 
key=f"em_{key}_{run_id}", + ) + per_scenario_emissions[run_id] = em_factors + + st.markdown( + "**System cost** ($/m² applied per building by conditioned area)" ) - st.markdown("**Emissions factors** (kg CO2/kWh by fuel type)") - em_cols = st.columns(4) - for i, (label, default) in enumerate( - zip(_FUEL_LABELS, _DEFAULT_EMISSIONS, strict=True) - ): - with em_cols[i % 4]: - key = normalize_fuel_name(label) - emissions_factors[key] = st.number_input( - label, + system_costs_per_sqm[run_id] = st.number_input( + "System cost ($/m²)", min_value=0.0, - value=default, - format="%.3f", - key=f"em_{key}", + value=0.0, + format="%.2f", + key=f"syscost_{run_id}", ) - st.markdown("**Unit costs** (capital cost $ per scenario, optional)") - use_unit_costs = st.checkbox("Include capital costs per scenario", value=False) - unit_costs: dict[str, float] = {} - if use_unit_costs: - uc_cols = st.columns(min(4, len(selected_runs))) - for i, run_id in enumerate(selected_runs): - with uc_cols[i % 4]: - unit_costs[run_id] = st.number_input( - run_id, - min_value=0.0, - value=0.0, - format="%.0f", - key=f"uc_{run_id}", - ) - return energy_cost_factors, emissions_factors, unit_costs or None, use_unit_costs + + return ( + per_scenario_energy_costs, + per_scenario_emissions, + system_costs_per_sqm, + display_names, + ) def _render_retrofit_use_case(data_source: DataSource) -> None: @@ -102,7 +194,8 @@ def _render_retrofit_use_case(data_source: DataSource) -> None: st.markdown("### Retrofit Analysis") st.markdown( "Compare retrofit scenarios with energy savings, costs, and emissions. " - "Enter unit costs, emissions factors, and energy cost factors to see cost/emissions comparison." + "Each scenario has its own energy cost factors, emissions factors, and " + "system cost ($/m², applied per building by conditioned area)." 
) available_runs = data_source.list_available_runs() @@ -118,9 +211,12 @@ def _render_retrofit_use_case(data_source: DataSource) -> None: ) with st.expander("Retrofit cost and emissions parameters", expanded=True): - energy_cost_factors, emissions_factors, unit_costs, use_unit_costs = ( - _retrofit_params_form(selected_runs) - ) + ( + per_scenario_energy_costs, + per_scenario_emissions, + system_costs_per_sqm, + display_names, + ) = _retrofit_params_form(selected_runs) if len(selected_runs) < 2: st.info("Select at least 2 scenarios to generate a comparison.") @@ -146,34 +242,55 @@ def _render_retrofit_use_case(data_source: DataSource) -> None: st.error("Could not load enough valid scenarios for comparison.") return + # build unique display-name mapping (disambiguate duplicates) + name_map = _uniquify_display_names(dfs.keys(), display_names) + + # remap all dicts to display names so charts/map use them throughout + dfs = {name_map[k]: v for k, v in dfs.items()} + per_scenario_energy_costs = { + name_map.get(k, k): v for k, v in per_scenario_energy_costs.items() + } + per_scenario_emissions = { + name_map.get(k, k): v for k, v in per_scenario_emissions.items() + } + system_costs_per_sqm = { + name_map.get(k, k): v for k, v in system_costs_per_sqm.items() + } + with st.spinner("Building comparison dashboard..."): comparison_data = extract_retrofit_comparison_data( dfs, region_name="", - energy_cost_factors=energy_cost_factors, - emissions_factors=emissions_factors, - unit_costs=unit_costs if use_unit_costs else None, + per_scenario_energy_costs=per_scenario_energy_costs, + per_scenario_emissions=per_scenario_emissions, + system_costs_per_sqm=system_costs_per_sqm, ) _render_retrofit_charts( comparison_data, dfs=dfs, - energy_cost_factors=energy_cost_factors, - emissions_factors=emissions_factors, - unit_costs=unit_costs if use_unit_costs else None, + per_scenario_energy_costs=per_scenario_energy_costs, + per_scenario_emissions=per_scenario_emissions, + 
system_costs_per_sqm=system_costs_per_sqm, ) def _render_retrofit_charts( comparison_data: dict, dfs: dict[str, pd.DataFrame] | None = None, - energy_cost_factors: dict[str, float] | None = None, - emissions_factors: dict[str, float] | None = None, - unit_costs: dict[str, float] | None = None, + per_scenario_energy_costs: dict[str, dict[str, float]] | None = None, + per_scenario_emissions: dict[str, dict[str, float]] | None = None, + system_costs_per_sqm: dict[str, float] | None = None, ) -> None: """Render retrofit comparison charts (EUI, end uses, fuel, cost, emissions) and map.""" st.markdown("#### EUI distribution comparison") kde_html = create_comparison_kde_d3_html(comparison_data) components.html(kde_html, height=360, scrolling=False) + _chart_download( + "retro_kde", + _build_eui_csv(comparison_data).to_csv(index=False), + kde_html, + "eui_distribution", + ) col_left, col_right = st.columns(2) with col_left: @@ -185,6 +302,12 @@ def _render_retrofit_charts( title="end uses comparison", ) components.html(eu_html, height=360, scrolling=False) + _chart_download( + "retro_eu", + _build_stacked_csv(comparison_data, "end_uses_data").to_csv(index=False), + eu_html, + "end_uses", + ) with col_right: st.markdown("#### Fuel/utilities comparison") fuel_html = create_comparison_stacked_bar_d3_html( @@ -194,9 +317,15 @@ def _render_retrofit_charts( title="fuel/utilities comparison", ) components.html(fuel_html, height=360, scrolling=False) + _chart_download( + "retro_fuel", + _build_stacked_csv(comparison_data, "utilities_data").to_csv(index=False), + fuel_html, + "fuel_utilities", + ) if comparison_data.get("cost_totals"): - st.markdown("#### Total cost comparison (energy + capital)") + st.markdown("#### Total cost comparison (energy + system)") cost_data = { "scenarios": comparison_data["scenarios"], "cost_totals": comparison_data["cost_totals"], @@ -208,6 +337,14 @@ def _render_retrofit_charts( value_label="annual cost ($)", ) components.html(cost_html, height=200, 
scrolling=False) + _chart_download( + "retro_cost", + _build_totals_csv(comparison_data, "cost_totals", "annual_cost_usd").to_csv( + index=False + ), + cost_html, + "total_cost", + ) if comparison_data.get("emissions_totals"): st.markdown("#### Total emissions comparison") @@ -222,21 +359,29 @@ def _render_retrofit_charts( value_label="kg CO2/year", ) components.html(em_html, height=200, scrolling=False) + _chart_download( + "retro_em", + _build_totals_csv( + comparison_data, "emissions_totals", "kg_co2_per_year" + ).to_csv(index=False), + em_html, + "emissions", + ) - if dfs and energy_cost_factors is not None and emissions_factors is not None: + if dfs and per_scenario_energy_costs and per_scenario_emissions: _render_retrofit_map( dfs, - energy_cost_factors, - emissions_factors, - unit_costs or {}, + per_scenario_energy_costs, + per_scenario_emissions, + system_costs_per_sqm or {}, ) def _render_retrofit_map( dfs: dict[str, pd.DataFrame], - energy_cost_factors: dict[str, float], - emissions_factors: dict[str, float], - unit_costs: dict[str, float], + per_scenario_energy_costs: dict[str, dict[str, float]], + per_scenario_emissions: dict[str, dict[str, float]], + system_costs_per_sqm: dict[str, float], ) -> None: """Render pydeck map with selectable metric and colormap.""" from globi.tools.visualization.plotting import create_building_map_deck @@ -260,7 +405,7 @@ def _render_retrofit_map( ("total_energy", "viridis", "Total energy (kWh)"), ("energy_cost", "reds", "Energy cost ($)"), ("emissions", "reds", "Emissions (kg CO2)"), - ("capital_cost", "plasma", "Capital cost ($)"), + ("capital_cost", "plasma", "System cost ($)"), ("total_cost", "reds", "Total cost ($)"), ("peak_per_sqm", "reds", "Peak per sqm (kW/m²)"), ("total_peak", "plasma", "Total peak (kW)"), @@ -284,12 +429,14 @@ def _render_retrofit_map( key="retrofit_map_crs", ) - unit_cost = unit_costs.get(scenario, 0.0) + scenario_ec = per_scenario_energy_costs.get(scenario, {}) + scenario_em = 
per_scenario_emissions.get(scenario, {}) + scenario_syscost = system_costs_per_sqm.get(scenario, 0.0) map_df = build_retrofit_map_df( dfs[scenario], - energy_cost_factors, - emissions_factors, - unit_cost=unit_cost, + scenario_ec, + scenario_em, + system_cost_per_sqm=scenario_syscost, cart_crs=cart_crs, ) if map_df is None or map_df.empty: @@ -321,35 +468,68 @@ def _render_retrofit_map( _render_colormap_legend(metric_label, value_stats, cmap) -def _render_overheating_use_case(data_source: DataSource) -> None: - """Render overheating map: pydeck reds map highlighting buildings with higher overheating.""" - st.markdown("### Overheating Analysis") - st.markdown( - "Identify buildings at risk of overheating. Requires runs with overheating " - "outputs (manifest with calculate_overheating: true)." - ) +_HEAT_INDEX_METRICS = [ + ("danger_hours", "Total discomfort hours (Danger + Caution + etc)"), + ("Extreme Danger [hr]", "Extreme Danger [hr]"), + ("Danger [hr]", "Danger [hr]"), + ("Extreme Caution [hr]", "Extreme Caution [hr]"), + ("Caution [hr]", "Caution [hr]"), + ("Normal [hr]", "Normal [hr]"), +] - runs_with_oh = data_source.list_runs_with_overheating() - if not runs_with_oh: - st.warning( - "No runs with overheating outputs found. Enable overheating in your " - "manifest (calculate_overheating: true) and re-run simulations." 
- ) - return - selected_run = st.selectbox( - "Select Run", - options=runs_with_oh, - key="overheating_run", +def _render_overheating_summary_charts( + map_df: pd.DataFrame, + data_source_type: str, + heat_threshold: float, + heat_index_metric: str, + theme: Theme, +) -> None: + """Render histogram with download.""" + map_values = map_df["map_value"].dropna().tolist() + if not map_values: + return + if data_source_type == "BasicOverheating": + x_label = f"Hours above {heat_threshold}C" + elif data_source_type == "ExceedanceDegreeHours": + x_label = f"Degree-hours above {heat_threshold}C" + else: + x_label = heat_index_metric.replace("_", " ").title() + hist_html = create_histogram_d3_html( + map_values, title="Distribution", x_label=x_label, theme=theme + ) + components.html(hist_html, height=320, scrolling=False) + hist_df = pd.DataFrame({ + "building_id": map_df["building_id"], + "value": map_df["map_value"], + }) + _chart_download( + "oh_hist", + hist_df.to_csv(index=False), + hist_html, + "overheating_distribution", ) + +def _overheating_form_controls( + available_files: list[str], + thresholds: list[float], +) -> tuple[str, float, str, str, str] | None: + """Render overheating form, return (data_source_type, threshold, aggregation, metric, crs) or None.""" + data_source_type = st.selectbox( + "Data source", + options=available_files, + format_func=lambda x: x.replace("_", " "), + key="overheating_data_source", + ) col1, col2 = st.columns(2) with col1: heat_threshold = st.selectbox( "Temperature threshold (C)", - options=[26.0, 30.0, 35.0], + options=thresholds, index=0, key="overheating_threshold", + disabled=(data_source_type == "HeatIndexCategories"), ) with col2: aggregation = st.selectbox( @@ -358,23 +538,86 @@ def _render_overheating_use_case(data_source: DataSource) -> None: index=0, key="overheating_aggregation", ) - + heat_index_metric = "danger_hours" + if data_source_type == "HeatIndexCategories": + heat_index_metric = st.selectbox( + "Metric", + 
options=[m[0] for m in _HEAT_INDEX_METRICS], + format_func=lambda x: next( + (m[1] for m in _HEAT_INDEX_METRICS if m[0] == x), x + ), + index=0, + key="overheating_heat_index_metric", + ) cart_crs = st.selectbox( "Polygon CRS (rotated_rectangle)", options=["EPSG:3857", "EPSG:32633", "EPSG:32632", "EPSG:4326"], index=0, key="overheating_crs", ) - if not st.button("Show Overheating Map", key="overheating_map_btn"): + return None + return data_source_type, heat_threshold, aggregation, heat_index_metric, cart_crs + + +def _render_overheating_use_case(data_source: DataSource) -> None: + """Render overheating analysis: map, summary stats, and D3 charts.""" + st.markdown("### Overheating Analysis") + st.markdown( + "Identify buildings at risk of overheating. Supports BasicOverheating, " + "ExceedanceDegreeHours, and HeatIndexCategories outputs." + ) + + runs_with_oh = data_source.list_runs_with_overheating() + if not runs_with_oh: + st.warning( + "No runs with overheating outputs found. Enable overheating in your " + "manifest (overheating_config) and re-run simulations." + ) return + selected_run = st.selectbox( + "Select Run", + options=runs_with_oh, + key="overheating_run", + ) + available_files = data_source.list_overheating_files(selected_run) + if not available_files: + st.warning("No overheating parquet files found for this run.") + return + + # summary heatmap: aggregate stats across all buildings and thresholds + theme = cast(Theme, _streamlit_theme()) + summary_df = data_source.load_overheating_summary( + selected_run, aggregation="Zone Weighted" + ) + if summary_df is not None and not summary_df.empty: + st.markdown("#### Overheating summary across metrics and thresholds") + st.caption( + "Summary statistics (mean, median, p95, max) across all buildings. " + "Each column is independently color-scaled." 
+ ) + heatmap_html = create_overheating_heatmap_d3_html(summary_df, theme=theme) + components.html(heatmap_html, height=480, scrolling=False) + heatmap_csv = summary_df.to_csv(index=False) + _chart_download("oh_heatmap", heatmap_csv, heatmap_html, "overheating_summary") + + thresholds = data_source.get_overheating_thresholds(selected_run) + form_result = _overheating_form_controls(available_files, thresholds) + if form_result is None: + return + data_source_type, heat_threshold, aggregation, heat_index_metric, cart_crs = ( + form_result + ) + with st.spinner("Loading overheating data..."): map_df = data_source.load_overheating_map_data( selected_run, cart_crs=cart_crs, heat_threshold_c=heat_threshold, aggregation=aggregation, + data_source_type=data_source_type, + heat_index_metric=heat_index_metric, ) if map_df is None or map_df.empty: @@ -384,10 +627,27 @@ def _render_overheating_use_case(data_source: DataSource) -> None: from globi.tools.visualization.plotting import create_building_map_deck from globi.tools.visualization.views.raw_data import _render_colormap_legend + theme = cast(Theme, _streamlit_theme()) + + st.markdown("#### Summary statistics") + _render_overheating_summary_charts( + map_df, data_source_type, heat_threshold, heat_index_metric, theme + ) + + # map + metric_label = "map value" + if data_source_type == "BasicOverheating": + metric_label = f"Hours above {heat_threshold}C" + elif data_source_type == "ExceedanceDegreeHours": + metric_label = f"Degree-hours above {heat_threshold}C" + else: + metric_label = heat_index_metric.replace("_", " ").title() + + st.markdown("#### Building map") result = create_building_map_deck( map_df, cart_crs=cart_crs, - value_col="overheating_hours", + value_col="map_value", cmap="reds", ) if result is None: @@ -395,16 +655,11 @@ def _render_overheating_use_case(data_source: DataSource) -> None: return deck, n_features, value_stats = result - st.markdown("#### Overheating hours above threshold") st.pydeck_chart(deck) 
st.caption(f"{n_features} buildings displayed") if value_stats: - _render_colormap_legend( - f"Hours above {heat_threshold}C", - value_stats, - "reds", - ) + _render_colormap_legend(metric_label, value_stats, "reds") def _render_scenario_comparison(data_source: DataSource) -> None: @@ -472,6 +727,12 @@ def _render_scenario_comparison(data_source: DataSource) -> None: st.markdown("#### EUI distribution comparison") kde_html = create_comparison_kde_d3_html(comparison_data) components.html(kde_html, height=360, scrolling=False) + _chart_download( + "sc_kde", + _build_eui_csv(comparison_data).to_csv(index=False), + kde_html, + "eui_distribution", + ) # end uses and utilities side by side col_left, col_right = st.columns(2) @@ -484,6 +745,14 @@ def _render_scenario_comparison(data_source: DataSource) -> None: title="end uses comparison", ) components.html(eu_html, height=360, scrolling=False) + _chart_download( + "sc_eu", + _build_stacked_csv(comparison_data, "end_uses_data").to_csv( + index=False + ), + eu_html, + "end_uses", + ) with col_right: st.markdown("#### Fuel/utilities comparison") fuel_html = create_comparison_stacked_bar_d3_html( @@ -493,3 +762,11 @@ def _render_scenario_comparison(data_source: DataSource) -> None: title="fuel/utilities comparison", ) components.html(fuel_html, height=360, scrolling=False) + _chart_download( + "sc_fuel", + _build_stacked_csv(comparison_data, "utilities_data").to_csv( + index=False + ), + fuel_html, + "fuel_utilities", + ) From 8893d9775ad88b93ab087b49bc88177b7840e0c3 Mon Sep 17 00:00:00 2001 From: daryaguettler Date: Thu, 12 Mar 2026 21:04:09 +0900 Subject: [PATCH 3/3] add caching --- src/globi/tools/visualization/plotting.py | 94 ++++-- src/globi/tools/visualization/utils.py | 274 ++++++++++++++---- .../tools/visualization/views/raw_data.py | 38 ++- .../tools/visualization/views/use_cases.py | 38 ++- 4 files changed, 352 insertions(+), 92 deletions(-) diff --git a/src/globi/tools/visualization/plotting.py 
b/src/globi/tools/visualization/plotting.py index 5bad819..1e77b70 100644 --- a/src/globi/tools/visualization/plotting.py +++ b/src/globi/tools/visualization/plotting.py @@ -2,6 +2,7 @@ from __future__ import annotations +import contextlib import json import math from itertools import pairwise @@ -1626,10 +1627,43 @@ def create_building_map_deck( ) if features is None: return None + return _deck_from_features(features, config, cmap) + + +def create_building_map_deck_from_cache( + geometry: list[dict], + map_df: pd.DataFrame, + value_col: str | None, + cmap: str = "viridis", + config: Building3DConfig | None = None, +) -> tuple[pdk.Deck, int, dict | None] | None: + """Build pydeck deck from cached geometry and map_df. No WKT parsing. + + Use when geometry and map_df are already computed (e.g. from prior run/CRS + selection). Only adds the selected metric for coloring. + """ + if len(geometry) != len(map_df): + return None + features = [] + for i, feat in enumerate(geometry): + f = {"polygon": feat["polygon"], "height": feat["height"]} + if value_col and value_col in map_df.columns: + v = map_df.iloc[i][value_col] + if v == v and v is not None: + with contextlib.suppress(TypeError, ValueError): + f["value"] = float(v) + features.append(f) + return _deck_from_features(features, config, cmap) + + +def _deck_from_features( + features: list[dict], + config: Building3DConfig | None, + cmap: str, +) -> tuple[pdk.Deck, int, dict | None]: + """Create deck and stats from features (polygon, height, value).""" vals = [f["value"] for f in features if "value" in f and f["value"] is not None] - value_stats = None - if vals: - value_stats = {"min": min(vals), "max": max(vals)} + value_stats = {"min": min(vals), "max": max(vals)} if vals else None config = config or Building3DConfig(elevation_scale=1.0) deck = create_polygon_layer_chart( features, @@ -1665,17 +1699,28 @@ def create_polygon_layer_chart( v_min = min(vals) if vals else 0.0 v_max = max(vals) if vals else 1.0 span = 
v_max - v_min if v_max > v_min else 1.0 + default_color = [*list(config.fill_color[:3]), 160] + # build minimal layer data: polygon, height, color, value (for tooltip only) + layer_data: list[dict[str, Any]] = [] for f in features: if value_key in f and f[value_key] is not None: t = (float(f[value_key]) - v_min) / span - f["color"] = _colormap_color(cmap, t) + color = _colormap_color(cmap, t) else: - f["color"] = [*list(config.fill_color[:3]), 160] + color = default_color + row: dict[str, Any] = { + "polygon": f["polygon"], + "height": f["height"], + "color": color, + } + if value_key in f and f[value_key] is not None: + row["value"] = f[value_key] + layer_data.append(row) layer = pdk.Layer( "PolygonLayer", - data=features, + data=layer_data, get_polygon="polygon", get_elevation="height", elevation_scale=config.elevation_scale, @@ -1687,30 +1732,15 @@ def create_polygon_layer_chart( ) # derive a reasonable center/zoom from feature polygons - lons: list[float] = [] - lats: list[float] = [] - for f in features: - for x, y in f["polygon"]: - lons.append(float(x)) - lats.append(float(y)) - - if lons and lats: + all_coords = [(float(x), float(y)) for f in layer_data for x, y in f["polygon"]] + if all_coords: + lons, lats = zip(*all_coords, strict=True) lon_center = sum(lons) / len(lons) lat_center = sum(lats) / len(lats) - lon_span = max(lons) - min(lons) - lat_span = max(lats) - min(lats) - span = max(lon_span, lat_span) - if span < 0.005: - zoom = 15 - elif span < 0.02: - zoom = 14 - elif span < 0.05: - zoom = 13 - else: - zoom = 12 + span = max(max(lons) - min(lons), max(lats) - min(lats)) + zoom = 15 if span < 0.005 else 14 if span < 0.02 else 13 if span < 0.05 else 12 else: - lon_center = 0.0 - lat_center = 0.0 + lon_center = lat_center = 0.0 zoom = 0.8 view_state = pdk.ViewState( @@ -1794,15 +1824,23 @@ def extract_building_polygons( msg = f"No height column '{height_col}' found" raise ValueError(msg) + from pyproj import Transformer + rect_series = 
df_reset[ROTATED_RECTANGLE_COL] height_series = df_reset[height_col].astype("float64") + transformer = Transformer.from_crs(cart_crs, "EPSG:4326", always_xy=True) polygons: list[list[list[float]]] = [] heights: list[float] = [] values: list[float | None] = [] for i, wkt_value in enumerate(rect_series): - poly_lonlat = transform_rotated_rectangle_to_latlon(wkt_value, cart_crs) + wkt_str = ( + getattr(wkt_value, "wkt", wkt_value) if wkt_value is not None else None + ) + poly_lonlat = transform_rotated_rectangle_to_latlon( + wkt_str or "", cart_crs, _transformer=transformer + ) if not poly_lonlat: continue diff --git a/src/globi/tools/visualization/utils.py b/src/globi/tools/visualization/utils.py index 53dec44..e3b3daf 100644 --- a/src/globi/tools/visualization/utils.py +++ b/src/globi/tools/visualization/utils.py @@ -243,6 +243,107 @@ def _find_col(df: pd.DataFrame, name: str): HEIGHT_FALLBACK_COLS = ("num_floors", "f2f_height") +def _geom_to_polygon_coords(geom) -> list[list[float]] | None: + """Extract exterior coords from Polygon or MultiPolygon as [[lon, lat], ...].""" + from shapely.geometry import MultiPolygon, Polygon + + if isinstance(geom, Polygon): + coords = list(geom.exterior.coords) + elif isinstance(geom, MultiPolygon): + poly = max(geom.geoms, key=lambda g: g.area) + coords = list(poly.exterior.coords) + else: + return None + if len(coords) < 3: + return None + return [[float(x), float(y)] for x, y in coords] + + +def _compute_heights_vectorized( + sub: pd.DataFrame, + df_flat: pd.DataFrame, + has_height: bool, + default_height_m: float, +) -> pd.Series: + """Compute height series for vectorized features.""" + nf_col = _find_col(df_flat, "num_floors") + f2f_col = _find_col(df_flat, "f2f_height") + f2f_default = 3.0 + if f2f_col is not None and f2f_col in df_flat.columns: + f2f_vals = sub[f2f_col].apply( + lambda v: float(v) if v == v and v is not None else f2f_default + ) + else: + f2f_vals = pd.Series(f2f_default, index=sub.index) + + if has_height: 
+ heights = sub["height"].astype(float, errors="ignore") + heights = heights.where((heights > 0) & heights.notna(), default_height_m) + elif nf_col is not None and nf_col in sub.columns: + heights = (sub[nf_col].astype(float, errors="ignore") * f2f_vals).fillna( + default_height_m + ) + else: + heights = pd.Series(default_height_m, index=sub.index) + return heights.clip(lower=0.01).fillna(default_height_m) # type: ignore[return-value] + + +def _build_map_features_vectorized( + df_flat: pd.DataFrame, + rect_col: str, + cart_crs: str, + default_height_m: float, + has_height: bool, + has_num_floors: bool, + value_col: str | None, +) -> list[dict] | None: + """Vectorized path: batch parse WKT and transform via geopandas.""" + import contextlib + + import geopandas as gpd + + wkt_series = df_flat[rect_col].apply( + lambda v: getattr(v, "wkt", v) if v is not None else None + ) + valid_mask = wkt_series.apply(lambda s: isinstance(s, str)) + if not bool(valid_mask.any()): + return None + + sub = df_flat.loc[valid_mask].copy() + wkt_valid = wkt_series.loc[valid_mask].astype(str) + + try: + gs = gpd.GeoSeries.from_wkt(wkt_valid, crs=cart_crs, on_invalid="ignore") + except Exception: + return None + + gs_wgs = gs.to_crs("EPSG:4326") + valid_geom = ~gs_wgs.is_empty & gs_wgs.geom_type.isin(["Polygon", "MultiPolygon"]) + if not bool(valid_geom.any()): + return None + + sub = sub.loc[valid_geom] + gs_wgs = gs_wgs.loc[valid_geom] + heights = _compute_heights_vectorized(sub, df_flat, has_height, default_height_m) + + features: list[dict] = [] + for idx, geom in zip(sub.index, gs_wgs, strict=True): + poly_lonlat = _geom_to_polygon_coords(geom) + if poly_lonlat is None: + continue + + row = sub.loc[idx] + feat: dict = {"polygon": poly_lonlat, "height": float(heights.loc[idx])} + if value_col and value_col in sub.columns: + v = row[value_col] + if v == v and v is not None: + with contextlib.suppress(TypeError, ValueError): + feat["value"] = float(v) + features.append(feat) + + return 
features if features else None + + def build_map_features_from_df( # noqa: C901 df: pd.DataFrame, cart_crs: str = "EPSG:3857", @@ -253,6 +354,7 @@ def build_map_features_from_df( # noqa: C901 Converts each rotated_rectangle WKT (in cart_crs) to lat/lon polygon, extrudes by height (meters). Works with flat parquet or index-flattened data. + Uses vectorized geopandas path for large datasets. Args: df: DataFrame with rotated_rectangle (or GLOBI_ROTATED_RECTANGLE) and height. @@ -278,6 +380,24 @@ def build_map_features_from_df( # noqa: C901 if not has_height and not has_num_floors: return None + # use vectorized path for 50+ rows + if len(df_flat) >= 50: + result = _build_map_features_vectorized( + df_flat, + rect_col, + cart_crs, + default_height_m, + has_height, + has_num_floors, + value_col, + ) + if result is not None: + return result + + # fallback: row-by-row for small datasets or when vectorized fails + from pyproj import Transformer + + transformer = Transformer.from_crs(cart_crs, "EPSG:4326", always_xy=True) features: list[dict] = [] for i in range(len(df_flat)): wkt_val = df_flat.iloc[i][rect_col] @@ -285,8 +405,10 @@ def build_map_features_from_df( # noqa: C901 if not isinstance(wkt_str, str): continue - poly_lonlat = transform_rotated_rectangle_to_latlon(wkt_str, cart_crs) - if not poly_lonlat: + poly_lonlat = transform_rotated_rectangle_to_latlon( + wkt_str, cart_crs, _transformer=transformer + ) + if poly_lonlat is None: continue row = df_flat.iloc[i] @@ -315,7 +437,7 @@ def build_map_features_from_df( # noqa: C901 except (TypeError, ValueError, KeyError): pass - feat: dict = {"polygon": poly_lonlat, "height": float(h)} + feat = {"polygon": poly_lonlat, "height": float(h)} if value_col and value_col in df_flat.columns: try: v = row[value_col] @@ -331,11 +453,14 @@ def build_map_features_from_df( # noqa: C901 def transform_rotated_rectangle_to_latlon( wkt: str, cart_crs: str = "EPSG:3857", + *, + _transformer=None, ) -> list[list[float]] | None: """Convert 
rotated_rectangle WKT (in cartesian CRS) to lat/lon polygon. Transforms each vertex from cart_crs to EPSG:4326. Returns [[lon, lat], ...] for pydeck polygon layer, or None if invalid. + Pass _transformer to reuse (avoids creating one per call in loops). """ from pyproj import Transformer from shapely import from_wkt @@ -357,11 +482,18 @@ def transform_rotated_rectangle_to_latlon( return None if len(coords) < 3: return None - transformer = Transformer.from_crs(cart_crs, "EPSG:4326", always_xy=True) - result: list[list[float]] = [] - for x, y in coords: - lon, lat = transformer.transform(float(x), float(y)) - result.append([float(lon), float(lat)]) + + trans = _transformer + if trans is None: + trans = Transformer.from_crs(cart_crs, "EPSG:4326", always_xy=True) + + # batch transform all vertices + import numpy as np + + xs = np.array([c[0] for c in coords], dtype=float) + ys = np.array([c[1] for c in coords], dtype=float) + lons, lats = trans.transform(xs, ys) + result = [[float(lon), float(lat)] for lon, lat in zip(lons, lats, strict=True)] except Exception: return None return result @@ -373,14 +505,16 @@ def build_map_df_from_output( # noqa: C901 ) -> pd.DataFrame | None: """Build map-ready dataframe directly from output parquet. - Extracts lat/lon from rotated_rectangle, computes EUI/peak metrics, - no merge with inputs. Returns df with building_id, lat, lon, - rotated_rectangle, height, eui, peak_per_sqm, end-use eui cols. + Extracts lat/lon from rotated_rectangle. Output Energy is kWh/m² and Peak + is kW/m², so eui and peak_per_sqm are used directly; total_energy and + total_peak are eui*area and peak_per_sqm*area. Returns df with building_id, + lat, lon, rotated_rectangle, height, eui, peak_per_sqm, total_energy, + total_peak, end-use eui cols. Uses vectorized geopandas for geometry when + 100+ rows. 
""" import logging - from pyproj import Transformer - from shapely import from_wkt + import geopandas as gpd df_reset = df.reset_index() bid_col = _find_col(df_reset, BUILDING_ID_COL) @@ -410,56 +544,97 @@ def build_map_df_from_output( # noqa: C901 if not energy_cols or not peak_cols: return None - transformer = Transformer.from_crs(cart_crs, "EPSG:4326", always_xy=True) + areas = df.index.get_level_values(area_level) + # output Energy is kWh/m², Peak is kW/m² - use directly as eui and peak_per_sqm + eui_arr = df[energy_cols].sum(axis=1).values + peak_per_sqm_arr = df[peak_cols].max(axis=1).values + + h_col = _find_col(df_reset, "height") + nf_col = _find_col(df_reset, "num_floors") + f2f_col = _find_col(df_reset, "f2f_height") log = logging.getLogger(__name__) - rows: list[dict] = [] - for idx, (_, row) in enumerate(df_reset.iterrows()): - wkt = row.get(rect_col) - if not isinstance(wkt, str): - continue - try: - geom = from_wkt(wkt) - if geom.is_empty: + # vectorized path for 100+ rows: batch parse WKT and transform centroids + use_vectorized = len(df_reset) >= 100 + lon_lat_by_idx: dict[int, tuple[float, float]] = {} + wkt_by_idx: dict[int, str] = {} + + if use_vectorized: + wkt_series = df_reset[rect_col].apply( + lambda v: getattr(v, "wkt", v) if v is not None else None + ) + valid_mask = wkt_series.apply(lambda s: isinstance(s, str)) + if bool(valid_mask.any()): + try: + gs = gpd.GeoSeries.from_wkt( + wkt_series.loc[valid_mask].astype(str), + crs=cart_crs, + on_invalid="ignore", + ) + gs_wgs = gs.to_crs("EPSG:4326") + valid_geom = ~gs_wgs.is_empty + for idx in gs_wgs.loc[valid_geom].index: + geom = gs_wgs.loc[idx] + cx, cy = geom.centroid.x, geom.centroid.y + lon_lat_by_idx[idx] = (float(cy), float(cx)) # lat, lon + wkt_by_idx[idx] = str(wkt_series.loc[idx]) + except Exception as exc: + log.debug("vectorized path failed, falling back: %s", exc) + use_vectorized = False + + if not use_vectorized: + from pyproj import Transformer + from shapely import from_wkt 
+ + transformer = Transformer.from_crs(cart_crs, "EPSG:4326", always_xy=True) + for idx in range(len(df_reset)): + wkt = df_reset.iloc[idx][rect_col] + if not isinstance(wkt, str): + wkt = getattr(wkt, "wkt", None) if wkt is not None else None + if not isinstance(wkt, str): continue - cx, cy = geom.centroid.x, geom.centroid.y - lon, lat = transformer.transform(cx, cy) - bid = str(row[bid_col]) - except Exception as exc: - log.debug("skip row: %s", exc) - continue + try: + geom = from_wkt(wkt) + if geom.is_empty: + continue + cx, cy = geom.centroid.x, geom.centroid.y + lon, lat = transformer.transform(cx, cy) + lon_lat_by_idx[idx] = (float(lat), float(lon)) + wkt_by_idx[idx] = wkt + except Exception as exc: + log.debug("skip row %s: %s", idx, exc) - area_val = df.index.get_level_values(area_level)[idx] + rows: list[dict] = [] + for idx, (lat, lon) in lon_lat_by_idx.items(): + wkt = wkt_by_idx.get(idx, "") try: + area_val = areas[idx] fval = float(area_val) # type: ignore[arg-type] area = fval if fval > 0 else None - except (TypeError, ValueError): + except (TypeError, ValueError, IndexError): area = None if area is None: continue - row_vals = df.iloc[idx] - total_energy = float(row_vals[energy_cols].sum()) - peak = float(row_vals[peak_cols].max()) - eui = total_energy / area - peak_per_sqm = peak / area - - # height from output parquet index (height col, else num_floors * f2f_height) - h_col = _find_col(df_reset, "height") - nf_col = _find_col(df_reset, "num_floors") - f2f_col = _find_col(df_reset, "f2f_height") + eui = float(eui_arr[idx]) + peak_per_sqm = float(peak_per_sqm_arr[idx]) + total_energy = eui * area + total_peak = peak_per_sqm * area + + row = df_reset.iloc[idx] + bid = str(row[bid_col]) height_m = 6.0 - if h_col is not None and h_col in row.index: + if h_col is not None and h_col in df_reset.columns: try: hv = row[h_col] hm = float(hv) - if hm == hm: # not nan + if hm == hm: height_m = hm except (TypeError, ValueError): pass - elif nf_col is not None 
and nf_col in row.index: + elif nf_col is not None and nf_col in df_reset.columns: f2f = 3.0 - if f2f_col is not None and f2f_col in row.index: + if f2f_col is not None and f2f_col in df_reset.columns: try: fv = row[f2f_col] f2f = float(fv) if fv == fv else 3.0 @@ -468,29 +643,30 @@ def build_map_df_from_output( # noqa: C901 try: nv = row[nf_col] nm = float(nv) - if nm == nm: # not nan + if nm == nm: height_m = nm * f2f except (TypeError, ValueError): pass row_dict: dict = { BUILDING_ID_COL: bid, - LAT_COL: float(lat), - LON_COL: float(lon), + LAT_COL: lat, + LON_COL: lon, ROTATED_RECTANGLE_COL: wkt, "height": height_m, "conditioned_area": area, "eui": eui, "peak_per_sqm": peak_per_sqm, "total_energy": total_energy, - "total_peak": peak, + "total_peak": total_peak, } + row_vals = df.iloc[idx] for meter in { str(c[2]) for c in energy_cols if isinstance(c, tuple) and len(c) > 2 }: cols_m = [c for c in energy_cols if c[2] == meter] if cols_m: - meter_eui = float(row_vals[cols_m].sum()) / area + meter_eui = float(row_vals[cols_m].sum()) # already kWh/m² row_dict[f"eui_{meter.lower().replace(' ', '_')}"] = meter_eui rows.append(row_dict) diff --git a/src/globi/tools/visualization/views/raw_data.py b/src/globi/tools/visualization/views/raw_data.py index 448e2b8..94402da 100644 --- a/src/globi/tools/visualization/views/raw_data.py +++ b/src/globi/tools/visualization/views/raw_data.py @@ -11,6 +11,7 @@ from globi.tools.visualization.plotting import ( Theme, create_building_map_deck, + create_building_map_deck_from_cache, create_column_layer_chart, create_histogram_d3_html, create_monthly_timeseries_d3_html, @@ -21,6 +22,8 @@ from globi.tools.visualization.utils import ( LAT_COL, LON_COL, + build_map_df_from_output, + build_map_features_from_df, has_geo_columns, list_categorical_columns, list_numeric_columns, @@ -284,7 +287,8 @@ def _render_results_map( """Render 3D building map from rotated_rectangle and height. 
Converts rotated_rectangle WKT (cartesian CRS) to lat/lon, extrudes by - height (meters). Per geometry.py, rectangles are created in cart_crs. + height (meters). Caches map_df and geometry when run/CRS selected; only + adds the chosen metric when rendering. """ if "dryrun" in run_label.lower(): st.info("You have selected a dryrun which does not have a mapping option") @@ -315,12 +319,32 @@ def _render_results_map( ) value_col, cmap, metric_label = metric_option - result = create_building_map_deck( - df, - cart_crs=cart_crs, - value_col=value_col, - cmap=cmap, - ) + cache_key = f"_map_cache_{run_label}_{cart_crs}" + if cache_key not in st.session_state: + with st.spinner("Building map data (geometry + metrics)..."): + map_df = build_map_df_from_output(df, cart_crs=cart_crs) + if map_df is not None: + geometry = build_map_features_from_df( + map_df, cart_crs=cart_crs, value_col=None + ) + if geometry is not None: + st.session_state[cache_key] = (map_df, geometry) + + if cache_key in st.session_state: + map_df, geometry = st.session_state[cache_key] + result = create_building_map_deck_from_cache( + geometry, + map_df, + value_col=value_col, + cmap=cmap, + ) + else: + result = create_building_map_deck( + df, + cart_crs=cart_crs, + value_col=value_col, + cmap=cmap, + ) if result is None: st.info( "Map unavailable. Output must have rotated_rectangle (or GLOBI_ROTATED_RECTANGLE) " diff --git a/src/globi/tools/visualization/views/use_cases.py b/src/globi/tools/visualization/views/use_cases.py index 4d5d118..4e30259 100644 --- a/src/globi/tools/visualization/views/use_cases.py +++ b/src/globi/tools/visualization/views/use_cases.py @@ -383,9 +383,13 @@ def _render_retrofit_map( per_scenario_emissions: dict[str, dict[str, float]], system_costs_per_sqm: dict[str, float], ) -> None: - """Render pydeck map with selectable metric and colormap.""" - from globi.tools.visualization.plotting import create_building_map_deck + """Render pydeck map with selectable metric and colormap. 
Caches geometry per scenario/CRS.""" + from globi.tools.visualization.plotting import ( + create_building_map_deck, + create_building_map_deck_from_cache, + ) from globi.tools.visualization.results_data import build_retrofit_map_df + from globi.tools.visualization.utils import build_map_features_from_df from globi.tools.visualization.views.raw_data import _render_colormap_legend st.markdown("#### Building map by retrofit metric") @@ -450,12 +454,30 @@ def _render_retrofit_map( st.warning(f"Metric '{value_col}' not available for this scenario.") return - result = create_building_map_deck( - map_df, - cart_crs=cart_crs, - value_col=value_col, - cmap=cmap, - ) + cache_key = f"_retrofit_map_{scenario}_{cart_crs}" + if cache_key not in st.session_state: + with st.spinner("Building map geometry..."): + geometry = build_map_features_from_df( + map_df, cart_crs=cart_crs, value_col=None + ) + if geometry is not None: + st.session_state[cache_key] = geometry + + if cache_key in st.session_state: + geometry = st.session_state[cache_key] + result = create_building_map_deck_from_cache( + geometry, + map_df, + value_col=value_col, + cmap=cmap, + ) + else: + result = create_building_map_deck( + map_df, + cart_crs=cart_crs, + value_col=value_col, + cmap=cmap, + ) if result is None: st.info("Could not build map.") return