diff --git a/docs/explanation/index.md b/docs/explanation/index.md index f6bac1c..ab14fc4 100644 --- a/docs/explanation/index.md +++ b/docs/explanation/index.md @@ -21,4 +21,5 @@ data_download data_validation customizing_checks dbt_computation +weather_year_modeling ``` diff --git a/docs/explanation/weather_year_modeling.md b/docs/explanation/weather_year_modeling.md new file mode 100644 index 0000000..825c09b --- /dev/null +++ b/docs/explanation/weather_year_modeling.md @@ -0,0 +1,284 @@ +(weather-year-modeling)= +# Weather year modeling + +STRIDE uses detailed weather data to adjust electricity load shapes for temperature variations throughout the year. This page explains how weather data are processed and applied to create realistic hourly load profiles. + +## Overview + +Weather-based load adjustments follow this workflow: + +``` +Weather BAIT Data + ↓ +Degree Day Calculation (HDD/CDD) + ↓ +Shoulder Month Smoothing (Adjusted HDD/CDD) + ↓ +Temperature Multipliers + ↓ +Load Shape Expansion (Representative → Full Year) + ↓ +Annual Energy Scaling + ↓ +Final Hourly Load Shapes +``` + +## Input weather data + +STRIDE uses Building-Adjusted Internal Temperature (BAIT) data derived from ERA5 reanalysis weather data. BAIT is a composite temperature metric that accounts for: +- Outdoor dry-bulb temperature +- Surface solar radiation +- Wind speed at 2m +- Humidity +- Building thermal characteristics + +The calculation methodology is similar to that described in Staffell, Pfenninger, and Johnson (2023).[^1] + +The weather data include: +- **Temporal resolution**: Daily (averaged from hourly ERA5 data) +- **Coverage**: Weather years 1995-2024 +- **Geographic representation**: Country-level, based on a single location per country (the most populous location, or another highly populated one) +- **Variables**: Temperature, Solar Radiation, Wind Speed, Dew Point, Humidity, BAIT + +[^1]: Staffell, I., Pfenninger, S., & Johnson, N. (2023). 
A global model of hourly space heating and cooling demand at multiple spatial scales. *Nature Energy*, 8, 1328-1344. https://doi.org/10.1038/s41560-023-01341-5 + +[^2]: Castillo, R., van Ruijven, B.J., Pfenninger, S., van Vuuren, D.P., Carrara, S., & Patel, M.K. (2022). Future global electricity demand load curves. *Energy*, 259, 124857. https://doi.org/10.1016/j.energy.2022.124857 + +## Degree day calculation + +### Heating and cooling degree days + +Degree days quantify how much heating or cooling is needed on a given day: + +**Heating Degree Days (HDD)**: +```sql +HDD = GREATEST(0, heating_threshold - BAIT) +``` + +**Cooling Degree Days (CDD)**: +```sql +CDD = GREATEST(0, BAIT - cooling_threshold) +``` + +### ModelParameters + +These thresholds are configurable through `ModelParameters`: + +| Parameter | Description | Default | Unit | +|-----------|-------------|---------|------| +| `heating_threshold` | Temperature below which heating is needed | 18.0 | °C | +| `cooling_threshold` | Temperature above which cooling is needed | 18.0 | °C | + +Example configuration in `project.json5`: + +```json5 +{ + project_id: "my_project", + // ... other config ... + model_parameters: { + heating_threshold: 18.0, + cooling_threshold: 18.0, + } +} +``` + +### Degree day grouping + +Degree days are aggregated by: +- **Geography**: Country or region +- **Weather Year**: Reference year for weather patterns +- **Month**: Calendar month (1-12) +- **Day Type**: Weekday or weekend + +This grouping enables: +- Seasonal variation analysis +- Weekday/weekend pattern differences +- Representative day selection + +## Temperature multiplier calculation + +Temperature multipliers scale representative day heating/cooling load across days within each group (month + day type) based on relative temperature extremes. 
+ +### Basic multiplier formula + +For a day with degree days `HDD` in a group (month + day type) of `num_days` days whose degree days sum to `total_HDD`: + +``` +heating_multiplier = (HDD / total_HDD) × num_days +``` + +Similarly for cooling: + +``` +cooling_multiplier = (CDD / total_CDD) × num_days +``` + +**Key property**: Multipliers sum to `num_days` within each group, preserving total energy. + +### The shoulder month problem + +In spring and fall ("shoulder months"), some days may have zero or very low degree days while others have significant heating or cooling needs. Without adjustment, this creates unrealistic load spikes by concentrating all HVAC load on just the extreme days. + +Example shoulder month (April): +- Days 1-21, 27-30: HDD = 0 (mild weather) +- Days 22-26: HDD = 5-10 (cold snap) + +Without smoothing, all heating load would be assigned to days 22-26, creating artificial spikes. + +### Shoulder month smoothing + +STRIDE applies a minimum threshold to smooth these transitions: + +```sql +-- Calculate maximum degree days in each group +max_hdd = MAX(hdd) in (month, day_type) +min_threshold = max_hdd / shoulder_month_smoothing_factor + +-- Apply threshold +adjusted_hdd = CASE + WHEN hdd < min_threshold THEN min_threshold + ELSE hdd +END +``` + +This ensures all days in shoulder months experience some HVAC load, preventing unrealistic concentration. The same adjustment is applied independently to CDD using `max_cdd`. Smoothing only takes effect in groups whose total degree days are greater than zero; a group with no heating (or cooling) degree days at all is left unchanged and every day in it receives a multiplier of 1.0. + +### Smoothing parameters + +| Parameter | Description | Default | Typical Values | +|-----------|-------------|---------|----------------| +| `enable_shoulder_month_smoothing` | Enable/disable smoothing | `true` | `true`/`false` | +| `shoulder_month_smoothing_factor` | Divisor for max degree days | 10.0 | 5.0 (aggressive), 10.0 (moderate), 20.0 (gentle) | + +Example in `project.json5`: + +```json5 +{ + project_id: "my_project", + // ... other config ... 
+ model_parameters: { + enable_shoulder_month_smoothing: true, + shoulder_month_smoothing_factor: 10.0, // Moderate smoothing + } +} +``` + +**Effect of smoothing factor**: +- **Lower values (5)**: More aggressive smoothing, broader load distribution +- **Higher values (20)**: Gentler smoothing, closer to original pattern +- **Disabled**: No smoothing, potential for unrealistic spikes + +### Adjusted multiplier calculation + +Final multipliers use adjusted degree days: + +``` +heating_multiplier = (adjusted_hdd / adjusted_total_hdd) × num_days +``` + +``` +cooling_multiplier = (adjusted_cdd / adjusted_total_cdd) × num_days +``` + +This preserves energy conservation (multipliers still sum to `num_days`) while smoothing shoulder month transitions. + +## Application to load shapes + +### Load shapes for representative days + +Load shapes from the IMAGE Integrated Assessment Model (Castillo et al. 2022)[^2] provide hourly consumption profiles. The dataset includes: +- **One weekday and one weekend day per month** (24 total representative days) +- **24 hourly values per day** (e.g., hour 0 = midnight-1am, hour 23 = 11pm-midnight) +- **Segmentation by**: End use (Heating, Cooling, Other), sector (Residential, Commercial, Industrial, Transportation), geography, model year + +### Expansion to full year + +The `load_shapes_expanded` dbt model expands these 24 representative days into 8760 hours (365 days × 24 hours) by: + +1. **Matching each calendar day** of the selected weather year to its representative profile: + - Days are matched by month (January → January representative day) and day type (weekday/weekend) + - Example: Tuesday, January 15 uses the January weekday profile + +2. 
**Applying temperature multipliers** to adjust for weather: + ```sql + adjusted_value = load_shape_value * multiplier + + -- Multiplier depends on end use: + multiplier = CASE + WHEN enduse = 'heating' THEN heating_multiplier + WHEN enduse = 'cooling' THEN cooling_multiplier + ELSE 1.0 -- Non-HVAC end uses (lighting, equipment, etc.) + END + ``` + +3. **Repeating the 24-hour pattern** for each day with its specific temperature multiplier + +**Result**: Full-year hourly load shapes that preserve: +- Original hourly patterns from IMAGE (morning/evening peaks, daily cycles) +- Monthly seasonal variation (via representative days) +- Weekday/weekend differences +- Historical weather patterns (via weather-driven adjustments for heating/cooling end uses based on ERA5) + +## Scaling to annual consumption + +The final step scales weather-adjusted hourly shapes to match annual energy projections. + +### Annual energy projection + +For each sector/subsector/model year, STRIDE calculates annual energy demand from: +- Energy intensity regressions (energy per unit GDP, or population x HDI) +- Energy use driver projections (GDP, HDI, population) + +This produces annual totals in MWh for each sector. 
+ +### Scaling factor calculation + +```python +# Sum all hourly values for the year +load_shape_annual_total = SUM(expanded_hourly_values) + +# Calculate scaling factor +scaling_factor = projected_annual_energy / load_shape_annual_total +``` + +### Final hourly values + +```python +final_hourly_load = expanded_hourly_value * scaling_factor +``` + +This ensures: +- Hourly values sum to the projected annual total +- Weather-based daily/seasonal patterns are preserved +- Realistic load profiles throughout the year + +## dbt models + +The weather year modeling pipeline is implemented in these dbt models: + +| Model | Purpose | +|-------|---------| +| `weather_bait_daily` | Pivots weather data from long to wide format and extracts date components | +| `weather_degree_days` | Calculates daily HDD and CDD from BAIT | +| `weather_degree_days_grouped` | Aggregates degree days by geography, weather year, month, and day type | +| `temperature_multipliers` | Computes daily multipliers with shoulder month smoothing | +| `load_shapes_expanded` | Applies temperature multipliers to expand representative days to full year | +| `energy_projection_*` | Combines expanded load shapes with energy intensity to produce projections | + +## Logging and diagnostics + +When computing energy projections, STRIDE logs temperature multiplier statistics: + +``` +INFO: Computing energy projection with model parameters: + heating_threshold=18.0, cooling_threshold=18.0, + enable_shoulder_month_smoothing=True, shoulder_month_smoothing_factor=10.0 +INFO: Running scenario=baseline with weather_year=2018, + shoulder_month_smoothing=enabled (factor=10.0) +INFO: Temperature multiplier ranges for scenario=baseline: + heating=[0.234, 3.456], cooling=[0.123, 4.567], other=[1.000, 1.000] +``` + +## Related Topics + +- {ref}`dbt-computation` - Overall dbt transformation pipeline diff --git a/docs/tutorials/create_project.md b/docs/tutorials/create_project.md index 386bd1b..3b6bad2 100644 --- 
-- Calculate temperature adjustment multipliers for each day.
-- These multipliers adjust load shapes based on daily temperature variations.
--
-- A "group" below is (geography, weather_year, month, day_type).
-- Steps:
--   1. max_degree_days      - largest daily HDD/CDD in each group
--   2. adjusted_degree_days - raise low daily values to a floor of (max / factor)
--                             (shoulder month smoothing, optional)
--   3. adjusted_totals      - re-sum the adjusted values per group
--   4. final SELECT         - multiplier = adjusted / adjusted_total * num_days,
--                             so multipliers still sum to num_days in each group
--
-- The fallback values here must stay in sync with
-- stride.models.DEFAULT_ENABLE_SHOULDER_MONTH_SMOOTHING and
-- stride.models.DEFAULT_SHOULDER_MONTH_SMOOTHING_FACTOR.
{% set smoothing_enabled = var('enable_shoulder_month_smoothing', true) %}
{% set smoothing_factor = var('shoulder_month_smoothing_factor', 10.0) %}

WITH max_degree_days AS (
    -- Calculate maximum degree days for each group
    -- Used to smooth shoulder month transitions by setting a minimum threshold
    SELECT
        geography,
        weather_year,
        month,
        day_type,
        MAX(hdd) AS max_hdd,
        MAX(cdd) AS max_cdd
    FROM {{ ref('weather_degree_days') }}
    GROUP BY geography, weather_year, month, day_type
),

adjusted_degree_days AS (
    -- Adjust low degree days in shoulder months to smooth transitions.
    -- In groups with some heating/cooling (total > 0), apply a floor of (max / factor).
    -- This affects both zero values and small non-zero values below the floor.
    -- Only applies if enable_shoulder_month_smoothing is true.
    SELECT
        dd.geography,
        dd.timestamp,
        dd.weather_year,
        dd.month,
        dd.day,
        dd.day_type,
        dd.bait,
        dd.hdd AS original_hdd,
        dd.cdd AS original_cdd,
        gs.num_days,
        gs.total_hdd,
        gs.total_cdd,
        mdd.max_hdd,
        mdd.max_cdd,
        -- Adjusted HDD: floor of (max_hdd / factor) when smoothing is enabled
        CASE
            WHEN {{ smoothing_enabled }}
                AND gs.total_hdd > 0 AND mdd.max_hdd IS NOT NULL
                AND dd.hdd < (mdd.max_hdd / {{ smoothing_factor }})
            THEN mdd.max_hdd / {{ smoothing_factor }}
            ELSE dd.hdd
        END AS adjusted_hdd,
        -- Adjusted CDD: floor of (max_cdd / factor) when smoothing is enabled
        CASE
            WHEN {{ smoothing_enabled }}
                AND gs.total_cdd > 0 AND mdd.max_cdd IS NOT NULL
                AND dd.cdd < (mdd.max_cdd / {{ smoothing_factor }})
            THEN mdd.max_cdd / {{ smoothing_factor }}
            ELSE dd.cdd
        END AS adjusted_cdd
    FROM {{ ref('weather_degree_days') }} dd
    JOIN {{ ref('weather_degree_days_grouped') }} gs
        ON dd.weather_year = gs.weather_year
        AND dd.month = gs.month
        AND dd.day_type = gs.day_type
        AND dd.geography = gs.geography
    LEFT JOIN max_degree_days mdd
        ON dd.weather_year = mdd.weather_year
        AND dd.month = mdd.month
        AND dd.day_type = mdd.day_type
        AND dd.geography = mdd.geography
),

adjusted_totals AS (
    -- Recalculate totals with adjusted values
    SELECT
        geography,
        weather_year,
        month,
        day_type,
        SUM(adjusted_hdd) AS adjusted_total_hdd,
        SUM(adjusted_cdd) AS adjusted_total_cdd
    FROM adjusted_degree_days
    GROUP BY geography, weather_year, month, day_type
)

-- Calculate multipliers using adjusted degree days.
-- NOTE: the totals CTE is aliased "tot" rather than "at": AT is a SQL keyword
-- (e.g. AT TIME ZONE) and is fragile as an unquoted alias across engine versions.
SELECT
    ad.geography,
    ad.timestamp,
    ad.weather_year,
    ad.month,
    ad.day,
    ad.day_type,
    ad.bait,
    ad.original_hdd AS hdd,
    ad.original_cdd AS cdd,
    ad.adjusted_hdd,
    ad.adjusted_cdd,
    ad.num_days,
    ad.total_hdd,
    ad.total_cdd,
    -- Heating multiplier: normalize adjusted HDD within the group.
    -- The guard tests the actual divisor; a zero/NULL adjusted total means no
    -- heating occurs in this period, so the multiplier is 1.0.
    CASE
        WHEN tot.adjusted_total_hdd = 0 OR tot.adjusted_total_hdd IS NULL THEN 1.0
        ELSE (ad.adjusted_hdd / tot.adjusted_total_hdd) * ad.num_days
    END AS heating_multiplier,
    -- Cooling multiplier: normalize adjusted CDD within the group.
    -- A zero/NULL adjusted total means no cooling occurs, so the multiplier is 1.0.
    CASE
        WHEN tot.adjusted_total_cdd = 0 OR tot.adjusted_total_cdd IS NULL THEN 1.0
        ELSE (ad.adjusted_cdd / tot.adjusted_total_cdd) * ad.num_days
    END AS cooling_multiplier,
    -- Other multiplier is always 1.0 (no temperature adjustment for non-HVAC loads)
    1.0 AS other_multiplier
FROM adjusted_degree_days ad
JOIN adjusted_totals tot
    ON ad.weather_year = tot.weather_year
    AND ad.month = tot.month
    AND ad.day_type = tot.day_type
    AND ad.geography = tot.geography
# Default model parameter values.
# Single source of truth for the Python side; the dbt models repeat these values as
# fallbacks in their var(...) calls and must be kept in sync with them.
DEFAULT_HEATING_THRESHOLD = 18.0
DEFAULT_COOLING_THRESHOLD = 18.0
DEFAULT_ENABLE_SHOULDER_MONTH_SMOOTHING = True
DEFAULT_SHOULDER_MONTH_SMOOTHING_FACTOR = 10.0


class ModelParameters(DSGBaseModel):  # type: ignore
    """Advanced model parameters for energy projections."""

    heating_threshold: float = Field(
        default=DEFAULT_HEATING_THRESHOLD,
        description="Temperature threshold (°C) below which heating degree days are calculated. "
        "Used for temperature adjustment of heating end uses in load shapes.",
    )
    cooling_threshold: float = Field(
        default=DEFAULT_COOLING_THRESHOLD,
        description="Temperature threshold (°C) above which cooling degree days are calculated. "
        "Used for temperature adjustment of cooling end uses in load shapes.",
    )
    enable_shoulder_month_smoothing: bool = Field(
        default=DEFAULT_ENABLE_SHOULDER_MONTH_SMOOTHING,
        description="Enable smoothing of temperature multipliers in shoulder months. "
        "When True, days with zero degree days in months with mixed heating/cooling are assigned "
        "small values to prevent unrealistic load spikes. When False, uses traditional calculation.",
    )
    # The factor is used as a divisor (max degree days / factor) in
    # temperature_multipliers.sql, so zero or negative values are rejected at
    # configuration time instead of producing a division by zero or a negative floor.
    shoulder_month_smoothing_factor: float = Field(
        default=DEFAULT_SHOULDER_MONTH_SMOOTHING_FACTOR,
        gt=0.0,
        description="Divisor applied to maximum degree days to set minimum threshold for smoothing. "
        "In months with mixed heating/cooling activity, degree days below (max / factor) are "
        "raised to this minimum threshold to prevent unrealistic load concentration. "
        "Smaller values create smoother transitions. Typical values: 5.0 (aggressive), 10.0 (moderate), 20.0 (gentle). "
        "Only used when enable_shoulder_month_smoothing is True.",
    )
self._config.model_parameters.enable_shoulder_month_smoothing + else "disabled" + ) + logger.info( + "Running scenario={} with weather_year={}, shoulder_month_smoothing={}", + scenario.name, + self._config.weather_year, + smoothing_status, + ) + logger.debug("dbt command: '{}'", " ".join(cmd)) start = time.time() subprocess.run(cmd, check=True) duration = time.time() - start @@ -585,6 +607,33 @@ def compute_energy_projection(self, use_table_overrides: bool = True) -> None: row_count, ) + # Log temperature multiplier statistics + multiplier_stats = self._con.sql( + f""" + SELECT + MIN(heating_multiplier) AS min_heating, + MAX(heating_multiplier) AS max_heating, + MIN(cooling_multiplier) AS min_cooling, + MAX(cooling_multiplier) AS max_cooling, + MIN(other_multiplier) AS min_other, + MAX(other_multiplier) AS max_other + FROM {scenario.name}.temperature_multipliers + """ + ).fetchone() + + if multiplier_stats: + logger.info( + "Temperature multiplier ranges for scenario={}: " + "heating=[{:.3f}, {:.3f}], cooling=[{:.3f}, {:.3f}], other=[{:.3f}, {:.3f}]", + scenario.name, + multiplier_stats[0], + multiplier_stats[1], + multiplier_stats[2], + multiplier_stats[3], + multiplier_stats[4], + multiplier_stats[5], + ) + columns = "timestamp, model_year, scenario, sector, geography, metric, value" if i == 0: query = f""" diff --git a/tests/test_energy_projection.py b/tests/test_energy_projection.py index 7756254..113e5b9 100644 --- a/tests/test_energy_projection.py +++ b/tests/test_energy_projection.py @@ -878,9 +878,63 @@ def compute_temperature_multipliers( """ ) - # Compute multipliers (matching temperature_multipliers.sql) + # Compute multipliers (matching temperature_multipliers.sql with shoulder month smoothing) return con.sql( """ + WITH max_degree_days AS ( + -- Calculate max degree days in each group for smoothing + SELECT + dd.geography + ,dd.month + ,dd.day_type + ,MAX(dd.hdd) AS max_hdd + ,MAX(dd.cdd) AS max_cdd + FROM weather_degree_days dd + JOIN weather_grouped 
gs + ON dd.geography = gs.geography + AND dd.weather_year = gs.weather_year + AND dd.month = gs.month + AND dd.day_type = gs.day_type + WHERE gs.total_hdd > 0 OR gs.total_cdd > 0 + GROUP BY dd.geography, dd.month, dd.day_type + ), + adjusted_degree_days AS ( + -- Apply shoulder month smoothing (default factor 10.0, enabled by default) + SELECT + dd.* + ,CASE + WHEN gs.total_hdd > 0 AND dd.hdd < (md.max_hdd / 10.0) + THEN md.max_hdd / 10.0 + ELSE dd.hdd + END AS adjusted_hdd + ,CASE + WHEN gs.total_cdd > 0 AND dd.cdd < (md.max_cdd / 10.0) + THEN md.max_cdd / 10.0 + ELSE dd.cdd + END AS adjusted_cdd + FROM weather_degree_days dd + JOIN weather_grouped gs + ON dd.geography = gs.geography + AND dd.weather_year = gs.weather_year + AND dd.month = gs.month + AND dd.day_type = gs.day_type + LEFT JOIN max_degree_days md + ON dd.geography = md.geography + AND dd.month = md.month + AND dd.day_type = md.day_type + ), + adjusted_totals AS ( + -- Recalculate totals with adjusted values + SELECT + geography + ,weather_year + ,month + ,day_type + ,SUM(adjusted_hdd) AS adjusted_total_hdd + ,SUM(adjusted_cdd) AS adjusted_total_cdd + FROM adjusted_degree_days + GROUP BY geography, weather_year, month, day_type + ) SELECT dd.geography ,dd.timestamp @@ -894,21 +948,30 @@ def compute_temperature_multipliers( ,gs.num_days ,gs.total_hdd ,gs.total_cdd + ,dd.adjusted_hdd + ,dd.adjusted_cdd + ,at.adjusted_total_hdd + ,at.adjusted_total_cdd ,CASE - WHEN gs.total_hdd = 0 OR gs.total_hdd IS NULL THEN 1.0 - ELSE (dd.hdd / gs.total_hdd) * gs.num_days + WHEN at.adjusted_total_hdd = 0 OR at.adjusted_total_hdd IS NULL THEN 1.0 + ELSE (dd.adjusted_hdd / at.adjusted_total_hdd) * gs.num_days END AS heating_multiplier ,CASE - WHEN gs.total_cdd = 0 OR gs.total_cdd IS NULL THEN 1.0 - ELSE (dd.cdd / gs.total_cdd) * gs.num_days + WHEN at.adjusted_total_cdd = 0 OR at.adjusted_total_cdd IS NULL THEN 1.0 + ELSE (dd.adjusted_cdd / at.adjusted_total_cdd) * gs.num_days END AS cooling_multiplier ,1.0 AS 
def _find_shoulder_months(multipliers: pd.DataFrame) -> tuple[list[int], list[int]]:
    """Find shoulder months with mixed zero and non-zero degree days.

    A month is a heating (cooling) shoulder month when at least one of its rows
    has a positive ``total_hdd`` (``total_cdd``) and its daily ``hdd`` (``cdd``)
    values contain both zeros and positive values.

    Parameters
    ----------
    multipliers
        Frame with ``month``, ``hdd``, ``cdd``, ``total_hdd`` and ``total_cdd`` columns.

    Returns
    -------
    tuple[list[int], list[int]]
        (shoulder_heating_months, shoulder_cooling_months), each in order of first
        appearance and holding built-in ``int`` values.
    """

    def _is_mixed(rows: pd.DataFrame, daily: str, total: str) -> bool:
        # No degree days at all in this month -> nothing to smooth.
        if not (rows[total] > 0).any():
            return False
        values = rows[daily]
        return bool((values == 0).any() and (values > 0).any())

    shoulder_heating_months: list[int] = []
    shoulder_cooling_months: list[int] = []

    # sort=False keeps months in order of first appearance.
    for month, month_data in multipliers.groupby("month", sort=False):
        if _is_mixed(month_data, "hdd", "total_hdd"):
            shoulder_heating_months.append(int(month))
        if _is_mixed(month_data, "cdd", "total_cdd"):
            shoulder_cooling_months.append(int(month))

    return shoulder_heating_months, shoulder_cooling_months


def _verify_heating_smoothing(multipliers: pd.DataFrame, month: int) -> None:
    """Verify that heating smoothing works correctly for a shoulder month.

    The threshold and the average multiplier are evaluated per smoothing group
    (geography / weather year / day type, whichever of those columns are present),
    because temperature_multipliers.sql applies the floor per group rather than
    per calendar month.

    Raises
    ------
    AssertionError
        If a low-HDD day has a non-positive multiplier or a multiplier that is not
        below its group's average.
    """
    month_data = multipliers[(multipliers["month"] == month) & (multipliers["total_hdd"] > 0)]

    group_cols = [
        col for col in ("geography", "weather_year", "day_type") if col in month_data.columns
    ]
    if group_cols:
        groups = (group for _, group in month_data.groupby(group_cols, dropna=False))
    else:
        groups = iter([month_data])

    for group in groups:
        # Minimum threshold for this group, using the default factor constant
        min_threshold = group["hdd"].max() / DEFAULT_SHOULDER_MONTH_SMOOTHING_FACTOR

        # Days with low HDD values (below threshold) are the ones smoothing raises
        low_hdd_days = group[group["hdd"] < min_threshold]
        if low_hdd_days.empty:
            continue

        # All low HDD days should have positive heating_multipliers due to smoothing
        assert (low_hdd_days["heating_multiplier"] > 0).all(), (
            f"Month {month}: Days with low HDD should have positive heating_multiplier "
            f"due to shoulder month smoothing"
        )

        # The multiplier for low days should be relatively small (less than the average)
        avg_multiplier = group["heating_multiplier"].mean()
        assert (
            low_hdd_days["heating_multiplier"] < avg_multiplier
        ).all(), f"Month {month}: Smoothed heating_multipliers should be below average"
def _verify_cooling_smoothing(multipliers: pd.DataFrame, month: int) -> None:
    """Verify that cooling smoothing works correctly for a shoulder month.

    The threshold and the average multiplier are evaluated per smoothing group
    (geography / weather year / day type, whichever of those columns are present),
    because temperature_multipliers.sql applies the floor per group rather than
    per calendar month.

    Raises
    ------
    AssertionError
        If a low-CDD day has a non-positive multiplier or a multiplier that is not
        below its group's average.
    """
    month_data = multipliers[(multipliers["month"] == month) & (multipliers["total_cdd"] > 0)]

    group_cols = [
        col for col in ("geography", "weather_year", "day_type") if col in month_data.columns
    ]
    if group_cols:
        groups = (group for _, group in month_data.groupby(group_cols, dropna=False))
    else:
        groups = iter([month_data])

    for group in groups:
        # Minimum threshold for this group, using the default factor constant
        min_threshold = group["cdd"].max() / DEFAULT_SHOULDER_MONTH_SMOOTHING_FACTOR

        # Days with low CDD values (below threshold) are the ones smoothing raises
        low_cdd_days = group[group["cdd"] < min_threshold]
        if low_cdd_days.empty:
            continue

        # All low CDD days should have positive cooling_multipliers due to smoothing
        assert (low_cdd_days["cooling_multiplier"] > 0).all(), (
            f"Month {month}: Days with low CDD should have positive cooling_multiplier "
            f"due to shoulder month smoothing"
        )

        # The multiplier for low days should be relatively small (less than the average)
        avg_multiplier = group["cooling_multiplier"].mean()
        assert (
            low_cdd_days["cooling_multiplier"] < avg_multiplier
        ).all(), f"Month {month}: Smoothed cooling_multipliers should be below average"


def test_shoulder_month_smoothing_prevents_spikes(default_project: Project) -> None:
    """Verify that shoulder month smoothing prevents unrealistic load spikes.

    In shoulder months (e.g., spring/fall), some days may have zero HDD/CDD while others
    have positive values. Without smoothing, this concentrates all load on the non-zero
    days, creating unrealistic spikes. With smoothing, days whose degree days fall below
    the floor are raised to it (max_degree_days / smoothing_factor within the group),
    which distributes load more evenly.
    """
    project = default_project
    con = project.con

    # Query the temperature_multipliers view from the baseline scenario
    multipliers = con.sql(
        """
        SELECT
            geography,
            weather_year,
            month,
            day_type,
            hdd,
            cdd,
            total_hdd,
            total_cdd,
            heating_multiplier,
            cooling_multiplier
        FROM baseline.temperature_multipliers
        ORDER BY month, timestamp
        """
    ).to_df()

    # Guard against a vacuous pass: with no rows none of the checks below would run.
    assert not multipliers.empty, "baseline.temperature_multipliers returned no rows"

    # Find shoulder months - months where there's a mix of zero and non-zero degree days
    shoulder_heating_months, shoulder_cooling_months = _find_shoulder_months(multipliers)

    # Verify smoothing works for shoulder heating months
    for month in shoulder_heating_months:
        _verify_heating_smoothing(multipliers, month)

    # Verify smoothing works for shoulder cooling months
    for month in shoulder_cooling_months:
        _verify_cooling_smoothing(multipliers, month)


def test_shoulder_month_smoothing_configuration(tmp_path: Path) -> None:
    """Test that shoulder month smoothing parameters can be configured in ProjectConfig."""
    from stride.models import ModelParameters, ProjectConfig

    # Test default values
    params = ModelParameters()
    assert params.enable_shoulder_month_smoothing is DEFAULT_ENABLE_SHOULDER_MONTH_SMOOTHING
    assert params.shoulder_month_smoothing_factor == DEFAULT_SHOULDER_MONTH_SMOOTHING_FACTOR

    # Test custom values; the factor must differ from the default, otherwise the
    # assertion below would pass even if the override were ignored.
    custom_factor = 5.0
    assert custom_factor != DEFAULT_SHOULDER_MONTH_SMOOTHING_FACTOR
    params_custom = ModelParameters(
        enable_shoulder_month_smoothing=False, shoulder_month_smoothing_factor=custom_factor
    )
    assert params_custom.enable_shoulder_month_smoothing is False
    assert params_custom.shoulder_month_smoothing_factor == custom_factor

    # Test in ProjectConfig
    config = ProjectConfig(
        project_id="test",
        creator="tester",
        description="Test project",
        country="USA",
        start_year=2025,
        end_year=2030,
        weather_year=2018,
        model_parameters=ModelParameters(
            enable_shoulder_month_smoothing=True, shoulder_month_smoothing_factor=2.0
        ),
    )
    assert config.model_parameters.enable_shoulder_month_smoothing is True
    assert config.model_parameters.shoulder_month_smoothing_factor == 2.0


def test_non_shoulder_months_unchanged() -> None:
    """Verify that non-shoulder months (all heating or all cooling) are unaffected by smoothing.

    In pure winter months (all days have HDD>0) or pure summer months (all days have CDD>0),
    the smoothing logic should have no effect since there are no low-degree-day days to smooth.
    """
    # Synthetic data for a cold month (January): every day has HDD in the 9-13 range,
    # well above max / factor, and no cooling.
    dates = pd.date_range("2018-01-01", "2018-01-31", freq="D")
    winter_data = pd.DataFrame(
        {
            "geography": "test_country",
            "timestamp": dates,
            "weather_year": 2018,
            "month": 1,
            "day": dates.day,
            "day_type": ["weekday" if d < 5 else "weekend" for d in dates.dayofweek],
            "bait": [5.0 + i % 5 for i in range(len(dates))],  # All below 18°C
            "hdd": [13.0 - (i % 5) for i in range(len(dates))],  # No low values
            "cdd": [0.0] * len(dates),  # No cooling
        }
    )

    # The same constant drives the SQL and the assertion below so they cannot drift apart.
    factor = float(DEFAULT_SHOULDER_MONTH_SMOOTHING_FACTOR)

    con = duckdb.connect(":memory:")
    try:
        con.register("weather_data", winter_data)

        # Calculate multipliers with the same logic as temperature_multipliers.sql
        result = con.sql(
            f"""
            WITH grouped AS (
                SELECT
                    geography,
                    month,
                    day_type,
                    COUNT(*) AS num_days,
                    SUM(hdd) AS total_hdd,
                    SUM(cdd) AS total_cdd,
                    MAX(hdd) AS max_hdd
                FROM weather_data
                GROUP BY geography, month, day_type
            )
            SELECT
                wd.day,
                wd.hdd,
                g.max_hdd,
                g.total_hdd,
                -- Without smoothing
                (wd.hdd / g.total_hdd) * g.num_days AS multiplier_no_smoothing,
                -- With smoothing (should be identical since all HDD values are above max/factor)
                (CASE WHEN g.total_hdd > 0 AND wd.hdd < (g.max_hdd / {factor})
                      THEN g.max_hdd / {factor}
                      ELSE wd.hdd END / g.total_hdd) * g.num_days AS multiplier_with_smoothing
            FROM weather_data wd
            JOIN grouped g ON wd.geography = g.geography
                AND wd.month = g.month
                AND wd.day_type = g.day_type
            """
        ).to_df()
    finally:
        # Release the connection even when the query fails.
        con.close()

    # In pure winter months with all high HDD values, smoothing should have no effect
    # (no low HDD days to smooth)
    assert (result["multiplier_no_smoothing"] == result["multiplier_with_smoothing"]).all()

    # All HDDs are positive and above their own group's threshold
    assert (result["hdd"] > 0).all()
    assert (result["hdd"] >= result["max_hdd"] / factor).all()


def test_multipliers_sum_to_num_days(default_project: Project) -> None:
    """Verify that temperature multipliers properly sum to num_days within each group.

    This is critical for energy conservation - the sum of multipliers across all days
    in a group (geography + weather year + month + day_type) should equal the number of
    days in that group. This must hold true even with shoulder month smoothing applied.
    """
    project = default_project
    con = project.con

    # Group exactly as temperature_multipliers.sql does; grouping by month/day_type
    # alone would add up several geographies or weather years against one num_days.
    sums = con.sql(
        """
        SELECT
            geography,
            weather_year,
            month,
            day_type,
            MAX(num_days) AS num_days,
            SUM(heating_multiplier) AS sum_heating_multipliers,
            SUM(cooling_multiplier) AS sum_cooling_multipliers,
            -- Allow small numerical tolerance (0.01%)
            ABS(SUM(heating_multiplier) - MAX(num_days)) < MAX(num_days) * 0.0001 AS heating_ok,
            ABS(SUM(cooling_multiplier) - MAX(num_days)) < MAX(num_days) * 0.0001 AS cooling_ok
        FROM baseline.temperature_multipliers
        GROUP BY geography, weather_year, month, day_type
        ORDER BY geography, weather_year, month, day_type
        """
    ).to_df()

    # All groups should have multipliers summing to num_days
    assert sums[
        "heating_ok"
    ].all(), f"Heating multipliers don't sum to num_days:\n{sums[~sums['heating_ok']]}"
    assert sums[
        "cooling_ok"
    ].all(), f"Cooling multipliers don't sum to num_days:\n{sums[~sums['cooling_ok']]}"