Skip to content

Com.py KeyError when Normunit = Area-ft2-BA #122

@simularis

Description

@simularis

Running Com.py with our development branch for SWHC046, we observed an error.

---------------------------------------------------------------------------
KeyError                                  Traceback (most recent call last)
File c:\DEER2026\DEER-Prototypes-EnergyPlus\scripts\data transformation\Com.py:5
      504 #%%
      505 #do area separately after normunit merge
      506 area_lookup = df_normunits[df_normunits['Normunit']=='Area-ft2-BA'][['BldgType','total_area_m2']]
----> 508 sim_annual_v3 = pd.merge(sim_annual_v2, area_lookup, on='BldgType')
      509 sim_annual_v3['measarea'] = sim_annual_v3['total_area_m2']

File c:\Users\nfette\Anaconda3\envs\deer-ep-py39\lib\site-packages\pandas\core\reshape\merge.py:110, in merge(left, right, how, on, left_on, right_on, left_index, right_index, sort, suffixes, copy, indicator, validate)
     93 @Substitution("\nleft : DataFrame or named Series")
     94 @Appender(_merge_doc, indents=0)
     95 def merge(
   (...)
    108     validate: str | None = None,
    109 ) -> DataFrame:
--> 110     op = _MergeOperation(
    111         left,
    112         right,
    113         how=how,
    114         on=on,
    115         left_on=left_on,
    116         right_on=right_on,
    117         left_index=left_index,
    118         right_index=right_index,
    119         sort=sort,
    120         suffixes=suffixes,
    121         indicator=indicator,
    122         validate=validate,
    123     )
    124     return op.get_result(copy=copy)

File c:\Users\nfette\Anaconda3\envs\deer-ep-py39\lib\site-packages\pandas\core\reshape\merge.py:703, in _MergeOperation.__init__(self, left, right, how, on, left_on, right_on, axis, left_index, right_index, sort, suffixes, indicator, validate)
    696 self._cross = cross_col
    698 # note this function has side effects
    699 (
    700     self.left_join_keys,
    701     self.right_join_keys,
    702     self.join_names,
--> 703 ) = self._get_merge_keys()
    705 # validate the merge keys dtypes. We may need to coerce
    706 # to avoid incompatible dtypes
    707 self._maybe_coerce_merge_keys()

File c:\Users\nfette\Anaconda3\envs\deer-ep-py39\lib\site-packages\pandas\core\reshape\merge.py:1179, in _MergeOperation._get_merge_keys(self)
   1175 if lk is not None:
   1176     # Then we're either Hashable or a wrong-length arraylike,
   1177     #  the latter of which will raise
   1178     lk = cast(Hashable, lk)
-> 1179     left_keys.append(left._get_label_or_level_values(lk))
   1180     join_names.append(lk)
   1181 else:
   1182     # work-around for merge_asof(left_index=True)

File c:\Users\nfette\Anaconda3\envs\deer-ep-py39\lib\site-packages\pandas\core\generic.py:1850, in NDFrame._get_label_or_level_values(self, key, axis)
   1844     values = (
   1845         self.axes[axis]
   1846         .get_level_values(key)  # type: ignore[assignment]
   1847         ._values
   1848     )
   1849 else:
-> 1850     raise KeyError(key)
   1852 # Check for duplicates
   1853 if values.ndim > 1:

KeyError: 'BldgType'

Debugging

Looking at the columns of the two dataframes to be merged:

>>> sim_annual_v2.columns
Index(['TechID', 'BldgLoc', 'BldgType_x', 'BldgHVAC', 'BldgVint', 'kwh_tot',
       'kwh_ltg', 'kwh_task', 'kwh_equip', 'kwh_htg', 'kwh_clg', 'kwh_twr',
       'kwh_aux', 'kwh_vent', 'kwh_venthtg', 'kwh_ventclg', 'kwh_refg',
       'kwh_hpsup', 'kwh_shw', 'kwh_ext', 'thm_tot', 'thm_equip', 'thm_htg',
       'thm_shw', 'deskw_ltg', 'deskw_equ', 'SizingID', 'tstat', 'Normunit',
       'BldgType_y', 'Value', 'numunits'],
      dtype='object')
>>> area_lookup.columns
Index(['BldgType', 'total_area_m2'], dtype='object')

The dataframe should have one column BldgType but has instead two columns BldgType_x and BldgType_y, suggesting a previous application of pd.merge() function was done without specifying BldgType as a "join on" column.

Looking at the code around here:

#%%
#add area based on building type
#also add normunit (also the area) for the example measure
#code may need to be tweaked if normalizing unit is different for a specific measure
unit_lookup = df_normunits[['BldgType', 'Normunit', 'Value']]
if normunit == 'Each':
unit_table = unit_lookup[unit_lookup['Normunit']=='Each'][['Normunit','Value']]
sim_annual_v2 = pd.merge(sim_annual_v1, unit_table, on='Normunit')
else:
sim_annual_v2 = pd.merge(sim_annual_v1, unit_lookup, on='Normunit')
sim_annual_v2['numunits'] = sim_annual_v2['Value']
#%%
#do area separately after normunit merge
area_lookup = df_normunits[df_normunits['Normunit']=='Area-ft2-BA'][['BldgType','total_area_m2']]
sim_annual_v3 = pd.merge(sim_annual_v2, area_lookup, on='BldgType')
sim_annual_v3['measarea'] = sim_annual_v3['total_area_m2']

In my case the code follows the IF clause to line 501 because Normunit for setup of measure SWHC046 is Area-ft2-BA.

Proposed solution

Something like this would work for merging the normunit lookup table for Area-ft2-BA, but this might need to be updated if someone adds new normalizing unit data to the Normunits.xlsx lookup tables.

@@ -496,11 +496,14 @@ sim_annual_v1['Normunit'] = normunit
 unit_lookup = df_normunits[['BldgType', 'Normunit', 'Value']]
 if normunit == 'Each':
     unit_table = unit_lookup[unit_lookup['Normunit']=='Each'][['Normunit','Value']]
     sim_annual_v2 = pd.merge(sim_annual_v1, unit_table, on='Normunit')
else:
-    sim_annual_v2 = pd.merge(sim_annual_v1, unit_lookup, on='Normunit')
+    sim_annual_v2 = pd.merge(sim_annual_v1, unit_lookup, on=['Normunit','BldgType'])

Metadata

Metadata

Assignees

No one assigned

    Labels

    No labels
    No labels

    Type

    No type

    Projects

    No projects

    Milestone

    No milestone

    Relationships

    None yet

    Development

    No branches or pull requests

    Issue actions