Source code for ebm.model.energy_need_filter

from typing import Optional

import pandas as pd

from ebm.model.building_category import BuildingCategory
from ebm.model.building_condition import BuildingCondition
from ebm.model.column_operations import replace_column_alias
from ebm.model.energy_purpose import EnergyPurpose


def filter_original_condition(df: pd.DataFrame, building_category: BuildingCategory | str, tek: str,
                              purpose: str) -> pd.DataFrame:
    """
    Select the rows for one building category, TEK and purpose from an exploded, de-duplicated frame.

    The input is first passed through ``explode_dataframe`` and then through
    ``de_dupe_dataframe`` (default key columns). The rows whose
    ``building_category``, ``building_code`` and ``purpose`` equal the
    requested values are returned.

    Parameters
    ----------
    df : pd.DataFrame
        Frame to explode, de-duplicate and filter.
    building_category : BuildingCategory | str
        Value compared against the ``building_category`` column.
    tek : str
        Value compared against the ``building_code`` column.
    purpose : str
        Value compared against the ``purpose`` column.

    Returns
    -------
    pd.DataFrame
        The matching rows; empty when nothing matches.
    """
    candidates = de_dupe_dataframe(explode_dataframe(df))
    matches = (
        (candidates.building_category == building_category)
        & (candidates.building_code == tek)
        & (candidates.purpose == purpose)
    )
    return candidates[matches]
def filter_improvement_building_upgrade(df: pd.DataFrame, building_category: BuildingCategory | str, tek: str,
                                        purpose: str) -> pd.DataFrame:
    """
    Select the rows for one building category, TEK and purpose, with one row per existing building condition.

    The input is exploded with ``explode_dataframe`` and de-duplicated on
    ``building_category``, ``building_code``, ``purpose`` and
    ``building_condition``. The rows matching the requested category, TEK and
    purpose are then padded with filler rows, one for every condition returned
    by ``BuildingCondition.existing_conditions()``, whose ``reduction_share``
    is ``0.0``. Rows coming from ``df`` win over filler rows with the same key.

    Parameters
    ----------
    df : pd.DataFrame
        Frame to explode, de-duplicate and filter.
    building_category : BuildingCategory | str
        Value compared against the ``building_category`` column.
    tek : str
        Value compared against the ``building_code`` column.
    purpose : str
        Value compared against the ``purpose`` column.

    Returns
    -------
    pd.DataFrame
        Matching rows followed by any filler rows needed to cover every existing condition.
    """
    key_columns = ['building_category', 'building_code', 'purpose', 'building_condition']

    candidates = de_dupe_dataframe(explode_dataframe(df), unique_columns=key_columns)
    matches = (
        (candidates.building_category == building_category)
        & (candidates.building_code == tek)
        & (candidates.purpose == purpose)
    )
    selected = candidates[matches]

    # One zero-reduction row per existing condition, used only where df has no row for that key.
    fallback_rows = [
        (building_category, tek, purpose, condition, 0.0)
        for condition in BuildingCondition.existing_conditions()
    ]
    fallback = pd.DataFrame(fallback_rows, columns=key_columns + ['reduction_share'])

    # Real rows are concatenated first, so keep='first' prefers them over the filler rows.
    combined = pd.concat([selected, fallback])
    return combined.drop_duplicates(key_columns, keep='first')
def de_dupe_dataframe(df: pd.DataFrame, unique_columns: Optional[list[str]] = None) -> pd.DataFrame:
    """
    Drop duplicate rows from df, keeping the first row for every key.

    Equivalent to ``df.drop_duplicates(unique_columns)``.

    Parameters
    ----------
    df : pd.DataFrame
        Frame to de-duplicate; it is not modified.
    unique_columns : list[str], optional
        Columns that make up the key. When omitted or empty,
        ``['building_category', 'building_code', 'purpose']`` is used.

    Returns
    -------
    pd.DataFrame
        A frame with the first occurrence of every key.
    """
    if not unique_columns:
        unique_columns = ['building_category', 'building_code', 'purpose']
    return df.drop_duplicates(unique_columns)
def explode_dataframe(df: pd.DataFrame, building_code_list: Optional[list[str]] = None) -> pd.DataFrame:
    """
    Expand aliases and ``+``-combined values in building_category, building_code and purpose.

    Aliases are resolved with ``replace_column_alias``:

    - building_category: ``default`` maps to every ``BuildingCategory``,
      ``residential`` to the members where ``is_residential()`` is true and
      ``non_residential`` to the remaining members.
    - building_code: ``default`` maps to ``building_code_list``.
    - purpose: ``default`` maps to every ``EnergyPurpose``.

    Each of the three columns is then split on ``+`` and exploded so that every
    row holds a single value per column. The helper columns ``bc_priority``,
    ``t_priority``, ``p_priority`` and their sum ``priority`` are added before
    exploding; any existing ``priority`` column is overwritten. The result is
    sorted by building_category, building_code, purpose and priority, and gets
    a boolean ``dupe`` column that is true for every row sharing its
    (building_category, building_code, purpose) key with another row.

    NOTE(review): this presumably relies on replace_column_alias returning
    ``+``-joined strings, so that expanded aliases get a higher priority number
    and sort after more specific rows. Confirm against that helper.

    Parameters
    ----------
    df : pd.DataFrame
        Frame with building_category, building_code and purpose columns.
    building_code_list : list[str], optional
        Building codes that replace ``default``. When omitted or empty:
        TEK49 PRE_TEK49 PRE_TEK49_RES_1950 TEK69 TEK87 TEK97 TEK07 TEK10 TEK17 TEK21 TEK01

    Returns
    -------
    pd.DataFrame
        The exploded and sorted frame, including the priority and dupe columns.
    """
    def _combined_count(value):
        # 0 for a single value, otherwise the number of '+'-separated parts.
        return len(value.split('+')) if '+' in value else 0

    if not building_code_list:
        building_code_list = [
            'TEK49', 'PRE_TEK49', 'PRE_TEK49_RES_1950', 'TEK69', 'TEK87', 'TEK97',
            'TEK07', 'TEK10', 'TEK17', 'TEK21', 'TEK01',
        ]

    # Resolve the aliases of each key column.
    category_aliases = {
        'default': list(BuildingCategory),
        'residential': [category for category in BuildingCategory if category.is_residential()],
        'non_residential': [category for category in BuildingCategory if not category.is_residential()],
    }
    df = replace_column_alias(df, column='building_category', values=category_aliases)
    df = replace_column_alias(df, 'building_code', values=building_code_list, alias='default')
    df = replace_column_alias(df, 'purpose', values=list(EnergyPurpose), alias='default')

    # Priority must be computed before exploding, while the '+' separators are still present.
    df['bc_priority'] = df.building_category.apply(_combined_count)
    df['t_priority'] = df.building_code.apply(_combined_count)
    df['p_priority'] = df.purpose.apply(_combined_count)
    df['priority'] = df.bc_priority + df.t_priority + df.p_priority

    # Split and explode one column at a time; assign() returns a copy, so the frame above is left as is.
    for column in ('building_category', 'building_code', 'purpose'):
        df = df.assign(**{column: df[column].str.split('+')}).explode(column)

    key_columns = ['building_category', 'building_code', 'purpose']
    ordered = df.sort_values(by=key_columns + ['priority'])
    ordered['dupe'] = ordered.duplicated(key_columns, keep=False)
    return ordered
def main():
    """
    Explode and print every CSV file named on the command line.

    When no file arguments are given, every ``*.csv`` file in the ``input``
    directory is used. For each file a ``# <filename>`` header is printed,
    followed by the exploded frame. A ``KeyError`` raised while loading or
    exploding a file is reported on stderr and the next file is processed.

    pandas display options are changed so that frames are printed in full.
    """
    import pathlib
    import sys

    def _load_file(infile):
        # Read one CSV and explode it, listing duplicated keys last.
        df = pd.read_csv(infile)
        building_code_list = 'TEK49 PRE_TEK49 TEK69 TEK87 TEK97 TEK07 TEK10 TEK17'.split(' ')
        return explode_dataframe(df, building_code_list=building_code_list).sort_values(
            by=['dupe', 'building_category', 'building_code', 'purpose', 'priority'])

    pd.set_option('display.max_rows', None)
    pd.set_option('display.max_columns', None)
    pd.set_option('display.width', None)

    if len(sys.argv) < 2:
        files = list(pathlib.Path('input').glob('*.csv'))
    else:
        files = [pathlib.Path(f) for f in sys.argv[1:]]

    for filename in files:
        print(f'# {filename}')
        try:
            df = _load_file(filename)
        except KeyError as key_error:
            # file= is required here; passed positionally, sys.stderr would be printed as a value on stdout.
            print('KeyError: missing ', str(key_error), file=sys.stderr)
        else:
            print(df)
# Run the command line entry point only when this module is executed as a script, not when it is imported.
if __name__ == '__main__': main()