Source code for ebm.model.dataframemodels

from typing import cast, Optional

import pandas as pd
import pandera as pa
from pandera.typing import DataFrame, Series
from pandera.typing.common import DataFrameBase

from ebm.model.column_operations import explode_unique_columns, explode_column_alias
from ebm.model.energy_purpose import EnergyPurpose


[docs] class EnergyNeedYearlyImprovements(pa.DataFrameModel): building_category: Series[str] building_code: Series[str] purpose: Series[str] value: Series[float] = pa.Field(ge=0.0, coerce=True) start_year: Optional[Series[int]] = pa.Field(coerce=True, default=2020) function: Series[str] end_year: Optional[Series[int]] = pa.Field(coerce=True, default=2050) _filename = 'energy_need_improvements'
[docs] class Config: unique = ['building_category', 'building_code', 'purpose', 'start_year', 'function', 'end_year']
[docs] class YearlyReduction(pa.DataFrameModel): building_category: Series[str] building_code: Series[str] purpose: Series[str] start_year: Series[int] = pa.Field(coerce=True, default=2020) end_year: Series[int] = pa.Field(coerce=True, default=2050) yearly_efficiency_improvement: Series[float] = pa.Field(ge=0.0, coerce=True)
[docs] class Config: unique = ['building_category', 'building_code', 'purpose', 'start_year', 'function', 'end_year']
[docs] @staticmethod def from_energy_need_yearly_improvements( en_yearly_improvement: DataFrameBase[EnergyNeedYearlyImprovements]|EnergyNeedYearlyImprovements) -> 'DataFrameBase[YearlyReduction]': """ Transforms a EnergyNeedYearlyImprovement DataFrame into a EnergyNeedYearlyReduction DataFrame. Parameters ---------- en_yearly_improvement : DataFrame[EnergyNeedYearlyImprovements] Returns ------- DataFrameBase[YearlyReduction] Raises ------ pa.errors.SchemaError When the resulting dataframe fails to validate pa.errors.SchemaErrors When the resulting dataframe fails to validate """ unique_columns = ['building_category', 'building_code', 'purpose', 'start_year', 'end_year'] # Casting en_yearly_improvement to DataFrame so that type checkers complaining about datatype df = cast(pd.DataFrame, en_yearly_improvement) if 'start_year' not in df.columns: df['start_year'] = 2020 if 'end_year' not in df.columns: df['end_year'] = 2050 df = df.query('function=="yearly_reduction"') df = explode_unique_columns(df, unique_columns=unique_columns) df = explode_column_alias(df, column='purpose', values=[p for p in EnergyPurpose], alias='default', de_dup_by=unique_columns) df['yearly_efficiency_improvement'] = df['value'] df = df[['building_category', 'building_code', 'purpose', 'start_year', 'end_year', 'yearly_efficiency_improvement']] df = df.reset_index() return YearlyReduction.validate(df, lazy=True)
[docs] class PolicyImprovement(pa.DataFrameModel): building_category: Series[str] building_code: Series[str] purpose: Series[str] start_year: Series[int] = pa.Field(ge=0, coerce=True) end_year: Series[int] = pa.Field(ge=0, coerce=True) improvement_at_end_year: Series[float] = pa.Field(ge=0.0, lt=2.0, coerce=True)
[docs] class Config: unique = ['building_category', 'building_code', 'purpose', 'start_year', 'end_year']
[docs] @pa.dataframe_check def start_year_before_end_year(cls, df: pd.DataFrame) -> Series[bool]: return df.start_year < df.end_year
[docs] @staticmethod def from_energy_need_yearly_improvements( energy_need_improvements: DataFrameBase[EnergyNeedYearlyImprovements] | EnergyNeedYearlyImprovements) -> 'DataFrameBase[PolicyImprovement]': energy_need_improvements = cast(pd.DataFrame, energy_need_improvements) df = energy_need_improvements.query('function=="improvement_at_end_year"') if 'start_year' not in df.columns: df['start_year'] = 2020 if 'end_year' not in df.columns: df['end_year'] = 2050 unique_columns = ('building_category', 'building_code', 'purpose', 'start_year', 'function', 'end_year',) df = explode_unique_columns(df, unique_columns=unique_columns) df = explode_column_alias(df, column='purpose', values=[p for p in EnergyPurpose], alias='default', de_dup_by=unique_columns) df['improvement_at_end_year'] = df['value'] return PolicyImprovement.validate(df)