Source code for ebm.model.data_classes

import typing
from dataclasses import dataclass

import pandas as pd


@dataclass
class ScurveParameters:
    """
    Plain data record holding one set of S-curve parameters.

    The class carries no behaviour of its own; it only groups the values
    below. All fields are required and positional in the order listed.

    Attributes
    ----------
    building_category : str
    condition : str
    earliest_age : int
    average_age : int
    rush_years : int
    last_age : int
    rush_share : float
    never_share : float
    """

    # Identification of the curve.
    building_category: str
    condition: str

    # Integer-valued parameters.
    earliest_age: int
    average_age: int
    rush_years: int
    last_age: int

    # Float-valued parameters.
    rush_share: float
    never_share: float
@dataclass
class TEKParameters:
    """
    Plain data record holding the parameters of a single TEK.

    The class carries no behaviour of its own; it only groups the values
    below. All fields are required and positional in the order listed.

    Attributes
    ----------
    tek : str
    building_year : int
    start_year : int
    end_year : int
    """

    # Identifier of the TEK.
    tek: str

    # Integer year values associated with the TEK.
    building_year: int
    start_year: int
    end_year: int
[docs] @dataclass(frozen=True) class YearRange: """ A class to represent a period model with a start and end year. Attributes ---------- start : int The starting year of the period. end : int The ending year of the period. year_range : tuple of int A tuple containing all years in the period from start to end (inclusive). Methods ------- __post_init__(): Initializes the years attribute after the object is created. __iter__(): Returns an iterator over the years in the period. range() -> tuple of int: Returns a tuple of years from start to end (inclusive). subset(offset: int = 0, length: int = -1) -> 'YearRange': Creates a subset YearRange of this year range. to_index() -> pd.Index: Converts the year_range to a pandas Index. Examples -------- Slice pandas DataFrame with YearRange. >>> df = pd.DataFrame(data=['first', 'b', 'c', 'd', 'last'], ... index=[2010, 2011, 2012, 2013, 2014]) >>> years = YearRange(2011, 2013) >>> df.loc[years] 0 2011 b 2012 c 2013 d >>> """ start: int end: int year_range: typing.Tuple[int] = tuple() def __post_init__(self): """ Initializes the years attribute after the object is created. """ if self.start > self.end: raise ValueError(f'Start year {self.start} cannot be greater than end year {self.end}') object.__setattr__(self, 'year_range', self.range()) def __str__(self): return f'YearRange(start={self.start}, end={self.end})' def __repr__(self): return str(self) def __len__(self): return len(self.year_range) def __iter__(self) -> typing.Generator[int, None, None]: """ Returns an iterator over the years in the period. Yields ------ int The next year in the period. """ for y in self.year_range: yield y
[docs] def range(self) -> typing.Tuple[int]: """ Returns a tuple of years from start to end for use with indexes and such. Returns ------- tuple of int Tuple containing all years in sequence from start to end (inclusive). """ return tuple(range(self.start, self.end + 1))
[docs] def subset(self, offset: int = 0, length: int = -1) -> 'YearRange': """ Creates a subset YearRange of this year range. Parameters ---------- offset : int How many years to skip after the first year. length : int, optional How many years to return after the offset. When -1, all remaining years are returned. Default: -1 Returns ------- year_range : YearRange Raises ------ ValueError When `offset` is less than 0 or `offset` is greater than the number of years in the YearRange. Examples -------- >>> YearRange(2010, 2016).subset(2,3) YearRange(start=2012, end=2014, year_range=(2012, 2013, 2014)) >>> YearRange(2010, 2016).subset(2,-1) YearRange(start=2012, end=2016, year_range=(2012, 2013, 2014, 2015, 2016)) >>> YearRange(2010, 2016).subset(3) YearRange(start=2013, end=2016, year_range=(2013, 2014, 2015, 2016)) """ if offset < 0: raise ValueError(f'Offset cannot be negative: {offset}') if offset + self.start > self.end: raise ValueError(f'Offset is out of range: {offset=} >= {len(self)}') start_year = self.start + offset last_year = start_year + length - 1 if length > 0 and start_year + length < self.end else self.end return YearRange(start_year, last_year)
[docs] def to_index(self, name='year') -> pd.Index: """ Converts the year_range to a pandas Index. Parameters ---------- name : str, optional name of the index. Default: 'name' Returns ------- pd.Index Pandas Index object containing the years in the range. """ return pd.Index(self.year_range, name=name)
[docs] def to_dataframe(self, name='year') -> pd.DataFrame: """ Converts the year_range to a pandas DataFrame. Parameters ---------- name : str, optional name of the column. Default: 'year' Returns ------- pd.DataFrame Pandas Dataframe object containing the years in the range in the column year. """ return pd.DataFrame(self.year_range, columns=[name])
[docs] def cross_join(self, df: pd.DataFrame) -> pd.DataFrame: """ Join every row in df with every year in a YearRange Parameters ---------- df : pd.DataFrame dataframe to join with YearRange Returns ------- pd.DataFrame Pandas Dataframe containing the original dataframe and a year column """ return pd.merge(left=df, right=self.to_dataframe(name='year'), how='cross')
[docs] @staticmethod def from_series(s : pd.Series): if s.name == 'year': return YearRange(s.min(), s.max()) return YearRange(s.index.get_level_values(level='year').min(), s.index.get_level_values(level='year').max())
def __getitem__(self, key: int | slice) -> pd.Index: """ Returns a pandas Index object for the specified slice of the year range. Parameters ---------- key : int | slice The index or slice of the year range to return. Returns ------- pd.Index A pandas Index object containing the specified years. """ if isinstance(key, int): return pd.Index([self.year_range[key]], name='year') elif isinstance(key, slice): return pd.Index(self.year_range[key], name='year') else: raise TypeError(f"Invalid key type: {type(key)}")