Source code for ebm.model.building_category

import typing
from enum import EnumType, StrEnum, unique

import pandas as pd
from loguru import logger

RESIDENTIAL = 'residential'
NON_RESIDENTIAL = 'non_residential'


[docs] class MyEnumType(EnumType): def __contains__(cls, value): return value in cls._value2member_map_
@unique
class BuildingCategory(StrEnum, metaclass=MyEnumType):
    """String enum of the building categories known to the model.

    HOUSE and APARTMENT_BLOCK are the residential categories; every other
    member is non-residential. Note that the STORAGE member carries the
    value 'storage_repairs'.
    """

    HOUSE = 'house'
    APARTMENT_BLOCK = 'apartment_block'
    KINDERGARTEN = 'kindergarten'
    SCHOOL = 'school'
    UNIVERSITY = 'university'
    OFFICE = 'office'
    RETAIL = 'retail'
    HOTEL = 'hotel'
    HOSPITAL = 'hospital'
    NURSING_HOME = 'nursing_home'
    CULTURE = 'culture'
    SPORTS = 'sports'
    STORAGE = 'storage_repairs'

    def __repr__(self):
        """Return 'ClassName.MEMBER_NAME', e.g. 'BuildingCategory.HOUSE'."""
        return f'{type(self).__name__}.{self.name}'

    def yearly_construction_floor_area(self):
        """Log a warning and raise; no category is supported at the moment.

        Raises:
            NotImplementedError: always.
        """
        logger.warning('Using static yearly_construction_floor_area')
        message = f'yearly_construction_floor_area does not support category {self.name} (yet)'
        raise NotImplementedError(message)

    def is_residential(self) -> bool:
        """Return True for HOUSE and APARTMENT_BLOCK, False otherwise."""
        return self in (BuildingCategory.HOUSE, BuildingCategory.APARTMENT_BLOCK)

    def is_non_residential(self) -> bool:
        """Return True for every category that is not residential."""
        return not self.is_residential()

    @staticmethod
    def from_string(category_name: str) -> 'BuildingCategory':
        """Look up a member by its value, ignoring case, spaces and underscores.

        Args:
            category_name (str): e.g. 'Apartment block', 'nursing_home'.

        Returns:
            building_category (BuildingCategory): the first member whose
                normalised value equals the normalised ``category_name``.

        Raises:
            ValueError: category_name not found in BuildingCategory
        """
        wanted = category_name.lower().replace(' ', '').replace('_', '')
        found = next(
            (member for member in BuildingCategory
             if member.value.lower().replace('_', '') == wanted),
            None,
        )
        if found is None:
            raise ValueError(f'No such building category {category_name}')
        return found
def from_norsk(norsk: str) -> BuildingCategory:
    """Translate a Norwegian building category name to a BuildingCategory.

    The comparison is case-insensitive. Names without a Norwegian alias in
    the table below (there is none for STORAGE) are handed unchanged to
    ``BuildingCategory.from_string``, which raises ``ValueError`` when it
    cannot match them either.
    """
    norwegian_names = {
        'småhus': BuildingCategory.HOUSE,
        'leilighet': BuildingCategory.APARTMENT_BLOCK,
        'boligblokk': BuildingCategory.APARTMENT_BLOCK,
        'barnehage': BuildingCategory.KINDERGARTEN,
        'kontor': BuildingCategory.OFFICE,
        'skole': BuildingCategory.SCHOOL,
        'universitet': BuildingCategory.UNIVERSITY,
        'sykehjem': BuildingCategory.NURSING_HOME,
        'sykehus': BuildingCategory.HOSPITAL,
        'hotell': BuildingCategory.HOTEL,
        'idrettsbygg': BuildingCategory.SPORTS,
        'forretningsbygg': BuildingCategory.RETAIL,
        'kulturbygg': BuildingCategory.CULTURE,
    }
    key = norsk.lower()
    if key in norwegian_names:
        return norwegian_names[key]
    # Not a known Norwegian name: try the (English) member values.
    return BuildingCategory.from_string(norsk)
def expand_building_category(row: pd.Series) -> pd.DataFrame:
    """
    Expand a row whose building category is a group label into one row per
    member category of that group.

    Parameters
    ----------
    row : pd.Series
        A single row of data, including a 'building_category' field.

    Returns
    -------
    pd.DataFrame
        - If 'building_category' is already a BuildingCategory value: a
          one-row DataFrame holding the row unchanged.
        - If it is the residential or non-residential group label: one row
          per category in that group, with 'building_category' as the first
          column and all other fields of the row repeated.

    Raises
    ------
    ValueError
        If 'building_category' is neither a BuildingCategory value nor one
        of the two group labels.
    """
    category = row['building_category']
    if category in BuildingCategory:
        return pd.DataFrame([row.to_dict()])

    if category == NON_RESIDENTIAL:
        categories = [b for b in BuildingCategory if b.is_non_residential()]
    elif category == RESIDENTIAL:
        categories = [b for b in BuildingCategory if b.is_residential()]
    else:
        # Previously this fell through with `categories` unbound and failed
        # with an UnboundLocalError further down.
        raise ValueError(f'Unknown building category or group: {category!r}')

    # Repeat every non-category field once per expanded category.
    values = {k: [v] * len(categories) for k, v in row.to_dict().items() if k != 'building_category'}
    return pd.DataFrame({'building_category': categories, **values})
# Apply the function to each row and concatenate the results
def expand_building_categories(df: pd.DataFrame, unique_columns: typing.List[str] = None):
    """
    Unpack group rows (residential / non-residential) of the input dataframe
    into one row per building category contained in the group.

    Rows that name a specific category are preferred over rows produced by
    expanding a group: an expanded row is dropped when its building category
    also occurs among the specific rows.

    Parameters
    ----------
    df : pandas.core.frame.DataFrame
        Must have a 'building_category' column.
    unique_columns : list of str, optional
        Column names that should be treated as jointly unique. When given,
        duplicates on these columns are dropped first, keeping the last
        occurrence. When omitted or empty, no de-duplication is done.

    Returns
    -------
    pandas.core.frame.DataFrame
        The expanded group rows followed by the specific rows.
    """
    if unique_columns:
        df = df.drop_duplicates(subset=unique_columns, ignore_index=True, keep='last')

    is_group = df.building_category.isin([RESIDENTIAL, NON_RESIDENTIAL])
    group_rows = df[is_group]
    specific = df[~is_group]

    frames = []
    for _, group_row in group_rows.iterrows():
        expanded = expand_building_category(group_row)
        # Specific rows win over rows that only exist because of a group.
        overridden = expanded.building_category.isin(specific.building_category)
        frames.append(expanded[~overridden])
    frames.append(specific)

    # NOTE(review): reindex() without arguments leaves the index as it is, so
    # the result can contain repeated index labels; presumably a reset of the
    # index was intended -- confirm with callers before changing.
    return pd.concat(frames).reindex()
def collapse_building_category(building_category: pd.Series) -> pd.Series:
    """
    Replace building category labels with building group labels where appropriate.

    Each value is converted to ``str`` and split on ``"+"`` if it contains
    one, otherwise on ``","``. Tokens are stripped and lower-cased. When all
    members of a group are present they are removed and the group label is
    placed first; remaining tokens keep their order and are joined with the
    same separator.

    Building groups implemented:

    - residential
        house, apartment_block
    - non_residential
        culture, hospital, hotel, kindergarten, nursing_home, office, retail,
        school, sports, storage_repairs, university
    - default
        all of the above

    Parameters
    ----------
    building_category : pd.Series
        Series of building category strings, tokens separated by either
        ``"+"`` or ``","``.

    Returns
    -------
    pd.Series
        Series with building categories collapsed into groups.

    Examples
    --------
    >>> import pandas as pd
    >>> collapse_building_category(pd.Series(["house+apartment_block"]))
    0    residential
    dtype: object
    >>> collapse_building_category(pd.Series(
    ...     ["culture+hospital+hotel+kindergarten+nursing_home+office+retail"
    ...      "+school+sports+storage_repairs+university"]
    ... ))
    0    non_residential
    dtype: object
    """
    residential_members = frozenset({'apartment_block', 'house'})
    non_residential_members = frozenset({
        'culture', 'hospital', 'hotel', 'kindergarten', 'nursing_home', 'office',
        'retail', 'school', 'sports', 'storage_repairs', 'university',
    })

    def collapse(text: str) -> str:
        separator = '+' if '+' in text else ','
        parts = [part.strip().lower() for part in text.split(separator)]
        present = set(parts)

        all_residential = residential_members <= present
        all_non_residential = non_residential_members <= present

        # At most one group label is emitted; both groups together are 'default'.
        if all_residential and all_non_residential:
            prefix = ['default']
        elif all_residential:
            prefix = ['residential']
        elif all_non_residential:
            prefix = ['non_residential']
        else:
            prefix = []

        absorbed = set()
        if all_residential:
            absorbed |= residential_members
        if all_non_residential:
            absorbed |= non_residential_members

        remaining = [part for part in parts if part not in absorbed]
        return separator.join(prefix + remaining)

    as_text = building_category.astype(str).copy()
    return as_text.apply(collapse)