import os
import pathlib
import shutil
import typing
import pandas as pd
from loguru import logger
from pandera.errors import SchemaErrors, SchemaError
import ebm.validators as validators
from ebm.model.defaults import default_calibrate_heating_rv, default_calibrate_energy_consumption
[docs]
class FileHandler:
"""
Handles file operations.
"""
# Filenames
BUILDING_CONDITIONS = 'building_conditions.csv'
BUILDING_CODE_PARAMS = 'building_code_parameters.csv'
S_CURVE = 's_curve.csv'
POPULATION_FORECAST = 'population_forecast.csv'
NEW_BUILDINGS_RESIDENTIAL = 'new_buildings_residential.csv'
AREA_NEW_RESIDENTIAL_BUILDINGS = 'area_new_residential_buildings.csv'
AREA = 'area.csv'
BEHAVIOUR_FACTOR = 'energy_need_behaviour_factor.csv'
ENERGY_NEED_ORIGINAL_CONDITION = 'energy_need_original_condition.csv'
IMPROVEMENT_BUILDING_UPGRADE = 'improvement_building_upgrade.csv'
ENERGY_NEED_YEARLY_IMPROVEMENTS = 'energy_need_improvements.csv'
HOLIDAY_HOME_STOCK = 'holiday_home_stock.csv'
HOLIDAY_HOME_ENERGY_CONSUMPTION = 'holiday_home_energy_consumption.csv'
AREA_PER_PERSON = 'area_per_person.csv'
HEATING_SYSTEM_INITIAL_SHARES = 'heating_system_initial_shares.csv'
HEATING_SYSTEM_EFFICIENCIES = 'heating_system_efficiencies.csv'
HEATING_SYSTEM_FORECAST = 'heating_system_forecast.csv'
CALIBRATE_ENERGY_REQUIREMENT = 'calibrate_heating_rv.xlsx'
CALIBRATE_ENERGY_CONSUMPTION = 'calibrate_energy_consumption.xlsx'
input_directory: pathlib.Path
[docs]
def __init__(self, directory: typing.Union[str, pathlib.Path, None] = None):
"""
Constructor for FileHandler Object. Sets FileHandler.input_directory.
Parameters
----------
directory : pathlib.Path | None | (str)
When directory is None the constructor will attempt to read directory location from
environment variable EBM_INPUT_DIRECTORY
"""
if directory is None:
# Use 'input' as fall back when EBM_INPUT_DIRECTORY is not set in environment.
directory = os.environ.get('EBM_INPUT_DIRECTORY', 'input')
self.input_directory = directory if isinstance(directory, pathlib.Path) else pathlib.Path(directory)
self.files_to_check = [self.BUILDING_CODE_PARAMS, self.S_CURVE, self.POPULATION_FORECAST,
self.NEW_BUILDINGS_RESIDENTIAL, self.AREA_NEW_RESIDENTIAL_BUILDINGS,
self.AREA, self.BEHAVIOUR_FACTOR, self.ENERGY_NEED_ORIGINAL_CONDITION,
self.IMPROVEMENT_BUILDING_UPGRADE, self.ENERGY_NEED_YEARLY_IMPROVEMENTS,
self.HOLIDAY_HOME_ENERGY_CONSUMPTION, self.HOLIDAY_HOME_STOCK,
self.AREA_PER_PERSON, self.HEATING_SYSTEM_INITIAL_SHARES, self.HEATING_SYSTEM_EFFICIENCIES, self.HEATING_SYSTEM_FORECAST]
def __repr__(self):
return f'FileHandler(input_directory="{self.input_directory}")'
def __str__(self):
return repr(self)
[docs]
@staticmethod
def default_data_directory() -> pathlib.Path:
"""
Returns the path for ebm default data. The function is used when content is needed for a new input directory
Not to be confused with FileHandler.input_directory.
Returns
-------
pathlib.Path
See Also
--------
create_missing_input_files
"""
data_directory = pathlib.Path(__file__).parent.parent / 'data'
default_data_directory = data_directory / 'calibrated'
if not default_data_directory.is_dir():
msg = f'Could not find default data directory {default_data_directory}'
raise FileNotFoundError(msg)
if not default_data_directory.is_dir():
msg = f'{default_data_directory} is not a directory'
raise NotADirectoryError(msg)
return default_data_directory
[docs]
def get_file(self, file_name: str) -> pd.DataFrame:
"""
Finds and returns a file by searching in the folder defined by self.input_folder
Parameters:
- file_name (str): Name of the file to retrieve.
Returns:
- file_df (pd.DataFrame): DataFrame containing file data.
"""
logger.debug(f'get_file {file_name}')
file_path: pathlib.Path = pathlib.Path(self.input_directory) / file_name
logger.debug(f'{file_path=}')
try:
if file_path.suffix == '.xlsx':
file_df = pd.read_excel(file_path)
elif file_path.suffix == '.csv':
file_df = pd.read_csv(file_path)
else:
msg = f'{file_name} is not of type xlsx or csv'
logger.error(msg)
raise ValueError(msg)
return file_df
except FileNotFoundError as ex:
logger.exception(ex)
logger.debug(f'Current directory is {os.getcwd()}')
logger.error(f'Unable to open {file_path}. File not found.')
raise
except PermissionError as ex:
logger.exception(ex)
logger.error(f'Unable to open {file_path}. Permission denied.')
raise
except IOError as ex:
logger.exception(ex)
logger.error(f'Unable to open {file_path}. Unable to read file.')
raise
[docs]
def get_building_code(self) -> pd.DataFrame:
"""
Get TEK parameters DataFrame.
Returns:
- building_code_params (pd.DataFrame): DataFrame containing TEK parameters.
"""
building_code_params = self.get_file(self.BUILDING_CODE_PARAMS)
return building_code_params
[docs]
def get_s_curve(self) -> pd.DataFrame:
"""
Get S-curve parameters DataFrame.
Returns:
- scurve_params (pd.DataFrame): DataFrame containing S-curve parameters.
"""
scurve_params = self.get_file(self.S_CURVE)
return scurve_params
[docs]
def get_construction_population(self) -> pd.DataFrame:
"""
Get population and household size DataFrame from a file.
Returns:
- construction_population (pd.DataFrame): Dataframe containing population numbers
year population household_size
"""
return self.get_file(self.POPULATION_FORECAST)
[docs]
def get_population(self) -> pd.DataFrame:
"""
Loads population data from population.csv as float64
Should probably be merged with get_construction_population
Returns population : pd.DataFrame
dataframe with population
-------
"""
file_path = self.input_directory / self.POPULATION_FORECAST
logger.debug(f'{file_path=}')
return pd.read_csv(file_path, dtype={"household_size": "float64"})
[docs]
def get_construction_building_category_share(self) -> pd.DataFrame:
"""
Get building category share by year DataFrame from a file.
The number can be used in conjunction with number of households to calculate total number
of buildings of category house and apartment block
Returns:
- construction_population (pd.DataFrame): Dataframe containing population numbers
"year", "Andel nye småhus", "Andel nye leiligheter", "Areal nye småhus", "Areal nye leiligheter"
"""
return self.get_file(self.NEW_BUILDINGS_RESIDENTIAL)
[docs]
def get_building_category_area(self) -> pd.DataFrame:
"""
Get population and household size DataFrame from a file.
Returns:
- construction_population (pd.DataFrame): Dataframe containing population numbers
"area","type of building","2010","2011"
"""
file_path = self.input_directory / self.AREA_NEW_RESIDENTIAL_BUILDINGS
logger.debug(f'{file_path=}')
return pd.read_csv(file_path,
index_col=0, header=0)
[docs]
def get_area_parameters(self) -> pd.DataFrame:
"""
Get dataframe with area parameters.
Returns:
- area_parameters (pd.DataFrame): Dataframe containing total area (m^2) per
building category and TEK.
"""
return self.get_file(self.AREA)
[docs]
def get_energy_req_original_condition(self) -> pd.DataFrame:
"""
Get dataframe with energy requirement (kWh/m^2) for floor area in original condition.
Returns
-------
pd.DataFrame
Dataframe containing energy requirement (kWh/m^2) for floor area in original condition,
per building category and purpose.
"""
return self.get_file(self.ENERGY_NEED_ORIGINAL_CONDITION)
[docs]
def get_energy_req_reduction_per_condition(self) -> pd.DataFrame:
"""
Get dataframe with shares for reducing the energy requirement of the different building conditions.
Returns
-------
pd.DataFrame
Dataframe containing energy requirement reduction shares for the different building conditions,
per building category, TEK and purpose.
"""
return self.get_file(self.IMPROVEMENT_BUILDING_UPGRADE)
[docs]
def get_energy_need_yearly_improvements(self) -> pd.DataFrame:
"""
Get dataframe with yearly efficiency rates for energy requirement improvements.
Returns
-------
pd.DataFrame
Dataframe containing yearly efficiency rates (%) for energy requirement improvements,
per building category, tek and purpose.
"""
return self.get_file(self.ENERGY_NEED_YEARLY_IMPROVEMENTS)
[docs]
def get_holiday_home_energy_consumption(self) -> pd.DataFrame:
return self.get_file(self.HOLIDAY_HOME_ENERGY_CONSUMPTION)
[docs]
def get_holiday_home_by_year(self) -> pd.DataFrame:
return self.get_file(self.HOLIDAY_HOME_STOCK)
[docs]
def get_area_per_person(self):
return self.get_file(self.AREA_PER_PERSON)
[docs]
def get_calibrate_heating_rv(self) -> pd.DataFrame:
"""
Retrieve the calibrated heating requirement values
This method attempts to load the energy requirement calibration file from the
input directory. It first checks for a file without extension, then for a `.csv`
version. If neither is found, it returns a default DataFrame.
Returns
-------
pd.DataFrame
A DataFrame containing the calibrated heating requirement values. If no file
is found, a default DataFrame is returned.
"""
calibrate_heating_rv = self.input_directory / self.CALIBRATE_ENERGY_REQUIREMENT
if calibrate_heating_rv.is_file():
return self.get_file(calibrate_heating_rv.name)
if calibrate_heating_rv.with_suffix('.csv').is_file():
return self.get_file(calibrate_heating_rv.with_suffix('.csv').name)
return default_calibrate_heating_rv()
[docs]
def get_calibrate_heating_systems(self) -> pd.DataFrame:
"""
Retrieve the calibrated energy consumption values for heating systems
This method attempts to load the energy consumption calibration file from the
input directory. It first checks for a file without extension, then for a `.csv`
version. If neither is found, it returns a default DataFrame.
Returns
-------
pd.DataFrame
A DataFrame containing the calibrated energy consumption values. If no file
is found, a default DataFrame is returned.
"""
calibrate_energy_consumption = self.input_directory / self.CALIBRATE_ENERGY_CONSUMPTION
if calibrate_energy_consumption.is_file():
return self.get_file(calibrate_energy_consumption.name)
if calibrate_energy_consumption.with_suffix('.csv').is_file():
return self.get_file(calibrate_energy_consumption.with_suffix('.csv').name)
return default_calibrate_energy_consumption()
[docs]
def get_heating_systems_shares_start_year(self) -> pd.DataFrame:
"""
"""
return self.get_file(self.HEATING_SYSTEM_INITIAL_SHARES)
[docs]
def get_heating_system_efficiencies(self) -> pd.DataFrame:
"""Load heating_system_efficiencies.csv from file into a dataframe
Returns
-------
heating_system_efficiencies : pd.DataFrame
pandas DataFrame with heating system efficiencies
"""
return self.get_file(self.HEATING_SYSTEM_EFFICIENCIES)
[docs]
def get_heating_system_forecast(self) -> pd.DataFrame:
"""
"""
return self.get_file(self.HEATING_SYSTEM_FORECAST)
def _check_is_file(self, filename: str) -> bool:
"""
Check if the filename is a file in self.input_folder
Parameters
----------
filename : str
Returns
-------
file_exists : bool
"""
return (pathlib.Path(self.input_directory) / filename).is_file()
[docs]
def check_for_missing_files(self) -> typing.List[str]:
"""
Returns a list of required files that are not present in self.input_folder
Returns
-------
missing_files : List[str]
Raises
------
FileNotFoundError
If FileHandler::input_directory not found
NotADirectoryError
If FileHandler::input_directory is not a directory
"""
if not self.input_directory.exists():
msg=f'{self.input_directory.absolute()} not found'
logger.error(msg)
raise FileNotFoundError(f'Input Directory Not Found')
if not self.input_directory.is_dir():
raise NotADirectoryError(f'{self.input_directory} is not a directory')
missing_files = [file for file in self.files_to_check if not self._check_is_file(file)]
if missing_files:
plural = 's' if len(missing_files) != 1 else ''
msg = f'{len(missing_files)} required file{plural} missing from {self.input_directory}'
logger.error(msg)
for f in missing_files:
logger.error(f'Could not find {f}')
return missing_files
[docs]
def is_calibrated(self) -> bool:
"""
Check if calibration files exist in the input directory.
This method verifies the presence of both energy consumption and energy
requirement files in either `.xlsx` or `.csv` format within the specified
input directory.
Returns
-------
bool
`True` if both required files exist with the same extension (`.xlsx` or `.csv`),
otherwise `False`.
"""
energy_consumption = (self.input_directory / self.CALIBRATE_ENERGY_CONSUMPTION)
energy_requirement = (self.input_directory / self.CALIBRATE_ENERGY_REQUIREMENT)
if energy_consumption.with_suffix('.xlsx').is_file() and energy_requirement.with_suffix('.xlsx').is_file():
return True
if energy_consumption.with_suffix('.csv').is_file() and energy_requirement.with_suffix('.csv').is_file():
return True
return False