Code Reference

framdata

database_names

DatabaseNames

Container for names and locations of files and folders in the NVE database.

DatabaseNames

Bases: Base

Define names of files and folders in the NVE database and map files to folders.

Source code in framdata/database_names/DatabaseNames.py
class DatabaseNames(Base):
    """Define names of files and folders in the NVE database and map files to folders."""

    # ---------- FILE EXTENSIONS ---------- #
    ext_excel = ".xlsx"
    ext_h5 = ".h5"
    ext_parquet = ".parquet"
    ext_yaml = ".yaml"

    # ---------- SHEETS ---------- #
    data_sheet = "Data"
    metadata_sheet = "Metadata"

    # ---------- SUFFIXES ---------- #
    capacity = ".capacity"
    prices = ".prices"
    profiles = ".profiles"
    curves = ".curves"

    # ---------- DATABASE FOLDERS MAP ---------- #
    db00 = "db00_nodes"
    db01 = "db01_nodes_time_vectors"
    db10 = "db10_wind"
    db20 = "db20_solar"
    db30 = "db30_hydropower"
    db31 = "db31_hydropower_time_vectors"
    db32 = "db32_hydropower_curves"
    db40 = "db40_thermal"
    # db41 = "db41_thermal_time_vectors"
    db50 = "db50_demand"
    # db51 = "db51_demand_time_vectors"
    db60 = "db60_transmission"
    # db61 = "db61_transmission_time_vectors"

    db_folder_list: ClassVar[list] = [db00, db01, db10, db20, db30, db31, db32, db40, db50, db60]

    # ---------- FILENAMES ---------- #
    # ==== NODES ====
    power_nodes = "Power.Nodes"
    power_nodes_prices = "Power.Nodes.prices"
    power_nodes_profiles = "Power.Nodes.profiles"

    fuel_nodes = "Fuel.Nodes"
    fuel_nodes_prices = "Fuel.Nodes.prices"
    fuel_nodes_profiles = "Fuel.Nodes.profiles"

    emission_nodes = "Emission.Nodes"
    emission_nodes_prices = "Emission.Nodes.prices"
    emission_nodes_profiles = "Emission.Nodes.profiles"

    # ==== THERMAL ====
    thermal_generators = "Thermal.Generators"
    thermal_generators_capacity = "Thermal.Generators.capacity"
    thermal_generators_profiles = "Thermal.Generators.profiles"

    # ==== HYDROPOWER ====
    # hydro attribute tables
    hydro_modules = "Hydropower.Modules"
    hydro_modules_volumecapacity = "Hydropower.Modules.VolumeCapacity"
    hydro_modules_enekv_global_derived = "Hydropower.Modules.enekv_global_derived"
    hydro_modules_reggrad_glob_derived = "Hydropower.Modules.reggrad_glob_derived"
    hydro_modules_reggrad_lok_derived = "Hydropower.Modules.reggrad_lok_derived"
    hydro_bypass = "Hydropower.Bypass"
    hydro_generators = "Hydropower.Generators"
    hydro_inflow = "Hydropower.Inflow"
    hydro_inflow_yearvolume = "Hydropower.Inflow.YearVolume"
    hydro_inflow_upstream_inflow_derived = "Hydropower.Inflow.upstream_inflow_derived"
    hydro_pumps = "Hydropower.Pumps"
    hydro_reservoirs = "Hydropower.Reservoirs"

    # hydro time series
    hydro_inflow_profiles = "Hydropower.Inflow.profiles"
    hydro_bypass_operationalbounds_restrictions = "Hydropower.Bypass.OperationalBounds.Restrictions"
    hydro_modules_operationalbounds_restrictions = "Hydropower.Modules.OperationalBounds.Restrictions"
    hydro_reservoirs_operationalbounds_restrictions = "Hydropower.Reservoirs.OperationalBounds.Restrictions"
    hydro_generators_energyeq_mid = "Hydropower.Generators.EnergyEq_mid"

    # hydro curves
    hydro_curves = "Hydropower.curves"
    hydro_pqcurves = "Hydropower.pqcurves"

    # ==== DEMAND ====
    demand_consumers = "Demand.Consumers"
    demand_consumers_capacity = "Demand.Consumers.capacity"
    demand_consumers_normalprices = "Demand.Consumers.normalprices"
    demand_consumers_profiles_weatheryears = "Demand.Consumers.profiles.weatheryears"
    demand_consumers_profiles_oneyear = "Demand.Consumers.profiles"

    # ==== WIND ====
    wind_generators = "Wind.Generators"
    wind_generators_capacity = "Wind.Generators.capacity"
    wind_generators_profiles = "Wind.Generators.profiles"

    # ==== SOLAR ====
    solar_generators = "Solar.Generators"
    solar_generators_capacity = "Solar.Generators.capacity"
    solar_generators_profiles = "Solar.Generators.profiles"

    # ==== Transmission ====
    transmission_grid = "Transmission.Grid"
    transmission_capacity = transmission_grid + ".capacity"
    transmission_loss = transmission_grid + ".loss"
    transmission_profiles = transmission_grid + ".profiles"

    # ---------- DATABASE FOLDER MAP ---------- #
    db_folder_map: ClassVar[dict[str, str]] = {
        # ==== NODES ====
        power_nodes: db00,
        fuel_nodes: db00,
        emission_nodes: db00,
        power_nodes_prices: db01,
        fuel_nodes_prices: db01,
        emission_nodes_prices: db01,
        power_nodes_profiles: db01,
        fuel_nodes_profiles: db01,
        emission_nodes_profiles: db01,
        # ==== HYDROPOWER ====
        # hydro attribute tables
        hydro_modules: db30,
        hydro_modules_volumecapacity: db30,
        hydro_modules_enekv_global_derived: db30,
        hydro_modules_reggrad_glob_derived: db30,
        hydro_modules_reggrad_lok_derived: db30,
        hydro_bypass: db30,
        hydro_generators: db30,
        hydro_inflow: db30,
        hydro_inflow_yearvolume: db30,
        hydro_inflow_upstream_inflow_derived: db30,
        hydro_pumps: db30,
        hydro_reservoirs: db30,
        # hydro time series
        hydro_inflow_profiles: db31,
        hydro_bypass_operationalbounds_restrictions: db31,
        hydro_modules_operationalbounds_restrictions: db31,
        hydro_reservoirs_operationalbounds_restrictions: db31,
        hydro_generators_energyeq_mid: db31,
        # hydro curves
        hydro_curves: db32,
        hydro_pqcurves: db32,
        # ==== THERMAL ====
        thermal_generators: db40,
        thermal_generators_capacity: db40,
        thermal_generators_profiles: db40,
        # ==== DEMAND ====
        demand_consumers: db50,
        demand_consumers_capacity: db50,
        demand_consumers_normalprices: db50,
        demand_consumers_profiles_weatheryears: db50,
        demand_consumers_profiles_oneyear: db50,
        # ==== WIND ====
        wind_generators: db10,
        wind_generators_capacity: db10,
        wind_generators_profiles: db10,
        # ==== SOLAR ====
        solar_generators: db20,
        solar_generators_capacity: db20,
        solar_generators_profiles: db20,
        # ==== Transmission ====
        transmission_grid: db60,
        transmission_capacity: db60,
        transmission_loss: db60,
        transmission_profiles: db60,
    }

    @classmethod
    def get_relative_folder_path(cls, file_id: str) -> Path:
        """
        Get the relative database folder path for a given file_id.

        The relative path is the database folder that the given file_id is mapped to.

        Args:
            file_id (str): Identifier for the file to retrieve.

        Returns:
            Path: The database folder name.

        """
        try:
            return Path(cls.db_folder_map[file_id])
        except KeyError as e:
            message = f"File id '{file_id}' not found in database folder map."

            raise KeyError(message) from e

    @classmethod
    def get_file_name(cls, source: Path, db_folder: str, file_id: str) -> str | None:
        """
        Get the name of a file, with extension, from a file ID and a path.

        Args:
            source (Path): Root path of the database.
            db_folder (str): Database folder to look for the file in.
            file_id (str): ID of the file, i.e. the name of the file without extension.

        Raises:
            RuntimeError: If multiple files with the same ID but different extensions are found.

        Returns:
            str | None: File ID and extension combined. If file is not found, return None.

        """
        db_path = source / db_folder
        if not db_path.exists():
            message = f"The database folder {db_path} does not exist."
            raise FileNotFoundError(message)
        candidate_extentions = set()
        for file_path in db_path.iterdir():
            if file_path.is_file() and file_path.stem == file_id:
                candidate_extentions.add(file_path.suffix)
        if len(candidate_extentions) > 1:  # Multiple files of same ID. Ambiguous
            message = (
                f"Found multiple files with ID {file_id} (with different extensions: {candidate_extentions}) in database folder {db_path}."
                " File names must be unique."
            )
            raise RuntimeError(message)
        if len(candidate_extentions) == 0:  # No matching files.
            return None
            # message = f"Found no file with ID {file_id} in database folder {db_path}."
            # raise FileNotFoundError(message)

        (extension,) = candidate_extentions  # We have only one candidate, so we extract it.
        return file_id + extension
get_file_name(source: Path, db_folder: str, file_id: str) -> str | None classmethod

Get the name of a file, with extension, from a file ID and a path.

Parameters:

Name Type Description Default
source Path

Root path of the database.

required
db_folder str

Database folder to look for the file in.

required
file_id str

ID of the file, i.e. the name of the file without extension.

required

Raises:

Type Description
RuntimeError

If multiple files with the same ID but different extensions are found.

Returns:

Type Description
str | None

str | None: File ID and extension combined. If file is not found, return None.

Source code in framdata/database_names/DatabaseNames.py
@classmethod
def get_file_name(cls, source: Path, db_folder: str, file_id: str) -> str | None:
    """
    Get the name of a file, with extension, from a file ID and a path.

    Args:
        source (Path): Root path of the database.
        db_folder (str): Database folder to look for the file in.
        file_id (str): ID of the file, i.e. the name of the file without extension.

    Raises:
        RuntimeError: If multiple files with the same ID but different extensions are found.

    Returns:
        str | None: File ID and extension combined. If file is not found, return None.

    """
    db_path = source / db_folder
    if not db_path.exists():
        message = f"The database folder {db_path} does not exist."
        raise FileNotFoundError(message)
    candidate_extentions = set()
    for file_path in db_path.iterdir():
        if file_path.is_file() and file_path.stem == file_id:
            candidate_extentions.add(file_path.suffix)
    if len(candidate_extentions) > 1:  # Multiple files of same ID. Ambiguous
        message = (
            f"Found multiple files with ID {file_id} (with different extensions: {candidate_extentions}) in database folder {db_path}."
            " File names must be unique."
        )
        raise RuntimeError(message)
    if len(candidate_extentions) == 0:  # No matching files.
        return None
        # message = f"Found no file with ID {file_id} in database folder {db_path}."
        # raise FileNotFoundError(message)

    (extension,) = candidate_extentions  # We have only one candidate, so we extract it.
    return file_id + extension
get_relative_folder_path(file_id: str) -> Path classmethod

Get the relative database folder path for a given file_id.

The relative path is the database folder that the given file_id is mapped to.

Parameters:

Name Type Description Default
file_id str

Identifier for the file to retrieve.

required

Returns:

Name Type Description
Path Path

The database folder name.

Source code in framdata/database_names/DatabaseNames.py
@classmethod
def get_relative_folder_path(cls, file_id: str) -> Path:
    """
    Get the relative database folder path for a given file_id.

    The relative path is the database folder that the given file_id is mapped to.

    Args:
        file_id (str): Identifier for the file to retrieve.

    Returns:
        Path: The database folder name.

    """
    try:
        return Path(cls.db_folder_map[file_id])
    except KeyError as e:
        message = f"File id '{file_id}' not found in database folder map."

        raise KeyError(message) from e
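The sketch below shows how the two classmethods above might be combined to resolve a full file path from a file ID. It is an illustrative usage sketch only: the import path follows the source file shown above, and the database root is a hypothetical placeholder.

from pathlib import Path

from framdata.database_names.DatabaseNames import DatabaseNames as DbN

db_root = Path("/path/to/nve_database")  # hypothetical database root

# Map the file ID to its database folder, e.g. db40_thermal for Thermal.Generators.
folder = DbN.get_relative_folder_path(DbN.thermal_generators)

# Look up the file name with its actual extension (.xlsx, .h5, .parquet or .yaml).
file_name = DbN.get_file_name(db_root, str(folder), DbN.thermal_generators)

if file_name is None:
    print(f"{DbN.thermal_generators} not found in {db_root / folder}")
else:
    print(db_root / folder / file_name)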

DemandNames

Contains classes defining the demand table and validations.

DemandMetadataSchema

Bases: _AttributeMetadataSchema

Pandera DataFrameModel schema for metadata in the Demand.Consumers file.

Source code in framdata/database_names/DemandNames.py
class DemandMetadataSchema(_AttributeMetadataSchema):
    """Pandera DataFrameModel schema for metadata in the Demand.Consumers file."""

    @pa.dataframe_check
    @classmethod
    def check_unit_is_str_for_attributes(cls, df: pd.DataFrame) -> Series[bool]:
        """
        Check that the 'unit' value is a string for the row where 'attribute' is 'Capacity'.

        Args:
            df (Dataframe): DataFrame used to check value for "unit".

        Returns:
            Series[bool]: Series of boolean values denoting whether each element has passed the check.

        """
        return check_unit_is_str_for_attributes(df, [DemandNames.capacity_col])
check_unit_is_str_for_attributes(df: pd.DataFrame) -> Series[bool] classmethod

Check that the 'unit' value is a string for the row where 'attribute' is 'Capacity'.

Parameters:

Name Type Description Default
df Dataframe

DataFrame used to check value for "unit".

required

Returns:

Type Description
Series[bool]

Series[bool]: Series of boolean values denoting whether each element has passed the check.

Source code in framdata/database_names/DemandNames.py
@pa.dataframe_check
@classmethod
def check_unit_is_str_for_attributes(cls, df: pd.DataFrame) -> Series[bool]:
    """
    Check that the 'unit' value is a string for the row where 'attribute' is 'Capacity'.

    Args:
        df (Dataframe): DataFrame used to check value for "unit".

    Returns:
        Series[bool]: Series of boolean values denoting whether each element has passed the check.

    """
    return check_unit_is_str_for_attributes(df, [DemandNames.capacity_col])
DemandNames

Bases: _BaseComponentsNames

Container class for describing the demand attribute table's names, structure, and conversion to a Demand Component.

Source code in framdata/database_names/DemandNames.py
class DemandNames(_BaseComponentsNames):
    """Container class for describing the demand attribute table's names, structure, and convertion to Demand Component."""

    id_col = "ConsumerID"
    node_col = "PowerNode"
    reserve_price_col = "ReservePrice"
    price_elasticity_col = "PriceElasticity"
    min_price_col = "MinPriceLimit"
    max_price_col = "MaxPriceLimit"
    normal_price_col = "NormalPrice"
    capacity_profile_col = "CapacityProfile"
    temperature_profile_col = "TemperatureProfile"
    capacity_col = "Capacity"

    columns: ClassVar[list[str]] = [
        id_col,
        node_col,
        reserve_price_col,
        price_elasticity_col,
        min_price_col,
        max_price_col,
        normal_price_col,
        capacity_profile_col,
        temperature_profile_col,
        capacity_col,
    ]

    ref_columns: ClassVar[list[str]] = [
        node_col,
        reserve_price_col,
        price_elasticity_col,
        min_price_col,
        max_price_col,
        normal_price_col,
        capacity_profile_col,
        temperature_profile_col,
        capacity_col,
    ]

    @staticmethod
    def create_component(
        row: NDArray,
        indices: dict[str, int],
        meta_columns: set[str],
        meta_data: pd.DataFrame,
        attribute_objects: dict[str, tuple[object, dict[str, Meta]]] | None = None,
    ) -> dict[str, Demand]:
        """
        Create a Demand component from a table row in the Demand.Consumers file.

        Args:
            row (NDArray): Array containing the values of one table row, representing one Demand object.
            indices (dict[str, int]): Mapping of the table's column names to the array's indices.
            meta_columns (set[str]): Set of columns which define memberships in meta groups for aggregation.
            meta_data (pd.DataFrame): DataFrame containing at least the unit of every column.
            attribute_objects (dict[str, tuple[object, dict[str, Meta]]], optional): NOT USED

        Returns:
            dict[str, Demand]: A dictionary with the consumer_id as key and the demand component as value.

        """
        elastic_demand_cols = [
            DemandNames.price_elasticity_col,
            DemandNames.min_price_col,
            DemandNames.max_price_col,
            DemandNames.normal_price_col,
        ]
        columns_to_parse = [
            DemandNames.reserve_price_col,
            DemandNames.capacity_profile_col,
            DemandNames.temperature_profile_col,
            DemandNames.capacity_col,
        ]
        columns_to_parse.extend(elastic_demand_cols)

        arg_user_code = DemandNames._parse_args(row, indices, columns_to_parse, meta_data)

        elastic_demand_values = [value for key, value in arg_user_code.items() if key in elastic_demand_cols]
        if all(value is not None for value in elastic_demand_values):
            elastic_demand = ElasticDemand(
                price_elasticity=Elasticity(level=arg_user_code[DemandNames.price_elasticity_col]),
                min_price=Price(level=arg_user_code[DemandNames.min_price_col]),
                normal_price=Price(level=arg_user_code[DemandNames.normal_price_col]),
                max_price=Price(level=arg_user_code[DemandNames.max_price_col]),
            )
            reserve_price = None
        elif arg_user_code[DemandNames.reserve_price_col] is not None:
            elastic_demand = None
            reserve_price = ReservePrice(level=arg_user_code[DemandNames.reserve_price_col])
        else:
            elastic_demand = None
            reserve_price = None
        demand = Demand(
            node=row[indices[DemandNames.node_col]],
            capacity=MaxFlowVolume(
                level=arg_user_code[DemandNames.capacity_col],
                profile=arg_user_code[DemandNames.capacity_profile_col],
            ),
            reserve_price=reserve_price,
            elastic_demand=elastic_demand,
            temperature_profile=arg_user_code[DemandNames.temperature_profile_col],
        )
        DemandNames._add_meta(demand, row, indices, meta_columns)

        return {row[indices[DemandNames.id_col]]: demand}

    @staticmethod
    def get_attribute_data_schema() -> pa.DataFrameModel:
        """
        Get the Pandera DataFrameModel schema for attribute data in the Demand.Consumers file.

        Returns:
            DemandSchema (pa.DataFrameModel): Pandera DataFrameModel schema for Demand attribute data.

        """
        return DemandSchema

    @staticmethod
    def get_metadata_schema() -> pa.DataFrameModel:
        """
        Get the Pandera DataFrameModel schema for metadata in the Demand.Consumers file.

        Returns:
            DemandMetadataSchema (pa.DataFrameModel): Pandera DataFrameModel schema for Demand metadata.

        """
        return DemandMetadataSchema

    @staticmethod
    def _get_unique_check_descriptions() -> dict[str, tuple[str, bool]]:
        """
        Retrieve a dictionary with descriptions of validation checks that are specific to the Demand schemas.

        Returns:
            dict[str, tuple[str, bool]]: A dictionary where:
                - Keys (str): The name of the validation check method.
                - Values (tuple[str, bool]):
                    - The first element (str) provides a concise and user-friendly description of the check. E.g. what
                      caused the validation error or what is required for the check to pass.
                    - The second element (bool) indicates whether the check is a warning (True) or an error (False).

        """
        return {
            DemandSchema.check_elastic_demand.__name__: ("Missing elastic demand value.", True),
        }

    @staticmethod
    def _format_unique_checks(errors: pd.DataFrame) -> pd.DataFrame:
        """
        Format the error DataFrame according to the validation checks that are specific to the Demand schemas.

        This method processes validation errors that come from a dataframe-level check on elastic demand columns in the
        attribute data schema. The default reporting on failed dataframe-level checks in Pandera's standard error
        reports DataFrame (errors) is not very user-friendly. It can contain unnecessary rows about columns that are not
        relevant to the check and will not include rows about the columns relevant to the check if those columns have
        missing values. This method removes unnecessary rows from the error dataframe and ensures that rows with
        information about the elastic demand columns that fail the check are included.

        Args:
            errors (pd.DataFrame): DataFrame containing validation errors. Pandera's standard error reports DataFrame.

        Returns:
            pd.DataFrame: The updated error DataFrame with formatted rows for unique validation checks.

        """
        if DemandSchema.check_elastic_demand.__name__ in errors[DemandNames.COL_CHECK].to_numpy():
            check_rows = errors.loc[errors[DemandNames.COL_CHECK] == DemandSchema.check_elastic_demand.__name__]
            errors = errors[~(errors[DemandNames.COL_CHECK] == DemandSchema.check_elastic_demand.__name__)]
            elastic_demand_columns = [
                DemandNames.price_elasticity_col,
                DemandNames.min_price_col,
                DemandNames.max_price_col,
                DemandNames.normal_price_col,
            ]
            check_description_str = check_rows[DemandNames.COL_CHECK_DESC].unique()[0]
            elastic_demand_rows = []
            for idx in check_rows[DemandNames.COL_IDX].unique():
                check_case = check_rows[check_rows[DemandNames.COL_IDX] == idx]
                for col in elastic_demand_columns:
                    if col not in list(check_case[DemandNames.COL_COLUMN].unique()):
                        elastic_demand_rows.append(
                            [
                                col,
                                DemandSchema.check_elastic_demand.__name__,
                                None,
                                idx,
                                check_description_str,
                                True,
                            ],
                        )
            errors = pd.concat([errors, pd.DataFrame(elastic_demand_rows, columns=errors.columns)], ignore_index=True)
        return errors
create_component(row: NDArray, indices: dict[str, int], meta_columns: set[str], meta_data: pd.DataFrame, attribute_objects: dict[str, tuple[object, dict[str, Meta]]] | None = None) -> dict[str, Demand] staticmethod

Create a Demand component from a table row in the Demand.Consumers file.

Parameters:

Name Type Description Default
row NDArray

Array containing the values of one table row, representing one Demand object.

required
indices dict[str, int]

Mapping of the table's column names to the array's indices.

required
meta_columns set[str]

Set of columns which define memberships in meta groups for aggregation.

required
meta_data DataFrame

DataFrame containing at least the unit of every column.

required
attribute_objects dict[str, tuple[object, dict[str, Meta]]]

NOT USED

None

Returns:

Type Description
dict[str, Demand]

dict[str, Demand]: A dictionary with the consumer_id as key and the demand component as value.

Source code in framdata/database_names/DemandNames.py
@staticmethod
def create_component(
    row: NDArray,
    indices: dict[str, int],
    meta_columns: set[str],
    meta_data: pd.DataFrame,
    attribute_objects: dict[str, tuple[object, dict[str, Meta]]] | None = None,
) -> dict[str, Demand]:
    """
    Create a Demand component from a table row in the Demand.Consumers file.

    Args:
        row (NDArray): Array containing the values of one table row, representing one Demand object.
        indices (dict[str, int]): Mapping of the table's column names to the array's indices.
        meta_columns (set[str]): Set of columns which define memberships in meta groups for aggregation.
        meta_data (pd.DataFrame): DataFrame containing at least the unit of every column.
        attribute_objects (dict[str, tuple[object, dict[str, Meta]]], optional): NOT USED

    Returns:
        dict[str, Demand]: A dictionary with the consumer_id as key and the demand component as value.

    """
    elastic_demand_cols = [
        DemandNames.price_elasticity_col,
        DemandNames.min_price_col,
        DemandNames.max_price_col,
        DemandNames.normal_price_col,
    ]
    columns_to_parse = [
        DemandNames.reserve_price_col,
        DemandNames.capacity_profile_col,
        DemandNames.temperature_profile_col,
        DemandNames.capacity_col,
    ]
    columns_to_parse.extend(elastic_demand_cols)

    arg_user_code = DemandNames._parse_args(row, indices, columns_to_parse, meta_data)

    elastic_demand_values = [value for key, value in arg_user_code.items() if key in elastic_demand_cols]
    if all(value is not None for value in elastic_demand_values):
        elastic_demand = ElasticDemand(
            price_elasticity=Elasticity(level=arg_user_code[DemandNames.price_elasticity_col]),
            min_price=Price(level=arg_user_code[DemandNames.min_price_col]),
            normal_price=Price(level=arg_user_code[DemandNames.normal_price_col]),
            max_price=Price(level=arg_user_code[DemandNames.max_price_col]),
        )
        reserve_price = None
    elif arg_user_code[DemandNames.reserve_price_col] is not None:
        elastic_demand = None
        reserve_price = ReservePrice(level=arg_user_code[DemandNames.reserve_price_col])
    else:
        elastic_demand = None
        reserve_price = None
    demand = Demand(
        node=row[indices[DemandNames.node_col]],
        capacity=MaxFlowVolume(
            level=arg_user_code[DemandNames.capacity_col],
            profile=arg_user_code[DemandNames.capacity_profile_col],
        ),
        reserve_price=reserve_price,
        elastic_demand=elastic_demand,
        temperature_profile=arg_user_code[DemandNames.temperature_profile_col],
    )
    DemandNames._add_meta(demand, row, indices, meta_columns)

    return {row[indices[DemandNames.id_col]]: demand}
get_attribute_data_schema() -> pa.DataFrameModel staticmethod

Get the Pandera DataFrameModel schema for attribute data in the Demand.Consumers file.

Returns:

Name Type Description
DemandSchema DataFrameModel

Pandera DataFrameModel schema for Demand attribute data.

Source code in framdata/database_names/DemandNames.py
@staticmethod
def get_attribute_data_schema() -> pa.DataFrameModel:
    """
    Get the Pandera DataFrameModel schema for attribute data in the Demand.Consumers file.

    Returns:
        DemandSchema (pa.DataFrameModel): Pandera DataFrameModel schema for Demand attribute data.

    """
    return DemandSchema
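As a rough illustration of how the schema returned here could be applied, the following sketch validates a toy Demand.Consumers attribute table with Pandera's lazy validation. The example data is hypothetical, and since several column checks call helper functions not shown on this page, the sketch prints any failure cases instead of assuming the frame passes.

import pandas as pd
import pandera as pa

from framdata.database_names.DemandNames import DemandNames

# Hypothetical attribute table with the columns defined by DemandNames.columns.
df = pd.DataFrame(
    {
        "ConsumerID": ["NO1_households"],
        "PowerNode": ["NO1"],
        "ReservePrice": [3000.0],
        "PriceElasticity": [None],
        "MinPriceLimit": [None],
        "MaxPriceLimit": [None],
        "NormalPrice": [None],
        "CapacityProfile": ["NO1_demand_profile"],
        "TemperatureProfile": [None],
        "Capacity": [1250.0],
    }
)

schema = DemandNames.get_attribute_data_schema()
try:
    schema.validate(df, lazy=True)
    print("Attribute table passed validation.")
except pa.errors.SchemaErrors as err:
    print(err.failure_cases)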
get_metadata_schema() -> pa.DataFrameModel staticmethod

Get the Pandera DataFrameModel schema for metadata in the Demand.Consumers file.

Returns:

Name Type Description
DemandMetadataSchema DataFrameModel

Pandera DataFrameModel schema for Demand metadata.

Source code in framdata/database_names/DemandNames.py
@staticmethod
def get_metadata_schema() -> pa.DataFrameModel:
    """
    Get the Pandera DataFrameModel schema for metadata in the Demand.Consumers file.

    Returns:
        DemandMetadataSchema (pa.DataFrameModel): Pandera DataFrameModel schema for Demand metadata.

    """
    return DemandMetadataSchema
DemandSchema

Bases: DataFrameModel

Pandera DataFrameModel schema for attribute data in the Demand.Consumers file.

Source code in framdata/database_names/DemandNames.py
class DemandSchema(pa.DataFrameModel):
    """Pandera DataFrameModel schema for attribute data in the Demand.Consumers file."""

    ConsumerID: Series[str] = pa.Field(unique=True, nullable=False)
    PowerNode: Series[str] = pa.Field(nullable=False)
    ReservePrice: Series[Any] = pa.Field(nullable=True)
    PriceElasticity: Series[Any] = pa.Field(nullable=True)
    MinPriceLimit: Series[Any] = pa.Field(nullable=True)
    MaxPriceLimit: Series[Any] = pa.Field(nullable=True)
    NormalPrice: Series[Any] = pa.Field(nullable=True)
    CapacityProfile: Series[Any] = pa.Field(nullable=True)
    TemperatureProfile: Series[Any] = pa.Field(nullable=True)
    Capacity: Series[Any] = pa.Field(nullable=False)

    @pa.check(DemandNames.capacity_col)
    @classmethod
    def dtype_str_int_float(cls, series: Series[Any]) -> Series[bool]:
        """Check if values in the series are of datatype: str, int or float."""
        return dtype_str_int_float(series)

    @pa.check(
        DemandNames.reserve_price_col,
        DemandNames.price_elasticity_col,
        DemandNames.min_price_col,
        DemandNames.max_price_col,
        DemandNames.normal_price_col,
        DemandNames.capacity_profile_col,
        DemandNames.temperature_profile_col,
    )
    @classmethod
    def dtype_str_int_float_none(cls, series: Series[Any]) -> Series[bool]:
        """Check if values in the series are of datatype: str, int, float or None."""
        return dtype_str_int_float_none(series)

    @pa.check(DemandNames.price_elasticity_col)
    @classmethod
    def numeric_values_less_than_or_equal_to_0(cls, series: Series[Any]) -> Series[bool]:
        """Check if numeric values in the series are less than or equal to zero."""
        return numeric_values_less_than_or_equal_to(series, 0)

    @pa.check(
        DemandNames.reserve_price_col,
        DemandNames.min_price_col,
        DemandNames.max_price_col,
        DemandNames.normal_price_col,
        DemandNames.capacity_col,
    )
    @classmethod
    def numeric_values_greater_than_or_equal_to_0(cls, series: Series[Any]) -> Series[bool]:
        """Check if numeric values in the series are greater than or equal to zero."""
        return numeric_values_greater_than_or_equal_to(series, 0)

    @pa.check(DemandNames.capacity_profile_col)
    @classmethod
    def numeric_values_are_between_or_equal_to_0_and_1(cls, series: Series[Any]) -> Series[bool]:
        """Check if numeric values in the series are between zero and one or equal to zero and one."""
        return numeric_values_are_between_or_equal_to(series, 0, 1)

    @pa.dataframe_check
    @classmethod
    def check_elastic_demand(cls, df: DataFrame) -> Series[bool]:
        """Check that all elastic demand values are present if one or more is."""
        elastic_demand = df[
            [
                DemandNames.price_elasticity_col,
                DemandNames.min_price_col,
                DemandNames.max_price_col,
                DemandNames.normal_price_col,
            ]
        ]

        check = elastic_demand.apply(
            lambda row: all(value is not None for value in row) if any(value is not None for value in row) else True,
            axis=1,
        ).tolist()
        return pd.Series(check)

    class Config:
        """Schema-wide configuration for the DemandSchema class."""

        unique_column_names = True
Config

Schema-wide configuration for the DemandSchema class.

Source code in framdata/database_names/DemandNames.py
class Config:
    """Schema-wide configuration for the DemandSchema class."""

    unique_column_names = True
check_elastic_demand(df: DataFrame) -> Series[bool] classmethod

Check that all elastic demand values are present if one or more is.

Source code in framdata/database_names/DemandNames.py
@pa.dataframe_check
@classmethod
def check_elastic_demand(cls, df: DataFrame) -> Series[bool]:
    """Check that all elastic demand values are present if one or more is."""
    elastic_demand = df[
        [
            DemandNames.price_elasticity_col,
            DemandNames.min_price_col,
            DemandNames.max_price_col,
            DemandNames.normal_price_col,
        ]
    ]

    check = elastic_demand.apply(
        lambda row: all(value is not None for value in row) if any(value is not None for value in row) else True,
        axis=1,
    ).tolist()
    return pd.Series(check)
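To make the all-or-none rule concrete, the standalone snippet below re-implements the same row-wise logic on a toy frame (using pd.notna so NaN values read from a file are treated like missing values). It is an illustration of the rule, not the library code itself.

import pandas as pd

# Toy frame with the four elastic-demand columns checked above.
df = pd.DataFrame(
    {
        "PriceElasticity": [-0.3, None, -0.1],
        "MinPriceLimit": [10.0, None, None],
        "MaxPriceLimit": [200.0, None, 150.0],
        "NormalPrice": [50.0, None, 60.0],
    }
)

passed = df.apply(
    lambda row: all(pd.notna(v) for v in row) if any(pd.notna(v) for v in row) else True,
    axis=1,
)
print(passed.tolist())  # [True, True, False]: the third row is missing MinPriceLimit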
dtype_str_int_float(series: Series[Any]) -> Series[bool] classmethod

Check if values in the series are of datatype: str, int or float.

Source code in framdata/database_names/DemandNames.py
@pa.check(DemandNames.capacity_col)
@classmethod
def dtype_str_int_float(cls, series: Series[Any]) -> Series[bool]:
    """Check if values in the series are of datatype: str, int or float."""
    return dtype_str_int_float(series)
dtype_str_int_float_none(series: Series[Any]) -> Series[bool] classmethod

Check if values in the series are of datatype: str, int, float or None.

Source code in framdata/database_names/DemandNames.py
@pa.check(
    DemandNames.reserve_price_col,
    DemandNames.price_elasticity_col,
    DemandNames.min_price_col,
    DemandNames.max_price_col,
    DemandNames.normal_price_col,
    DemandNames.capacity_profile_col,
    DemandNames.temperature_profile_col,
)
@classmethod
def dtype_str_int_float_none(cls, series: Series[Any]) -> Series[bool]:
    """Check if values in the series are of datatype: str, int, float or None."""
    return dtype_str_int_float_none(series)
numeric_values_are_between_or_equal_to_0_and_1(series: Series[Any]) -> Series[bool] classmethod

Check if numeric values in the series are between zero and one, inclusive.

Source code in framdata/database_names/DemandNames.py
@pa.check(DemandNames.capacity_profile_col)
@classmethod
def numeric_values_are_between_or_equal_to_0_and_1(cls, series: Series[Any]) -> Series[bool]:
    """Check if numeric values in the series are between zero and one or equal to zero and one."""
    return numeric_values_are_between_or_equal_to(series, 0, 1)
numeric_values_greater_than_or_equal_to_0(series: Series[Any]) -> Series[bool] classmethod

Check if numeric values in the series are greater than or equal to zero.

Source code in framdata/database_names/DemandNames.py
@pa.check(
    DemandNames.reserve_price_col,
    DemandNames.min_price_col,
    DemandNames.max_price_col,
    DemandNames.normal_price_col,
    DemandNames.capacity_col,
)
@classmethod
def numeric_values_greater_than_or_equal_to_0(cls, series: Series[Any]) -> Series[bool]:
    """Check if numeric values in the series are greater than or equal to zero."""
    return numeric_values_greater_than_or_equal_to(series, 0)
numeric_values_less_than_or_equal_to_0(series: Series[Any]) -> Series[bool] classmethod

Check if numeric values in the series are less than or equal to zero.

Source code in framdata/database_names/DemandNames.py
@pa.check(DemandNames.price_elasticity_col)
@classmethod
def numeric_values_less_than_or_equal_to_0(cls, series: Series[Any]) -> Series[bool]:
    """Check if numeric values in the series are less than or equal to zero."""
    return numeric_values_less_than_or_equal_to(series, 0)

H5Names

Define names and fields used in H5 files.

H5Names

Container class for names used in H5 files.

Source code in framdata/database_names/H5Names.py
class H5Names:
    """Container class for names used in H5 files."""

    INDEX_GROUP = "index"
    METADATA_GROUP = "metadata"
    VECTORS_GROUP = "vectors"
    COMMON_PREFIX = "common_"

HydroBypassNames

Contain the BypassNames class and related Pandera schemas for handling hydropower bypass data.

Includes attribute and metadata schemas.

HydroBypassMetadataSchema

Bases: _AttributeMetadataSchema

Pandera DataFrameModel schema for metadata in the Hydropower.Bypass file.

Source code in framdata/database_names/HydroBypassNames.py
class HydroBypassMetadataSchema(_AttributeMetadataSchema):
    """Pandera DataFrameModel schema for metadata in the Hydropower.Bypass file."""

    pass
HydroBypassNames

Bases: _BaseComponentsNames

Define naming conventions and attribute object creation for the HydroBypass object, which is an attribute of the HydroModule.

Provides methods for creating bypass attribute objects, retrieving Pandera schemas for attribute and metadata tables, and formatting validation errors specific to the bypass schemas.

Source code in framdata/database_names/HydroBypassNames.py
class HydroBypassNames(_BaseComponentsNames):
    """
    Define naming conventions and attribute object creation for the HydroBypass object, which is an attribute of the HydroModule.

    Provides methods for creating bypass attribute objects, retrieving Pandera schemas for attribute and metadata tables,
    and formatting validation errors specific to the bypass schemas.

    """

    id_col = "BypassID"
    to_col = "BypassTo"
    cap_col = "Capacity"
    min_bnd_col = "MinOperationalBypass"
    min_penalty_col = "MinViolationPenalty"

    columns: ClassVar[list[str]] = [id_col, to_col, cap_col, min_bnd_col, min_penalty_col]

    ref_columns: ClassVar[list[str]] = [to_col, cap_col, min_bnd_col, min_penalty_col]

    @staticmethod
    def create_component(
        row: NDArray,
        indices: dict[str, int],
        meta_columns: set[str],
        meta_data: pd.DataFrame,
        attribute_objects: dict[str, tuple[object, dict[str, Meta]]] | None = None,
    ) -> dict[str, HydroBypass]:
        """
        Create a HydroBypass object.

        Args:
            row (NDArray): Array containing the values of one table row, representing one HydroModule object.
            indices (dict[str, int]): Mapping of the table's column names to the array's indices.
            meta_columns (set[str]): Set of columns used to tag object with memberships.
            meta_data (pd.DataFrame): DataFrame containing at least the unit of every column.
            attribute_objects (dict[str, tuple[object, dict[str, Meta]]], optional): NOT USED, currently only used in HydroModulesNames.

        Returns:
            dict[str, HydroBypass]: A dictionary with the bypass ID as key and the bypass object and its metadata as value.

        """
        columns_to_parse = [
            HydroBypassNames.id_col,
            HydroBypassNames.to_col,
            HydroBypassNames.cap_col,
            HydroBypassNames.min_bnd_col,
            HydroBypassNames.min_penalty_col,
        ]

        arg_user_code = HydroBypassNames._parse_args(row, indices, columns_to_parse, meta_data)

        bypass = HydroBypass(
            to_module=row[indices[HydroBypassNames.to_col]],
            # capacity=SoftFlowCapacity(
            #     level_input=arg_user_code[BypassNames.cap_col],
            #     min_profile_input=arg_user_code[BypassNames.min_bnd_col],
            #     min_penalty=arg_user_code[BypassNames.min_penalty_col],
            # ),
            capacity=MaxFlowVolume(level=arg_user_code[HydroBypassNames.cap_col]),
        )

        meta = {}
        HydroBypassNames._add_meta(meta, row, indices, meta_columns)

        return {row[indices[HydroBypassNames.id_col]]: (bypass, meta)}

    @staticmethod
    def get_attribute_data_schema() -> pa.DataFrameModel:
        """
        Get the Pandera DataFrameModel schema for attribute data in the Hydropower.Bypass file.

        Returns:
            pa.DataFrameModel: Pandera DataFrameModel schema for the Bypass attribute data.

        """
        return HydroBypassSchema

    @staticmethod
    def get_metadata_schema() -> pa.DataFrameModel:
        """
        Get the Pandera DataFrameModel schema for the metadata table in the Hydropower.Bypass file.

        Returns:
            pa.DataFrameModel: Pandera DataFrameModel schema for the Bypass metadata.

        """
        return HydroBypassMetadataSchema

    @staticmethod
    def _get_unique_check_descriptions() -> dict[str, tuple[str, bool]]:
        """
        Retrieve a dictionary with descriptions of validation checks that are specific to the Bypass schemas.

        Returns:
            dict[str, tuple[str, bool]]: A dictionary where:
                - Keys (str): The name of the validation check method.
                - Values (tuple[str, bool]):
                    - The first element (str) provides a concise and user-friendly description of the check. E.g. what
                      caused the validation error or what is required for the check to pass.
                    - The second element (bool) indicates whether the check is a warning (True) or an error (False).


        """
        return None

    @staticmethod
    def _format_unique_checks(errors: pd.DataFrame) -> pd.DataFrame:
        """
        Format the error DataFrame according to the validation checks that are specific to the Bypass schemas.

        Args:
            errors (pd.DataFrame): The error DataFrame containing validation errors.

        Returns:
            pd.DataFrame: The updated error DataFrame with formatted rows for unique validation checks.

        """
        return None
create_component(row: NDArray, indices: dict[str, int], meta_columns: set[str], meta_data: pd.DataFrame, attribute_objects: dict[str, tuple[object, dict[str, Meta]]] | None = None) -> dict[str, HydroBypass] staticmethod

Create a HydroBypass object.

Parameters:

Name Type Description Default
row NDArray

Array containing the values of one table row, representing one HydroModule object.

required
indices dict[str, int]

Mapping of the table's column names to the array's indices.

required
meta_columns set[str]

Set of columns used to tag object with memberships.

required
meta_data DataFrame

DataFrame containing at least the unit of every column.

required
attribute_objects dict[str, tuple[object, dict[str, Meta]]]

NOT USED, currently only used in HydroModulesNames.

None

Returns:

Type Description
dict[str, HydroBypass]

dict[str, HydroBypass]: A dictionary with the bypass ID as key and the bypass object and its metadata as value.

Source code in framdata/database_names/HydroBypassNames.py
@staticmethod
def create_component(
    row: NDArray,
    indices: dict[str, int],
    meta_columns: set[str],
    meta_data: pd.DataFrame,
    attribute_objects: dict[str, tuple[object, dict[str, Meta]]] | None = None,
) -> dict[str, HydroBypass]:
    """
    Create a HydroBypass object.

    Args:
        row (NDArray): Array containing the values of one table row, representing one HydroModule object.
        indices (dict[str, int]): Mapping of the table's column names to the array's indices.
        meta_columns (set[str]): Set of columns used to tag object with memberships.
        meta_data (pd.DataFrame): DataFrame containing at least the unit of every column.
        attribute_objects (dict[str, tuple[object, dict[str, Meta]]], optional): NOT USED, currently only used in HydroModulesNames.

    Returns:
        dict[str, HydroBypass]: A dictionary with the bypass ID as key and the bypass object and its metadata as value.

    """
    columns_to_parse = [
        HydroBypassNames.id_col,
        HydroBypassNames.to_col,
        HydroBypassNames.cap_col,
        HydroBypassNames.min_bnd_col,
        HydroBypassNames.min_penalty_col,
    ]

    arg_user_code = HydroBypassNames._parse_args(row, indices, columns_to_parse, meta_data)

    bypass = HydroBypass(
        to_module=row[indices[HydroBypassNames.to_col]],
        # capacity=SoftFlowCapacity(
        #     level_input=arg_user_code[BypassNames.cap_col],
        #     min_profile_input=arg_user_code[BypassNames.min_bnd_col],
        #     min_penalty=arg_user_code[BypassNames.min_penalty_col],
        # ),
        capacity=MaxFlowVolume(level=arg_user_code[HydroBypassNames.cap_col]),
    )

    meta = {}
    HydroBypassNames._add_meta(meta, row, indices, meta_columns)

    return {row[indices[HydroBypassNames.id_col]]: (bypass, meta)}
get_attribute_data_schema() -> pa.DataFrameModel staticmethod

Get the Pandera DataFrameModel schema for attribute data in the Hydropower.Bypass file.

Returns:

Type Description
DataFrameModel

pa.DataFrameModel: Pandera DataFrameModel schema for the Bypass attribute data.

Source code in framdata/database_names/HydroBypassNames.py
@staticmethod
def get_attribute_data_schema() -> pa.DataFrameModel:
    """
    Get the Pandera DataFrameModel schema for attribute data in the Hydropower.Bypass file.

    Returns:
        pa.DataFrameModel: Pandera DataFrameModel schema for the Bypass attribute data.

    """
    return HydroBypassSchema
get_metadata_schema() -> pa.DataFrameModel staticmethod

Get the Pandera DataFrameModel schema for the metadata table in the Hydropower.Bypass file.

Returns:

Type Description
DataFrameModel

pa.DataFrameModel: Pandera DataFrameModel schema for the Bypass metadata.

Source code in framdata/database_names/HydroBypassNames.py
@staticmethod
def get_metadata_schema() -> pa.DataFrameModel:
    """
    Get the Pandera DataFrameModel schema for the metadata table in the Hydropower.Bypass file.

    Returns:
        pa.DataFrameModel: Pandera DataFrameModel schema for the Bypass metadata.

    """
    return HydroBypassMetadataSchema
HydroBypassSchema

Bases: DataFrameModel

Pandera DataFrameModel schema for attribute data in the Hydropower.Bypass file.

Source code in framdata/database_names/HydroBypassNames.py
class HydroBypassSchema(pa.DataFrameModel):
    """Pandera DataFrameModel schema for attribute data in the Hydropower.Bypass file."""

    pass

HydroGeneratorNames

Define the GeneratorNames class and related Pandera schemas for hydropower generator data.

Provides:

- GeneratorNames: class for handling generator component names and schema validation.
- GeneratorSchema: Pandera schema for generator attribute data.
- GeneratorMetadataSchema: Pandera schema for generator metadata.

GeneratorMetadataSchema

Bases: _AttributeMetadataSchema

Pandera DataFrameModel schema for metadata in the Hydropower.Generators file.

Source code in framdata/database_names/HydroGeneratorNames.py
class GeneratorMetadataSchema(_AttributeMetadataSchema):
    """Pandera DataFrameModel schema for metadata in the Hydropower.Generators file."""

    pass
GeneratorSchema

Bases: DataFrameModel

Pandera DataFrameModel schema for attribute data in the Hydropower.Generators file.

Source code in framdata/database_names/HydroGeneratorNames.py
class GeneratorSchema(pa.DataFrameModel):
    """Pandera DataFrameModel schema for attribute data in the Hydropower.Generators file."""

    pass
HydroGeneratorNames

Bases: _BaseComponentsNames

Handles generator component names and schema validation for hydropower generator data.

Provides methods for creating generator components, retrieving Pandera schemas for attribute and metadata tables, and formatting validation errors specific to generator schemas.

Source code in framdata/database_names/HydroGeneratorNames.py
class HydroGeneratorNames(_BaseComponentsNames):
    """
    Handles generator component names and schema validation for hydropower generator data.

    Provides methods for creating generator components, retrieving Pandera schemas for attribute and metadata tables,
    and formatting validation errors specific to generator schemas.
    """

    id_col = "GeneratorID"
    node_col = "PowerNode"
    pq_curve_col = "PQCurve"
    tailw_elev_col = "TailwaterElevation"
    head_nom_col = "NominalHead"
    en_eq_col = "EnergyEq"

    columns: ClassVar[list[str]] = [
        id_col,
        node_col,
        pq_curve_col,
        tailw_elev_col,
        head_nom_col,
        en_eq_col,
    ]

    ref_columns: ClassVar[list[str]] = [
        node_col,
        pq_curve_col,
        tailw_elev_col,
        head_nom_col,
        en_eq_col,
    ]

    @staticmethod
    def create_component(
        row: NDArray,
        indices: dict[str, int],
        meta_columns: set[str],
        meta_data: pd.DataFrame,
        attribute_objects: dict[str, tuple[object, dict[str, Meta]]] | None = None,
    ) -> dict[str, tuple[HydroGenerator, dict[str, Meta]]]:
        """
        Create a hydro generator attribute object.

        Args:
            row (NDArray): Array containing the values of one table row, representing one HydroModule object.
            indices (dict[str, int]): Mapping of the table's column names to the array's indices.
            meta_columns (set[str]): Set of columns used to tag object with memberships.
            meta_data (pd.DataFrame): DataFrame containing at least the unit of every column.
            attribute_objects (dict[str, tuple[object, dict[str, Meta]]], optional): NOT USED, currently only used in HydroModulesNames.

        Returns:
            dict[str, tuple[HydroGenerator, dict[str, Meta]]]: A dictionary with the generator ID as key and the attribute object and metadata as value.

        """
        columns_to_parse = [
            HydroGeneratorNames.pq_curve_col,
            HydroGeneratorNames.tailw_elev_col,
            HydroGeneratorNames.head_nom_col,
            HydroGeneratorNames.en_eq_col,
        ]

        arg_user_code = HydroGeneratorNames._parse_args(row, indices, columns_to_parse, meta_data)

        generator = HydroGenerator(
            power_node=row[indices[HydroGeneratorNames.node_col]],
            energy_equivalent=Conversion(level=arg_user_code[HydroGeneratorNames.en_eq_col]),
            pq_curve=arg_user_code[HydroGeneratorNames.pq_curve_col],
            tailwater_elevation=arg_user_code[HydroGeneratorNames.tailw_elev_col],
            nominal_head=arg_user_code[HydroGeneratorNames.head_nom_col],
        )

        meta = {}
        HydroGeneratorNames._add_meta(meta, row, indices, meta_columns)

        return {row[indices[HydroGeneratorNames.id_col]]: (generator, meta)}

    @staticmethod
    def get_attribute_data_schema() -> pa.DataFrameModel:
        """
        Get the Pandera DataFrameModel schema for attribute data in the Hydropower.Generators file.

        Returns:
            pa.DataFrameModel: Pandera DataFrameModel schema for the Generator attribute data.

        """
        return GeneratorSchema

    @staticmethod
    def get_metadata_schema() -> pa.DataFrameModel:
        """
        Get the Pandera DataFrameModel schema for the metadata table in the Hydropower.Generators file.

        Returns:
            pa.DataFrameModel: Pandera DataFrameModel schema for the Generator metadata.

        """
        return GeneratorMetadataSchema

    @staticmethod
    def _get_unique_check_descriptions() -> dict[str, tuple[str, bool]]:
        """
        Retrieve a dictionary with descriptions of validation checks that are specific to the Generator schemas.

        Returns:
            dict[str, tuple[str, bool]]: A dictionary where:
                - Keys (str): The name of the validation check method.
                - Values (tuple[str, bool]):
                    - The first element (str) provides a concise and user-friendly description of the check. E.g. what
                      caused the validation error or what is required for the check to pass.
                    - The second element (bool) indicates whether the check is a warning (True) or an error (False).


        """
        return None

    @staticmethod
    def _format_unique_checks(errors: pd.DataFrame) -> pd.DataFrame:
        """
        Format the error DataFrame according to the validation checks that are specific to the Generator schemas.

        Args:
            errors (pd.DataFrame): The error DataFrame containing validation errors.

        Returns:
            pd.DataFrame: The updated error DataFrame with formatted rows for unique validation checks.

        """
        return None
create_component(row: NDArray, indices: dict[str, int], meta_columns: set[str], meta_data: pd.DataFrame, attribute_objects: dict[str, tuple[object, dict[str, Meta]]] | None = None) -> dict[str, tuple[HydroGenerator, dict[str, Meta]]] staticmethod

Create a hydro generator attribute object.

Parameters:

Name Type Description Default
row NDArray

Array containing the values of one table row, representing one HydroModule object.

required
indices dict[str, int]

Mapping of the table's column names to the array's indices.

required
meta_columns set[str]

Set of columns used to tag object with memberships.

required
meta_data DataFrame

DataFrame containing at least the unit of every column.

required
attribute_objects dict[str, tuple[object, dict[str, Meta]]]

NOT USED, currently only used in HydroModulesNames.

None

Returns:

Type Description
dict[str, tuple[HydroGenerator, dict[str, Meta]]]

dict[str, tuple[HydroGenerator, dict[str, Meta]]]: A dictionary with the generator ID as key and the attribute object and metadata as value.

Source code in framdata/database_names/HydroGeneratorNames.py
@staticmethod
def create_component(
    row: NDArray,
    indices: dict[str, int],
    meta_columns: set[str],
    meta_data: pd.DataFrame,
    attribute_objects: dict[str, tuple[object, dict[str, Meta]]] | None = None,
) -> dict[str, tuple[HydroGenerator, dict[str, Meta]]]:
    """
    Create a hydro generator attribute object.

    Args:
        row (NDArray): Array containing the values of one table row, representing one HydroModule object.
        indices (dict[str, int]): Mapping of the table's column names to the array's indices.
        meta_columns (set[str]): Set of columns used to tag object with memberships.
        meta_data (pd.DataFrame): DataFrame containing at least the unit of every column.
        attribute_objects (dict[str, tuple[object, dict[str, Meta]]], optional): NOT USED, currently only used in HydroModulesNames.

    Returns:
        dict[str, tuple[HydroGenerator, dict[str, Meta]]]: A dictionary with the generator ID as key and the attribute object and metadata as value.

    """
    columns_to_parse = [
        HydroGeneratorNames.pq_curve_col,
        HydroGeneratorNames.tailw_elev_col,
        HydroGeneratorNames.head_nom_col,
        HydroGeneratorNames.en_eq_col,
    ]

    arg_user_code = HydroGeneratorNames._parse_args(row, indices, columns_to_parse, meta_data)

    generator = HydroGenerator(
        power_node=row[indices[HydroGeneratorNames.node_col]],
        energy_equivalent=Conversion(level=arg_user_code[HydroGeneratorNames.en_eq_col]),
        pq_curve=arg_user_code[HydroGeneratorNames.pq_curve_col],
        tailwater_elevation=arg_user_code[HydroGeneratorNames.tailw_elev_col],
        nominal_head=arg_user_code[HydroGeneratorNames.head_nom_col],
    )

    meta = {}
    HydroGeneratorNames._add_meta(meta, row, indices, meta_columns)

    return {row[indices[HydroGeneratorNames.id_col]]: (generator, meta)}
get_attribute_data_schema() -> pa.DataFrameModel staticmethod

Get the Pandera DataFrameModel schema for attribute data in the Hydropower.Generators file.

Returns:

Type Description
DataFrameModel

pa.DataFrameModel: Pandera DataFrameModel schema for the Generator attribute data.

Source code in framdata/database_names/HydroGeneratorNames.py
@staticmethod
def get_attribute_data_schema() -> pa.DataFrameModel:
    """
    Get the Pandera DataFrameModel schema for attribute data in the Hydropower.Generators file.

    Returns:
        pa.DataFrameModel: Pandera DataFrameModel schema for the Generator attribute data.

    """
    return GeneratorSchema
get_metadata_schema() -> pa.DataFrameModel staticmethod

Get the Pandera DataFrameModel schema for the metadata table in the Hydropower.Generators file.

Returns:

Type Description
DataFrameModel

pa.DataFrameModel: Pandera DataFrameModel schema for the Generator metadata.

Source code in framdata/database_names/HydroGeneratorNames.py
@staticmethod
def get_metadata_schema() -> pa.DataFrameModel:
    """
    Get the Pandera DataFrameModel schema for the metadata table in the Hydropower.Generators file.

    Returns:
        pa.DataFrameModel: Pandera DataFrameModel schema for the Generator metadata.

    """
    return GeneratorMetadataSchema

HydroInflowNames

Define the InflowNames class and related Pandera schemas for handling hydropower inflow data.

Includes attribute and metadata schemas.

HydroInflowNames

Bases: _BaseComponentsNames

Convert hydropower inflow data to attribute objects for HydroModules. Handle attribute and metadata schema validation.

Source code in framdata/database_names/HydroInflowNames.py
class HydroInflowNames(_BaseComponentsNames):
    """Convert hydropower inflow data to attribute objects for HydroModules. Handle attribute and metadata schema validation."""

    id_col = "InflowID"
    yr_vol_col = "YearlyVolume"
    profile_col = "InflowProfileID"

    columns: ClassVar[list[str]] = [
        id_col,
        yr_vol_col,
        profile_col,
    ]

    ref_columns: ClassVar[list[str]] = [
        yr_vol_col,
        profile_col,
    ]

    @staticmethod
    def create_component(
        row: NDArray,
        indices: dict[str, int],
        meta_columns: set[str],
        meta_data: pd.DataFrame,
        attribute_objects: dict[str, tuple[object, dict[str, Meta]]] | None = None,
    ) -> dict[str, AvgFlowVolume]:
        """
        Create a hydro inflow component.

        Args:
            row (NDArray): Array containing the values of one table row, representing one HydroModule object.
            indices (dict[str, int]): Mapping of the table's column names to the array's indices.
            meta_columns (set[str]): Set of columns used to tag object with memberships.
            meta_data (pd.DataFrame): DataFrame containing at least the unit of every column.
            attribute_objects (dict[str, tuple[object, dict[str, Meta]]], optional): NOT USED, currently only used in HydroModulesNames.

        Returns:
            dict[str, Component]: A dictionary with the inflow ID as key and the module unit as value.

        """
        if HydroInflowNames._ref_period_lacks_profiles(row, indices, [HydroInflowNames.profile_col], meta_data):
            return {row[indices[HydroInflowNames.id_col]]: None}
        columns_to_parse = [
            HydroInflowNames.yr_vol_col,
            HydroInflowNames.profile_col,
        ]

        arg_user_code = HydroInflowNames._parse_args(row, indices, columns_to_parse, meta_data)

        inflow = AvgFlowVolume(
            level=arg_user_code[HydroInflowNames.yr_vol_col],
            profile=arg_user_code[HydroInflowNames.profile_col],
        )

        meta = {}
        HydroInflowNames._add_meta(meta, row, indices, meta_columns)

        return {row[indices[HydroInflowNames.id_col]]: (inflow, meta)}

    @staticmethod
    def get_attribute_data_schema() -> pa.DataFrameModel:
        """
        Get the Pandera DataFrameModel schema for attribute data in the Hydropower.Inflow file.

        Returns:
            pa.DataFrameModel: Pandera DataFrameModel schema for the Inflow attribute data.

        """
        return InflowSchema

    @staticmethod
    def get_metadata_schema() -> pa.DataFrameModel:
        """
        Get the Pandera DataFrameModel schema for the metadata table in the Hydropower.Inflow file.

        Returns:
            pa.DataFrameModel: Pandera DataFrameModel schema for the Inflow metadata.

        """
        return InflowMetadataSchema

    @staticmethod
    def _get_unique_check_descriptions() -> dict[str, tuple[str, bool]]:
        """
        Retrieve a dictionary with descriptions of validation checks that are specific to the Inflow schemas.

        Returns:
            dict[str, tuple[str, bool]]: A dictionary where:
                - Keys (str): The name of the validation check method.
                - Values (tuple[str, bool]):
                    - The first element (str) provides a concise and user-friendly description of the check. E.g. what
                      caused the validation error or what is required for the check to pass.
                    - The second element (bool) indicates whether the check is a warning (True) or an error (False).


        """
        return None

    @staticmethod
    def _format_unique_checks(errors: pd.DataFrame) -> pd.DataFrame:
        """
        Format the error DataFrame according to the validation checks that are specific to the Inflow schemas.

        Args:
            errors (pd.DataFrame): The error DataFrame containing validation errors.

        Returns:
            pd.DataFrame: The updated error DataFrame with formatted rows for unique validation checks.

        """
        return None
create_component(row: NDArray, indices: dict[str, int], meta_columns: set[str], meta_data: pd.DataFrame, attribute_objects: dict[str, tuple[object, dict[str, Meta]]] | None = None) -> dict[str, AvgFlowVolume] staticmethod

Create a hydro inflow component.

Parameters:

Name Type Description Default
row NDArray

Array containing the values of one table row, representing one inflow object.

required
indices dict[str, int]

Mapping of the table's column names to the array's indices.

required
meta_columns set[str]

Set of columns used to tag the object with memberships.

required
meta_data DataFrame

DataFrame containing at least the unit of every column.

required
attribute_objects dict[str, tuple[object, dict[str, Meta]]]

NOT USED, currently only used in HydroModulesNames.

None

Returns:

Type Description
dict[str, AvgFlowVolume]

dict[str, AvgFlowVolume]: A dictionary with the inflow ID as key and the inflow object and its metadata as value.

Source code in framdata/database_names/HydroInflowNames.py
@staticmethod
def create_component(
    row: NDArray,
    indices: dict[str, int],
    meta_columns: set[str],
    meta_data: pd.DataFrame,
    attribute_objects: dict[str, tuple[object, dict[str, Meta]]] | None = None,
) -> dict[str, AvgFlowVolume]:
    """
    Create a hydro inflow component.

    Args:
        row (NDArray): Array containing the values of one table row, representing one inflow object.
        indices (dict[str, int]): Mapping of the table's column names to the array's indices.
        meta_columns (set[str]): Set of columns used to tag the object with memberships.
        meta_data (pd.DataFrame): DataFrame containing at least the unit of every column.
        attribute_objects (dict[str, tuple[object, dict[str, Meta]]], optional): NOT USED, currently only used in HydroModulesNames.

    Returns:
        dict[str, AvgFlowVolume]: A dictionary with the inflow ID as key and the inflow object and its metadata as value.

    """
    if HydroInflowNames._ref_period_lacks_profiles(row, indices, [HydroInflowNames.profile_col], meta_data):
        return {row[indices[HydroInflowNames.id_col]]: None}
    columns_to_parse = [
        HydroInflowNames.yr_vol_col,
        HydroInflowNames.profile_col,
    ]

    arg_user_code = HydroInflowNames._parse_args(row, indices, columns_to_parse, meta_data)

    inflow = AvgFlowVolume(
        level=arg_user_code[HydroInflowNames.yr_vol_col],
        profile=arg_user_code[HydroInflowNames.profile_col],
    )

    meta = {}
    HydroInflowNames._add_meta(meta, row, indices, meta_columns)

    return {row[indices[HydroInflowNames.id_col]]: (inflow, meta)}
get_attribute_data_schema() -> pa.DataFrameModel staticmethod

Get the Pandera DataFrameModel schema for attribute data in the Hydropower.Inflow file.

Returns:

Type Description
DataFrameModel

pa.DataFrameModel: Pandera DataFrameModel schema for the Inflow attribute data.

Source code in framdata/database_names/HydroInflowNames.py
@staticmethod
def get_attribute_data_schema() -> pa.DataFrameModel:
    """
    Get the Pandera DataFrameModel schema for attribute data in the Hydropower.Inflow file.

    Returns:
        pa.DataFrameModel: Pandera DataFrameModel schema for the Inflow attribute data.

    """
    return InflowSchema
get_metadata_schema() -> pa.DataFrameModel staticmethod

Get the Pandera DataFrameModel schema for the metadata table in the Hydropower.Inflow file.

Returns:

Type Description
DataFrameModel

pa.DataFrameModel: Pandera DataFrameModel schema for the Inflow metadata.

Source code in framdata/database_names/HydroInflowNames.py
@staticmethod
def get_metadata_schema() -> pa.DataFrameModel:
    """
    Get the Pandera DataFrameModel schema for the metadata table in the Hydropower.Inflow file.

    Returns:
        pa.DataFrameModel: Pandera DataFrameModel schema for the Inflow metadata.

    """
    return InflowMetadataSchema
InflowMetadataSchema

Bases: _AttributeMetadataSchema

Pandera DataFrameModel schema for metadata in the Hydropower.Inflow file.

Source code in framdata/database_names/HydroInflowNames.py
class InflowMetadataSchema(_AttributeMetadataSchema):
    """Pandera DataFrameModel schema for metadata in the Hydropower.Inflow file."""

    pass
InflowSchema

Bases: DataFrameModel

Pandera DataFrameModel schema for attribute data in the Hydropower.Inflow file.

Source code in framdata/database_names/HydroInflowNames.py
class InflowSchema(pa.DataFrameModel):
    """Pandera DataFrameModel schema for attribute data in the Hydropower.Inflow file."""

    pass
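
One behavioural detail from HydroInflowNames.create_component above: when _ref_period_lacks_profiles is true for a row, the method returns the inflow ID mapped to None instead of an (object, metadata) tuple, so callers that collect inflow components need to skip those entries. The snippet below is a small, self-contained illustration of that filtering; the IDs and objects are placeholders, not real framdata output.

# Hypothetical results from HydroInflowNames.create_component for three rows;
# "Inflow_B" lacked profiles for the reference period and therefore maps to None.
created = {
    "Inflow_A": ("<AvgFlowVolume A>", {}),
    "Inflow_B": None,
    "Inflow_C": ("<AvgFlowVolume C>", {}),
}

# Keep only the inflows that were actually created.
inflows = {inflow_id: pair for inflow_id, pair in created.items() if pair is not None}
print(sorted(inflows))  # ['Inflow_A', 'Inflow_C']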

HydroModulesNames

Defines schema, names, and component creation logic for hydropower modules.

This module provides:

- HydroModulesNames: class for column names and component creation for hydropower modules.
- HydroModuleSchema: Pandera schema for attribute data.
- HydroModuleMetadataSchema: Pandera schema for metadata.

HydroModuleMetadataSchema

Bases: _AttributeMetadataSchema

Pandera DataFrameModel schema for metadata in the Hydropower.Modules file.

Source code in framdata/database_names/HydroModulesNames.py
class HydroModuleMetadataSchema(_AttributeMetadataSchema):
    """Pandera DataFrameModel schema for metadata in the Hydropower.Modules file."""

    pass
HydroModuleSchema

Bases: DataFrameModel

Pandera DataFrameModel schema for attribute data in the Hydropower.Modules file.

Source code in framdata/database_names/HydroModulesNames.py
class HydroModuleSchema(pa.DataFrameModel):
    """Pandera DataFrameModel schema for attribute data in the Hydropower.Modules file."""

    pass
HydroModulesNames

Bases: _BaseComponentsNames

Provides column names, schema accessors, and component creation logic for hydropower modules.

This class defines constants for column names, methods for creating HydroModule components from data rows, and accessors for Pandera schemas used for validation of attribute and metadata tables.

Source code in framdata/database_names/HydroModulesNames.py
class HydroModulesNames(_BaseComponentsNames):
    """
    Provides column names, schema accessors, and component creation logic for hydropower modules.

    This class defines constants for column names, methods for creating HydroModule components from data rows,
    and accessors for Pandera schemas used for validation of attribute and metadata tables.
    """

    filename = "Hydropower.Modules"

    id_col = "ModuleID"
    pump_col = "Pump"
    gen_col = "Generator"
    res_col = "Reservoir"
    byp_col = "Bypass"
    hyd_code_col = "HydraulicCoupling"
    inflow_col = "Inflow"
    rel_to_col = "ReleaseTo"
    spill_to_col = "SpillTo"
    rel_cap_col = "CapacityRelease"
    min_bnd_col = "MinOperationalRelease"
    max_bnd_col = "MaxOperationalRelease"
    min_penalty_col = "MinViolationPenalty"
    max_penalty_col = "MaxViolationPenalty"

    columns: ClassVar[list[str]] = [
        id_col,
        pump_col,
        gen_col,
        res_col,
        byp_col,
        hyd_code_col,
        inflow_col,
        rel_to_col,
        spill_to_col,
        rel_cap_col,
        min_bnd_col,
        max_bnd_col,
        min_penalty_col,
        max_penalty_col,
    ]

    ref_columns: ClassVar[list[str]] = [
        rel_to_col,
        spill_to_col,
        rel_cap_col,
        min_bnd_col,
        max_bnd_col,
        min_penalty_col,
        max_penalty_col,
    ]

    @staticmethod
    def create_component(
        row: NDArray,
        indices: dict[str, int],
        meta_columns: set[str],
        meta_data: pd.DataFrame,
        attribute_objects: dict[str, tuple[object, dict[str, Meta]]] | None = None,
    ) -> dict[str, Component]:
        """
        Create a hydro module component.

        Args:
            row (NDArray): Array containing the values of one table row, representing one HydroModule object.
            indices (dict[str, int]): Mapping of the table's column names to the array's indices.
            meta_columns (set[str]): Set of columns used to tag the object with memberships.
            meta_data (pd.DataFrame): DataFrame containing at least the unit of every column.
            attribute_objects (dict[str, tuple[object, dict[str, Meta]]], optional): Dictionary of attributes to link to the HydroModule.

        Returns:
            dict[str, Component]: A dictionary with the module_id as key and the module unit as value.

        """
        columns_to_parse = [
            HydroModulesNames.rel_cap_col,
            HydroModulesNames.min_bnd_col,
            HydroModulesNames.max_bnd_col,
            HydroModulesNames.min_penalty_col,
            HydroModulesNames.max_penalty_col,
        ]
        name = row[indices[HydroModulesNames.id_col]]
        inflow_name = indices[HydroModulesNames.inflow_col]
        pump_name = indices[HydroModulesNames.pump_col]
        gen_name = indices[HydroModulesNames.gen_col]
        res_name = indices[HydroModulesNames.res_col]
        byp_name = indices[HydroModulesNames.byp_col]
        arg_user_code = HydroModulesNames._parse_args(row, indices, columns_to_parse, meta_data)
        inflow, inflow_meta = HydroModulesNames._get_attribute_object(
            attribute_objects,
            row[inflow_name],
            name,
            HydroModule,
            AvgFlowVolume,
        )
        pump, pump_meta = HydroModulesNames._get_attribute_object(
            attribute_objects,
            row[pump_name],
            name,
            HydroModule,
            HydroPump,
        )
        generator, generator_meta = HydroModulesNames._get_attribute_object(
            attribute_objects,
            row[gen_name],
            name,
            HydroModule,
            HydroGenerator,
        )
        reservoir, reservoir_meta = HydroModulesNames._get_attribute_object(
            attribute_objects,
            row[res_name],
            name,
            HydroModule,
            HydroReservoir,
        )
        bypass, bypass_meta = HydroModulesNames._get_attribute_object(
            attribute_objects,
            row[byp_name],
            name,
            HydroModule,
            HydroBypass,
        )
        module = HydroModule(
            release_capacity=MaxFlowVolume(level=arg_user_code[HydroModulesNames.rel_cap_col]),
            hydraulic_coupling=row[indices[HydroModulesNames.hyd_code_col]],
            inflow=inflow,
            pump=pump,
            generator=generator,
            reservoir=reservoir,
            bypass=bypass,
            release_to=row[indices[HydroModulesNames.rel_to_col]],
            spill_to=row[indices[HydroModulesNames.spill_to_col]],
        )

        if "EnergyEqDownstream" in meta_columns:
            HydroModulesNames._add_meta(module, row, indices, ["EnergyEqDownstream"], unit="kWh/m3")

        meta_columns = [c for c in meta_columns if c != "EnergyEqDownstream"]
        HydroModulesNames._add_meta(module, row, indices, meta_columns)  # fails because Modules want floats in Meta.

        attr_meta = {
            inflow_name: inflow_meta,
            pump_name: pump_meta,
            gen_name: generator_meta,
            res_name: reservoir_meta,
            byp_name: bypass_meta,
        }
        HydroModulesNames._merge_attribute_meta(
            name,
            module,
            {k: v for k, v in attr_meta.items() if k and v},
        )

        return {name: module}

    @staticmethod
    def get_attribute_data_schema() -> pa.DataFrameModel:
        """
        Get the Pandera DataFrameModel schema for attribute data in the Hydropower.Modules file.

        Returns:
            pa.DataFrameModel: Pandera DataFrameModel schema for the HydroModule attribute data.

        """
        return HydroModuleSchema

    @staticmethod
    def get_metadata_schema() -> pa.DataFrameModel:
        """
        Get the Pandera DataFrameModel schema for the metadata table in the Hydropower.Modules file.

        Returns:
            pa.DataFrameModel: Pandera DataFrameModel schema for the HydroModule metadata.

        """
        return HydroModuleMetadataSchema

    @staticmethod
    def _get_unique_check_descriptions() -> dict[str, tuple[str, bool]]:
        """
        Retrieve a dictionary with descriptions of validation checks that are specific to the HydroModule schemas.

        Returns:
            dict[str, tuple[str, bool]]: A dictionary where:
                - Keys (str): The name of the validation check method.
                - Values (tuple[str, bool]):
                    - The first element (str) provides a concise and user-friendly description of the check. E.g. what
                      caused the validation error or what is required for the check to pass.
                    - The second element (bool) indicates whether the check is a warning (True) or an error (False).


        """
        return None

    @staticmethod
    def _format_unique_checks(errors: pd.DataFrame) -> pd.DataFrame:
        """
        Format the error DataFrame according to the validation checks that are specific to the HydroModule schemas.

        Args:
            errors (pd.DataFrame): The error DataFrame containing validation errors.

        Returns:
            pd.DataFrame: The updated error DataFrame with formatted rows for unique validation checks.

        """
        return None
create_component(row: NDArray, indices: dict[str, int], meta_columns: set[str], meta_data: pd.DataFrame, attribute_objects: dict[str, tuple[object, dict[str, Meta]]] | None = None) -> dict[str, Component] staticmethod

Create a hydro module component.

Parameters:

Name Type Description Default
row NDArray

Array containing the values of one table row, representing one HydroModule object.

required
indices dict[str, int]

Mapping of the table's column names to the array's indices.

required
meta_columns set[str]

Set of columns used to tag the object with memberships.

required
meta_data DataFrame

DataFrame containing at least the unit of every column.

required
attribute_objects dict[str, tuple[object, dict[str, Meta]]]

Dictionary of attributes to link to the HydroModule.

None

Returns:

Type Description
dict[str, Component]

dict[str, Component]: A dictionary with the module_id as key and the module unit as value.

Source code in framdata/database_names/HydroModulesNames.py
@staticmethod
def create_component(
    row: NDArray,
    indices: dict[str, int],
    meta_columns: set[str],
    meta_data: pd.DataFrame,
    attribute_objects: dict[str, tuple[object, dict[str, Meta]]] | None = None,
) -> dict[str, Component]:
    """
    Create a hydro module component.

    Args:
        row (NDArray): Array containing the values of one table row, representing one HydroModule object.
        indices (dict[str, int]): Mapping of the table's column names to the array's indices.
        meta_columns (set[str]): Set of columns used to tag the object with memberships.
        meta_data (pd.DataFrame): DataFrame containing at least the unit of every column.
        attribute_objects (dict[str, tuple[object, dict[str, Meta]]], optional): Dictionary of attributes to link to the HydroModule.

    Returns:
        dict[str, Component]: A dictionary with the module_id as key and the module unit as value.

    """
    columns_to_parse = [
        HydroModulesNames.rel_cap_col,
        HydroModulesNames.min_bnd_col,
        HydroModulesNames.max_bnd_col,
        HydroModulesNames.min_penalty_col,
        HydroModulesNames.max_penalty_col,
    ]
    name = row[indices[HydroModulesNames.id_col]]
    inflow_name = indices[HydroModulesNames.inflow_col]
    pump_name = indices[HydroModulesNames.pump_col]
    gen_name = indices[HydroModulesNames.gen_col]
    res_name = indices[HydroModulesNames.res_col]
    byp_name = indices[HydroModulesNames.byp_col]
    arg_user_code = HydroModulesNames._parse_args(row, indices, columns_to_parse, meta_data)
    inflow, inflow_meta = HydroModulesNames._get_attribute_object(
        attribute_objects,
        row[inflow_name],
        name,
        HydroModule,
        AvgFlowVolume,
    )
    pump, pump_meta = HydroModulesNames._get_attribute_object(
        attribute_objects,
        row[pump_name],
        name,
        HydroModule,
        HydroPump,
    )
    generator, generator_meta = HydroModulesNames._get_attribute_object(
        attribute_objects,
        row[gen_name],
        name,
        HydroModule,
        HydroGenerator,
    )
    reservoir, reservoir_meta = HydroModulesNames._get_attribute_object(
        attribute_objects,
        row[res_name],
        name,
        HydroModule,
        HydroReservoir,
    )
    bypass, bypass_meta = HydroModulesNames._get_attribute_object(
        attribute_objects,
        row[byp_name],
        name,
        HydroModule,
        HydroBypass,
    )
    module = HydroModule(
        release_capacity=MaxFlowVolume(level=arg_user_code[HydroModulesNames.rel_cap_col]),
        hydraulic_coupling=row[indices[HydroModulesNames.hyd_code_col]],
        inflow=inflow,
        pump=pump,
        generator=generator,
        reservoir=reservoir,
        bypass=bypass,
        release_to=row[indices[HydroModulesNames.rel_to_col]],
        spill_to=row[indices[HydroModulesNames.spill_to_col]],
    )

    if "EnergyEqDownstream" in meta_columns:
        HydroModulesNames._add_meta(module, row, indices, ["EnergyEqDownstream"], unit="kWh/m3")

    meta_columns = [c for c in meta_columns if c != "EnergyEqDownstream"]
    HydroModulesNames._add_meta(module, row, indices, meta_columns)  # fails because Modules want floats in Meta.

    attr_meta = {
        inflow_name: inflow_meta,
        pump_name: pump_meta,
        gen_name: generator_meta,
        res_name: reservoir_meta,
        byp_name: bypass_meta,
    }
    HydroModulesNames._merge_attribute_meta(
        name,
        module,
        {k: v for k, v in attr_meta.items() if k and v},
    )

    return {name: module}
get_attribute_data_schema() -> pa.DataFrameModel staticmethod

Get the Pandera DataFrameModel schema for attribute data in the Hydropower.Modules file.

Returns:

Type Description
DataFrameModel

pa.DataFrameModel: Pandera DataFrameModel schema for the HydroModule attribute data.

Source code in framdata/database_names/HydroModulesNames.py
@staticmethod
def get_attribute_data_schema() -> pa.DataFrameModel:
    """
    Get the Pandera DataFrameModel schema for attribute data in the Hydropower.Modules file.

    Returns:
        pa.DataFrameModel: Pandera DataFrameModel schema for the HydroModule attribute data.

    """
    return HydroModuleSchema
get_metadata_schema() -> pa.DataFrameModel staticmethod

Get the Pandera DataFrameModel schema for the metadata table in the Hydropower.Modules file.

Returns:

Type Description
DataFrameModel

pa.DataFrameModel: Pandera DataFrameModel schema for the HydroModule metadata.

Source code in framdata/database_names/HydroModulesNames.py
@staticmethod
def get_metadata_schema() -> pa.DataFrameModel:
    """
    Get the Pandera DataFrameModel schema for the metadata table in the Hydropower.Modules file.

    Returns:
        pa.DataFrameModel: Pandera DataFrameModel schema for the HydroModule metadata.

    """
    return HydroModuleMetadataSchema
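
HydroModulesNames.create_component is the one place where the attribute_objects argument is actually used: the module row stores the IDs of its inflow, pump, generator, reservoir and bypass, and _get_attribute_object looks each ID up in the dictionary built from the other *Names classes' results. The sketch below only illustrates that lookup pattern with placeholder IDs and objects; it is not the framdata implementation.

# attribute_objects maps attribute IDs to (object, metadata) tuples, as
# produced by the other *Names classes' create_component methods.
attribute_objects = {
    "Inflow_A": ("<AvgFlowVolume>", {}),
    "Gen_1": ("<HydroGenerator>", {}),
    "Res_1": ("<HydroReservoir>", {}),
}

# A module row references its attributes by ID; an empty cell simply means
# the module has no such attribute.
module_row = {"ModuleID": "Mod_1", "Inflow": "Inflow_A", "Generator": "Gen_1", "Reservoir": "Res_1", "Pump": None}

linked = {col: attribute_objects[ref] for col, ref in module_row.items() if ref in attribute_objects}
print(sorted(linked))  # ['Generator', 'Inflow', 'Reservoir']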

HydroPumpNames

Define the HydroPumpNames class and related Pandera schemas for handling hydropower pump data.

Includes attribute and metadata validation for the Hydropower.Pumps file.

HydroPumpNames

Bases: _BaseComponentsNames

Handle naming conventions, schema definitions, and component creation for hydropower pump data.

Source code in framdata/database_names/HydroPumpNames.py
class HydroPumpNames(_BaseComponentsNames):
    """Handle naming conventions, schema definitions, and component creation for hydropower pump data."""

    id_col = "PumpID"
    node_col = "PowerNode"
    pump_from_col = "PumpFrom"
    pump_to_col = "PumpTo"
    power_capacity_col = "PowerCapacity"
    vol_capacity_col = "Capacity"
    energy_equiv_col = "EnergyEq"
    h_min_col = "HeadMin"
    h_max_col = "HeadMax"
    q_min_col = "QMin"
    q_max_col = "QMax"

    columns: ClassVar[list[str]] = [
        id_col,
        node_col,
        pump_from_col,
        pump_to_col,
        power_capacity_col,
        vol_capacity_col,
        energy_equiv_col,
        h_min_col,
        h_max_col,
        q_min_col,
        q_max_col,
    ]

    ref_columns: ClassVar[list[str]] = [
        node_col,
        pump_from_col,
        pump_to_col,
        power_capacity_col,
        vol_capacity_col,
        energy_equiv_col,
        h_min_col,
        h_max_col,
        q_min_col,
        q_max_col,
    ]

    @staticmethod
    def create_component(
        row: NDArray,
        indices: dict[str, int],
        meta_columns: set[str],
        meta_data: pd.DataFrame,
        attribute_objects: dict[str, tuple[object, dict[str, Meta]]] | None = None,
    ) -> dict[str, HydroPump]:
        """
        Create a HydroPump object from a row in the Hydropower.Pumps table.

        Args:
            row (NDArray): Array containing the values of one table row, representing one HydroPump object.
            indices (dict[str, int]): Mapping of the table's column names to the array's indices.
            meta_columns (set[str]): Set of columns used to tag the object with memberships.
            meta_data (pd.DataFrame): DataFrame containing at least the unit of every column.
            attribute_objects (dict[str, tuple[object, dict[str, Meta]]], optional): NOT USED, currently only used in HydroModulesNames.

        Returns:
            dict[str, HydroPump]: A dictionary with the pump ID as key and the pump object and its metadata as value.

        """
        columns_to_parse = [
            HydroPumpNames.power_capacity_col,
            HydroPumpNames.vol_capacity_col,
            HydroPumpNames.energy_equiv_col,
            HydroPumpNames.h_min_col,
            HydroPumpNames.h_max_col,
            HydroPumpNames.q_min_col,
            HydroPumpNames.q_max_col,
        ]

        arg_user_code = HydroPumpNames._parse_args(row, indices, columns_to_parse, meta_data)

        pump = HydroPump(
            power_node=row[indices[HydroPumpNames.node_col]],
            from_module=row[indices[HydroPumpNames.pump_from_col]],
            to_module=row[indices[HydroPumpNames.pump_to_col]],
            water_capacity=MaxFlowVolume(level=arg_user_code[HydroPumpNames.vol_capacity_col]),
            energy_equivalent=Conversion(level=arg_user_code[HydroPumpNames.energy_equiv_col]),
            power_capacity=MaxFlowVolume(level=arg_user_code[HydroPumpNames.power_capacity_col]),
            head_max=arg_user_code[HydroPumpNames.h_max_col],
            head_min=arg_user_code[HydroPumpNames.h_min_col],
            q_max=arg_user_code[HydroPumpNames.q_max_col],
            q_min=arg_user_code[HydroPumpNames.q_min_col],
        )

        meta = {}
        HydroPumpNames._add_meta(meta, row, indices, meta_columns)

        return {row[indices[HydroPumpNames.id_col]]: (pump, meta)}

    @staticmethod
    def get_attribute_data_schema() -> pa.DataFrameModel:
        """
        Get the Pandera DataFrameModel schema for attribute data in the Hydropower.Pumps file.

        Returns:
            pa.DataFrameModel: Pandera DataFrameModel schema for the Pump attribute data.

        """
        return PumpSchema

    @staticmethod
    def get_metadata_schema() -> pa.DataFrameModel:
        """
        Get the Pandera DataFrameModel schema for the metadata table in the Hydropower.Pumps file.

        Returns:
            pa.DataFrameModel: Pandera DataFrameModel schema for the Pump metadata.

        """
        return PumpMetadataSchema

    @staticmethod
    def _get_unique_check_descriptions() -> dict[str, tuple[str, bool]]:
        """
        Retrieve a dictionary with descriptions of validation checks that are specific to the Pump schemas.

        Returns:
            dict[str, tuple[str, bool]]: A dictionary where:
                - Keys (str): The name of the validation check method.
                - Values (tuple[str, bool]):
                    - The first element (str) provides a concise and user-friendly description of the check. E.g. what
                      caused the validation error or what is required for the check to pass.
                    - The second element (bool) indicates whether the check is a warning (True) or an error (False).


        """
        return None

    @staticmethod
    def _format_unique_checks(errors: pd.DataFrame) -> pd.DataFrame:
        """
        Format the error DataFrame according to the validation checks that are specific to the Pump schemas.

        Args:
            errors (pd.DataFrame): The error DataFrame containing validation errors.

        Returns:
            pd.DataFrame: The updated error DataFrame with formatted rows for unique validation checks.

        """
        return None
create_component(row: NDArray, indices: dict[str, int], meta_columns: set[str], meta_data: pd.DataFrame, attribute_objects: dict[str, tuple[object, dict[str, Meta]]] | None = None) -> dict[str, HydroPump] staticmethod

Create a HydroPump object from a row in the Hydropower.Pumps table.

Parameters:

Name Type Description Default
row NDArray

Array containing the values of one table row, representing one HydroPump object.

required
indices dict[str, int]

Mapping of the table's column names to the array's indices.

required
meta_columns set[str]

Set of columns used to tag the object with memberships.

required
meta_data DataFrame

DataFrame containing at least the unit of every column.

required
attribute_objects dict[str, tuple[object, dict[str, Meta]]]

NOT USED, currently only used in HydroModulesNames.

None

Returns:

Type Description
dict[str, HydroPump]

dict[str, HydroPump]: A dictionary with the pump ID as key and the pump object and its metadata as value.

Source code in framdata/database_names/HydroPumpNames.py
@staticmethod
def create_component(
    row: NDArray,
    indices: dict[str, int],
    meta_columns: set[str],
    meta_data: pd.DataFrame,
    attribute_objects: dict[str, tuple[object, dict[str, Meta]]] | None = None,
) -> dict[str, HydroPump]:
    """
    Create a HydroPump object from a row in the Hydropower.Pumps table.

    Args:
        row (NDArray): Array containing the values of one table row, representing one HydroPump object.
        indices (dict[str, int]): Mapping of the table's column names to the array's indices.
        meta_columns (set[str]): Set of columns used to tag the object with memberships.
        meta_data (pd.DataFrame): DataFrame containing at least the unit of every column.
        attribute_objects (dict[str, tuple[object, dict[str, Meta]]], optional): NOT USED, currently only used in HydroModulesNames.

    Returns:
        dict[str, HydroPump]: A dictionary with the pump ID as key and the pump object and its metadata as value.

    """
    columns_to_parse = [
        HydroPumpNames.power_capacity_col,
        HydroPumpNames.vol_capacity_col,
        HydroPumpNames.energy_equiv_col,
        HydroPumpNames.h_min_col,
        HydroPumpNames.h_max_col,
        HydroPumpNames.q_min_col,
        HydroPumpNames.q_max_col,
    ]

    arg_user_code = HydroPumpNames._parse_args(row, indices, columns_to_parse, meta_data)

    pump = HydroPump(
        power_node=row[indices[HydroPumpNames.node_col]],
        from_module=row[indices[HydroPumpNames.pump_from_col]],
        to_module=row[indices[HydroPumpNames.pump_to_col]],
        water_capacity=MaxFlowVolume(level=arg_user_code[HydroPumpNames.vol_capacity_col]),
        energy_equivalent=Conversion(level=arg_user_code[HydroPumpNames.energy_equiv_col]),
        power_capacity=MaxFlowVolume(level=arg_user_code[HydroPumpNames.power_capacity_col]),
        head_max=arg_user_code[HydroPumpNames.h_max_col],
        head_min=arg_user_code[HydroPumpNames.h_min_col],
        q_max=arg_user_code[HydroPumpNames.q_max_col],
        q_min=arg_user_code[HydroPumpNames.q_min_col],
    )

    meta = {}
    HydroPumpNames._add_meta(meta, row, indices, meta_columns)

    return {row[indices[HydroPumpNames.id_col]]: (pump, meta)}
get_attribute_data_schema() -> pa.DataFrameModel staticmethod

Get the Pandera DataFrameModel schema for attribute data in the Hydropower.Pumps file.

Returns:

Type Description
DataFrameModel

pa.DataFrameModel: Pandera DataFrameModel schema for the Pump attribute data.

Source code in framdata/database_names/HydroPumpNames.py
@staticmethod
def get_attribute_data_schema() -> pa.DataFrameModel:
    """
    Get the Pandera DataFrameModel schema for attribute data in the Hydropower.Pumps file.

    Returns:
        pa.DataFrameModel: Pandera DataFrameModel schema for the Pump attribute data.

    """
    return PumpSchema
get_metadata_schema() -> pa.DataFrameModel staticmethod

Get the Pandera DataFrameModel schema for the metadata table in the Hydropower.Pumps file.

Returns:

Type Description
DataFrameModel

pa.DataFrameModel: Pandera DataFrameModel schema for the Pump metadata.

Source code in framdata/database_names/HydroPumpNames.py
@staticmethod
def get_metadata_schema() -> pa.DataFrameModel:
    """
    Get the Pandera DataFrameModel schema for the metadata table in the Hydropower.Pumps file.

    Returns:
        pa.DataFrameModel: Pandera DataFrameModel schema for the Pump metadata.

    """
    return PumpMetadataSchema
PumpMetadataSchema

Bases: _AttributeMetadataSchema

Pandera DataFrameModel schema for metadata in the Hydropower.Pumps file.

Source code in framdata/database_names/HydroPumpNames.py
class PumpMetadataSchema(_AttributeMetadataSchema):
    """Pandera DataFrameModel schema for metadata in the Hydropower.Pumps file."""

    pass
PumpSchema

Bases: DataFrameModel

Pandera DataFrameModel schema for attribute data in the Hydropower.Pumps file.

Source code in framdata/database_names/HydroPumpNames.py
class PumpSchema(pa.DataFrameModel):
    """Pandera DataFrameModel schema for attribute data in the Hydropower.Pumps file."""

    pass
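
PumpSchema and PumpMetadataSchema above are still empty placeholders, so pandera will not enforce any column constraints for Hydropower.Pumps tables yet. For comparison, this is roughly how a filled-in DataFrameModel is used; the schema class below is hypothetical and only borrows the PumpID and PowerNode column names from HydroPumpNames, it is not part of framdata.

import pandas as pd
import pandera as pa
from pandera.typing import Series

class ExamplePumpSchema(pa.DataFrameModel):
    """Hypothetical schema showing the kind of constraints a filled-in PumpSchema could add."""

    PumpID: Series[str] = pa.Field(unique=True, nullable=False)
    PowerNode: Series[str] = pa.Field(nullable=False)

df = pd.DataFrame({"PumpID": ["Pump_1", "Pump_2"], "PowerNode": ["NO1", "NO2"]})
validated = ExamplePumpSchema.validate(df)  # raises pandera.errors.SchemaError on violations
print(len(validated))  # 2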

HydroReservoirNames

Module for handling reservoir names and schemas in hydropower data.

This module defines the HydroReservoirNames class for managing reservoir attributes, and provides Pandera schemas for validating reservoir attribute and metadata tables.

HydroReservoirMetadataSchema

Bases: _AttributeMetadataSchema

Pandera DataFrameModel schema for metadata in the Hydropower.Reservoirs file.

Source code in framdata/database_names/HydroReservoirNames.py
class HydroReservoirMetadataSchema(_AttributeMetadataSchema):
    """Pandera DataFrameModel schema for metadata in the Hydropower.Reservoirs file."""

    pass
HydroReservoirNames

Bases: _BaseComponentsNames

Class for managing reservoir attribute names and providing methods for schema validation and component creation.

This class defines column names for reservoir attributes, methods for creating HydroReservoir components, and functions to retrieve Pandera schemas for validating reservoir attribute and metadata tables.

Source code in framdata/database_names/HydroReservoirNames.py
class HydroReservoirNames(_BaseComponentsNames):
    """
    Class for managing reservoir attribute names and providing methods for schema validation and component creation.

    This class defines column names for reservoir attributes, methods for creating HydroReservoir components,
    and functions to retrieve Pandera schemas for validating reservoir attribute and metadata tables.
    """

    id_col = "ReservoirID"
    capacity_col = "Capacity"
    res_curve_col = "ReservoirCurve"
    min_res_col = "MinOperationalFilling"
    min_penalty_col = "MinViolationPenalty"
    max_res_col = "MaxOperationalFilling"
    max_penalty_col = "MaxViolationPenalty"
    res_buf_col = "TargetFilling"
    buf_penalty_col = "TargetViolationPenalty"

    columns: ClassVar[list[str]] = [
        id_col,
        capacity_col,
        res_curve_col,
        min_res_col,
        max_res_col,
        res_buf_col,
        min_penalty_col,
        max_penalty_col,
        buf_penalty_col,
    ]

    ref_columns: ClassVar[list[str]] = [
        capacity_col,
        res_curve_col,
        min_res_col,
        max_res_col,
        res_buf_col,
        min_penalty_col,
        max_penalty_col,
        buf_penalty_col,
    ]

    @staticmethod
    def create_component(
        row: NDArray,
        indices: dict[str, int],
        meta_columns: set[str],
        meta_data: pd.DataFrame,
        attribute_objects: dict[str, tuple[object, dict[str, Meta]]] | None = None,
    ) -> dict[str, HydroReservoir]:
        """
        Create a HydroReservoir object.

        Args:
            row (NDArray): Array containing the values of one table row, representing one HydroReservoir object.
            indices (dict[str, int]): Mapping of the table's column names to the array's indices.
            meta_columns (set[str]): Set of columns used to tag the object with memberships.
            meta_data (pd.DataFrame): DataFrame containing at least the unit of every column.
            attribute_objects (dict[str, tuple[object, dict[str, Meta]]], optional): NOT USED, currently only used in HydroModulesNames.

        Returns:
            dict[str, HydroReservoir]: A dictionary with the reservoir ID as key and the reservoir object and its metadata as value.

        """
        columns_to_parse = [
            HydroReservoirNames.capacity_col,
            HydroReservoirNames.res_curve_col,
            HydroReservoirNames.min_res_col,
            HydroReservoirNames.max_res_col,
            HydroReservoirNames.res_buf_col,
            HydroReservoirNames.min_penalty_col,
            HydroReservoirNames.max_penalty_col,
            HydroReservoirNames.buf_penalty_col,
        ]

        arg_user_code = HydroReservoirNames._parse_args(row, indices, columns_to_parse, meta_data)

        reservoir_curve = ReservoirCurve(arg_user_code[HydroReservoirNames.res_curve_col])

        reservoir = HydroReservoir(
            capacity=StockVolume(level=arg_user_code[HydroReservoirNames.capacity_col]),
            reservoir_curve=reservoir_curve,
        )

        meta = {}
        HydroReservoirNames._add_meta(meta, row, indices, meta_columns)

        return {row[indices[HydroReservoirNames.id_col]]: (reservoir, meta)}

    @staticmethod
    def get_attribute_data_schema() -> pa.DataFrameModel:
        """
        Get the Pandera DataFrameModel schema for attribute data in the Hydropower.Reservoirs file.

        Returns:
            pa.DataFrameModel: Pandera DataFrameModel schema for the Reservoir attribute data.

        """
        return HydroReservoirSchema

    @staticmethod
    def get_metadata_schema() -> pa.DataFrameModel:
        """
        Get the Pandera DataFrameModel schema for the metadata table in the Hydropower.Reservoirs file.

        Returns:
            pa.DataFrameModel: Pandera DataFrameModel schema for the Reservoir metadata.

        """
        return HydroReservoirMetadataSchema

    @staticmethod
    def _get_unique_check_descriptions() -> dict[str, tuple[str, bool]]:
        """
        Retrieve a dictionary with descriptions of validation checks that are specific to the Reservoir schemas.

        Returns:
            dict[str, tuple[str, bool]]: A dictionary where:
                - Keys (str): The name of the validation check method.
                - Values (tuple[str, bool]):
                    - The first element (str) provides a concise and user-friendly description of the check. E.g. what
                      caused the validation error or what is required for the check to pass.
                    - The second element (bool) indicates whether the check is a warning (True) or an error (False).


        """
        return None

    @staticmethod
    def _format_unique_checks(errors: pd.DataFrame) -> pd.DataFrame:
        """
        Format the error DataFrame according to the validation checks that are specific to the Reservoir schemas.

        Args:
            errors (pd.DataFrame): The error DataFrame containing validation errors.

        Returns:
            pd.DataFrame: The updated error DataFrame with formatted rows for unique validation checks.

        """
        return None
create_component(row: NDArray, indices: dict[str, int], meta_columns: set[str], meta_data: pd.DataFrame, attribute_objects: dict[str, tuple[object, dict[str, Meta]]] | None = None) -> dict[str, HydroReservoir] staticmethod

Create a HydroReservoir object.

Parameters:

Name Type Description Default
row NDArray

Array containing the values of one table row, representing one HydroReservoir object.

required
indices dict[str, int]

Mapping of the table's column names to the array's indices.

required
meta_columns set[str]

Set of columns used to tag the object with memberships.

required
meta_data DataFrame

DataFrame containing at least the unit of every column.

required
attribute_objects dict[str, tuple[object, dict[str, Meta]]]

NOT USED, currently only used in HydroModulesNames.

None

Returns:

Type Description
dict[str, HydroReservoir]

dict[str, HydroReservoir]: A dictionary with the reservoir ID as key and the reservoir object and its metadata as value.

Source code in framdata/database_names/HydroReservoirNames.py
@staticmethod
def create_component(
    row: NDArray,
    indices: dict[str, int],
    meta_columns: set[str],
    meta_data: pd.DataFrame,
    attribute_objects: dict[str, tuple[object, dict[str, Meta]]] | None = None,
) -> dict[str, HydroReservoir]:
    """
    Create a HydroReservoir object.

    Args:
        row (NDArray): Array containing the values of one table row, representing one HydroReservoir object.
        indices (dict[str, int]): Mapping of the table's column names to the array's indices.
        meta_columns (set[str]): Set of columns used to tag the object with memberships.
        meta_data (pd.DataFrame): DataFrame containing at least the unit of every column.
        attribute_objects (dict[str, tuple[object, dict[str, Meta]]], optional): NOT USED, currently only used in HydroModulesNames.

    Returns:
        dict[str, HydroReservoir]: A dictionary with the reservoir ID as key and the reservoir object and its metadata as value.

    """
    columns_to_parse = [
        HydroReservoirNames.capacity_col,
        HydroReservoirNames.res_curve_col,
        HydroReservoirNames.min_res_col,
        HydroReservoirNames.max_res_col,
        HydroReservoirNames.res_buf_col,
        HydroReservoirNames.min_penalty_col,
        HydroReservoirNames.max_penalty_col,
        HydroReservoirNames.buf_penalty_col,
    ]

    arg_user_code = HydroReservoirNames._parse_args(row, indices, columns_to_parse, meta_data)

    reservoir_curve = ReservoirCurve(arg_user_code[HydroReservoirNames.res_curve_col])

    reservoir = HydroReservoir(
        capacity=StockVolume(level=arg_user_code[HydroReservoirNames.capacity_col]),
        reservoir_curve=reservoir_curve,
    )

    meta = {}
    HydroReservoirNames._add_meta(meta, row, indices, meta_columns)

    return {row[indices[HydroReservoirNames.id_col]]: (reservoir, meta)}
get_attribute_data_schema() -> pa.DataFrameModel staticmethod

Get the Pandera DataFrameModel schema for attribute data in the Hydropower.Reservoirs file.

Returns:

Type Description
DataFrameModel

pa.DataFrameModel: Pandera DataFrameModel schema for the Reservoir attribute data.

Source code in framdata/database_names/HydroReservoirNames.py
@staticmethod
def get_attribute_data_schema() -> pa.DataFrameModel:
    """
    Get the Pandera DataFrameModel schema for attribute data in the Hydropower.Reservoirs file.

    Returns:
        pa.DataFrameModel: Pandera DataFrameModel schema for the Reservoir attribute data.

    """
    return HydroReservoirSchema
get_metadata_schema() -> pa.DataFrameModel staticmethod

Get the Pandera DataFrameModel schema for the metadata table in the Hydropower.Reservoirs file.

Returns:

Type Description
DataFrameModel

pa.DataFrameModel: Pandera DataFrameModel schema for the Reservoir metadata.

Source code in framdata/database_names/HydroReservoirNames.py
@staticmethod
def get_metadata_schema() -> pa.DataFrameModel:
    """
    Get the Pandera DataFrameModel schema for the metadata table in the Hydropower.Reservoirs file.

    Returns:
        pa.DataFrameModel: Pandera DataFrameModel schema for the Reservoir metadata.

    """
    return HydroReservoirMetadataSchema
HydroReservoirSchema

Bases: DataFrameModel

Pandera DataFrameModel schema for attribute data in the Hydropower.Reservoirs file.

Source code in framdata/database_names/HydroReservoirNames.py
class HydroReservoirSchema(pa.DataFrameModel):
    """Pandera DataFrameModel schema for attribute data in the Hydropower.Reservoirs file."""

    pass
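
All the _get_unique_check_descriptions implementations shown in this section currently return None, i.e. none of these classes define checks beyond the shared ones. When a class does add its own checks, the docstrings describe the expected return value as a mapping from check method name to a (description, is_warning) tuple. A hypothetical example of that structure (the check names are invented for illustration):

# Keys are names of validation check methods; values pair a user-friendly
# description with a flag that is True for warnings and False for errors.
unique_check_descriptions = {
    "check_capacity_is_positive": ("Capacity must be a positive number.", False),
    "check_reservoir_curve_exists": ("ReservoirCurve should reference an existing curve.", True),
}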

ThermalNames

Classes defining Thermal tables.

ThermalMetadataSchema

Bases: _AttributeMetadataSchema

Pandera DataFrameModel schema for metadata in the Thermal.Generators file.

Source code in framdata/database_names/ThermalNames.py
class ThermalMetadataSchema(_AttributeMetadataSchema):
    """Pandera DataFrameModel schema for metadata in the Thermal.Generators file."""

    @pa.dataframe_check
    @classmethod
    def check_unit_is_str_for_attributes(cls, df: pd.DataFrame) -> Series[bool]:
        """
        Check that the 'unit' value is a string for rows where 'attribute' is one of the listed attribute columns.

        Args:
            df (pd.DataFrame): DataFrame used to check the value of "unit".

        Returns:
            Series[bool]: Series of boolean values denoting whether each element has passed the check.

        """
        return check_unit_is_str_for_attributes(
            df,
            [
                ThermalNames.emission_coeff_col,
                ThermalNames.capacity_col,
                ThermalNames.voc_col,
                ThermalNames.start_costs_col,
                # ThermalNames.ramp_up_col, # ?
                # ThermalNames.ramp_down_col, # ?
            ],
        )
check_unit_is_str_for_attributes(df: pd.DataFrame) -> Series[bool] classmethod

Check that the 'unit' value is a string for rows where 'attribute' is one of the listed attribute columns.

Parameters:

Name Type Description Default
df DataFrame

DataFrame used to check value for "unit".

required

Returns:

Type Description
Series[bool]

Series[bool]: Series of boolean values denoting whether each element has passed the check.

Source code in framdata/database_names/ThermalNames.py
@pa.dataframe_check
@classmethod
def check_unit_is_str_for_attributes(cls, df: pd.DataFrame) -> Series[bool]:
    """
    Check that the 'unit' value is a string for rows where 'attribute' is one of the listed attribute columns.

    Args:
        df (pd.DataFrame): DataFrame used to check the value of "unit".

    Returns:
        Series[bool]: Series of boolean values denoting whether each element has passed the check.

    """
    return check_unit_is_str_for_attributes(
        df,
        [
            ThermalNames.emission_coeff_col,
            ThermalNames.capacity_col,
            ThermalNames.voc_col,
            ThermalNames.start_costs_col,
            # ThermalNames.ramp_up_col, # ?
            # ThermalNames.ramp_down_col, # ?
        ],
    )
ThermalNames

Bases: _BaseComponentsNames

Container class for describing the Thermal attribute table's names and structure.

Source code in framdata/database_names/ThermalNames.py
class ThermalNames(_BaseComponentsNames):
    """Container class for describing the Thermal attribute table's names and structure."""

    id_col = "ThermalID"
    main_unit_col = "MainUnit"
    nice_name_col = "NiceName"
    power_node_col = "PowerNode"
    fuel_node_col = "FuelNode"
    emission_node_col = "EmissionNode"
    emission_coeff_col = "EmissionCoefficient"
    type_col = "Type"
    capacity_col = "Capacity"
    full_load_col = "FullLoadEfficiency"
    part_load_col = "PartLoadEfficiency"
    voc_col = "VOC"
    start_costs_col = "StartCosts"
    start_hours_col = "StartHours"
    min_stable_load_col = "MinStableLoad"
    min_op_bound_col = "MinOperationalBound"
    max_op_bound_col = "MaxOperationalBound"
    ramp_up_col = "RampUp"
    ramp_down_col = "RampDown"

    # Should include rampup/down data in Thermal, when we get data for this
    columns: ClassVar[list[str]] = [
        id_col,
        nice_name_col,
        type_col,
        main_unit_col,
        power_node_col,
        fuel_node_col,
        emission_node_col,
        capacity_col,
        full_load_col,
        part_load_col,
        voc_col,
        start_costs_col,
        start_hours_col,
        min_stable_load_col,
        min_op_bound_col,
        max_op_bound_col,
        emission_coeff_col,
    ]

    ref_columns: ClassVar[list[str]] = [
        power_node_col,
        fuel_node_col,
        emission_node_col,
        capacity_col,
        full_load_col,
        part_load_col,
        voc_col,
        start_costs_col,
        start_hours_col,
        min_stable_load_col,
        min_op_bound_col,
        max_op_bound_col,
        emission_coeff_col,
    ]

    @staticmethod
    def create_component(
        row: NDArray,
        indices: dict[str, int],
        meta_columns: set[str],
        meta_data: pd.DataFrame,
        attribute_objects: dict[str, tuple[object, dict[str, Meta]]] | None = None,
    ) -> dict[str, Thermal]:
        """
        Create a thermal unit component.

        Args:
            row (NDArray): Array containing the values of one table row, representing one Thermal object.
            indices (dict[str, int]): Mapping of the table's column names to the array's indices.
            meta_columns (set[str]): Set of columns used to tag the object with memberships.
            meta_data (pd.DataFrame): DataFrame containing at least the unit of every column.
            attribute_objects (dict[str, tuple[object, dict[str, Meta]]], optional): NOT USED

        Returns:
            dict[str, Thermal]: A dictionary with the thermal_id as key and the thermal unit as value.

        """
        columns_to_parse = [
            ThermalNames.emission_node_col,
            ThermalNames.capacity_col,
            ThermalNames.full_load_col,
            ThermalNames.part_load_col,
            ThermalNames.voc_col,
            ThermalNames.start_costs_col,
            ThermalNames.start_hours_col,
            ThermalNames.min_stable_load_col,
            ThermalNames.min_op_bound_col,
            ThermalNames.max_op_bound_col,
            ThermalNames.emission_coeff_col,
        ]

        arg_user_code = ThermalNames._parse_args(row, indices, columns_to_parse, meta_data)

        no_start_up_costs_condition = (
            (arg_user_code[ThermalNames.start_costs_col] is None)
            or (arg_user_code[ThermalNames.min_stable_load_col] is None)
            or (arg_user_code[ThermalNames.start_hours_col] is None)
            or (arg_user_code[ThermalNames.part_load_col] is None)
        )
        start_up_cost = (
            None
            if no_start_up_costs_condition
            else StartUpCost(
                startup_cost=Cost(level=arg_user_code[ThermalNames.start_costs_col]),
                min_stable_load=Proportion(level=arg_user_code[ThermalNames.min_stable_load_col]),
                start_hours=Hours(level=arg_user_code[ThermalNames.start_hours_col]),
                part_load_efficiency=Efficiency(level=arg_user_code[ThermalNames.part_load_col]),
            )
        )

        voc = (
            None
            if arg_user_code[ThermalNames.voc_col] is None
            else Cost(
                level=arg_user_code[ThermalNames.voc_col],
                profile=None,
            )
        )

        min_capacity = (
            None
            if arg_user_code[ThermalNames.min_op_bound_col] is None
            else MaxFlowVolume(
                level=arg_user_code[ThermalNames.capacity_col],
                profile=arg_user_code[ThermalNames.min_op_bound_col],
            )
        )

        thermal = Thermal(
            power_node=row[indices[ThermalNames.power_node_col]],
            fuel_node=row[indices[ThermalNames.fuel_node_col]],
            efficiency=Efficiency(level=arg_user_code[ThermalNames.full_load_col]),
            emission_node=row[indices[ThermalNames.emission_node_col]],
            emission_coefficient=Conversion(level=arg_user_code[FuelNodesNames.emission_coefficient_col]),
            max_capacity=MaxFlowVolume(
                level=arg_user_code[ThermalNames.capacity_col],
                profile=arg_user_code[ThermalNames.max_op_bound_col],
            ),
            min_capacity=min_capacity,
            voc=voc,
            startupcost=start_up_cost,
        )
        ThermalNames._add_meta(thermal, row, indices, meta_columns)

        return {row[indices[ThermalNames.id_col]]: thermal}

    @staticmethod
    def get_attribute_data_schema() -> pa.DataFrameModel:
        """
        Get the Pandera DataFrameModel schema for attribute data in the Thermal.Generators file.

        Returns:
            pa.DataFrameModel: Pandera DataFrameModel schema for Thermal attribute data.

        """
        return ThermalSchema

    @staticmethod
    def get_metadata_schema() -> pa.DataFrameModel:
        """
        Get the Pandera DataFrameModel schema for the metadata table in the Thermal.Generators file.

        Returns:
            pa.DataFrameModel: Pandera DataFrameModel schema for the Thermal metadata.

        """
        return ThermalMetadataSchema

    @staticmethod
    def _get_unique_check_descriptions() -> dict[str, tuple[str, bool]]:
        """
        Retrieve a dictionary with descriptions of validation checks that are specific to the Thermal schemas.

        Returns:
            dict[str, tuple[str, bool]]: A dictionary where:
                - Keys (str): The name of the validation check method.
                - Values (tuple[str, bool]):
                    - The first element (str) provides a concise and user-friendly description of the check. E.g. what
                      caused the validation error or what is required for the check to pass.
                    - The second element (bool) indicates whether the check is a warning (True) or an error (False).


        """
        return None

    @staticmethod
    def _format_unique_checks(errors: pd.DataFrame) -> pd.DataFrame:
        """
        Format the error DataFrame according to the validation checks that are specific to the Thermal schemas.

        Args:
            errors (pd.DataFrame): The error DataFrame containing validation errors.

        Returns:
            pd.DataFrame: The updated error DataFrame with formatted rows for unique validation checks.

        """
        return None
create_component(row: NDArray, indices: dict[str, int], meta_columns: set[str], meta_data: pd.DataFrame, attribute_objects: dict[str, tuple[object, dict[str, Meta]]] | None = None) -> dict[str, Thermal] staticmethod

Create a thermal unit component.

Parameters:

Name Type Description Default
row NDArray

Array containing the values of one table row, representing one Thermal object.

required
indices dict[str, int]

Mapping of the table's column names to the array's indices.

required
meta_columns set[str]

Set of columns used to tag the object with memberships.

required
meta_data DataFrame

DataFrame containing at least the unit of every column.

required
attribute_objects dict[str, tuple[object, dict[str, Meta]]]

NOT USED

None

Returns:

Type Description
dict[str, Thermal]

dict[str, Thermal]: A dictionary with the thermal_id as key and the thermal unit as value.

Source code in framdata/database_names/ThermalNames.py
@staticmethod
def create_component(
    row: NDArray,
    indices: dict[str, int],
    meta_columns: set[str],
    meta_data: pd.DataFrame,
    attribute_objects: dict[str, tuple[object, dict[str, Meta]]] | None = None,
) -> dict[str, Thermal]:
    """
    Create a thermal unit component.

    Args:
        row (NDArray): Array containing the values of one table row, representing one Thermal object.
        indices (dict[str, int]): Mapping of the table's column names to the array's indices.
        meta_columns (set[str]): Set of columns used to tag the object with memberships.
        meta_data (pd.DataFrame): DataFrame containing at least the unit of every column.
        attribute_objects (dict[str, tuple[object, dict[str, Meta]]], optional): NOT USED

    Returns:
        dict[str, Thermal]: A dictionary with the thermal_id as key and the thermal unit as value.

    """
    columns_to_parse = [
        ThermalNames.emission_node_col,
        ThermalNames.capacity_col,
        ThermalNames.full_load_col,
        ThermalNames.part_load_col,
        ThermalNames.voc_col,
        ThermalNames.start_costs_col,
        ThermalNames.start_hours_col,
        ThermalNames.min_stable_load_col,
        ThermalNames.min_op_bound_col,
        ThermalNames.max_op_bound_col,
        ThermalNames.emission_coeff_col,
    ]

    arg_user_code = ThermalNames._parse_args(row, indices, columns_to_parse, meta_data)

    no_start_up_costs_condition = (
        (arg_user_code[ThermalNames.start_costs_col] is None)
        or (arg_user_code[ThermalNames.min_stable_load_col] is None)
        or (arg_user_code[ThermalNames.start_hours_col] is None)
        or (arg_user_code[ThermalNames.part_load_col] is None)
    )
    start_up_cost = (
        None
        if no_start_up_costs_condition
        else StartUpCost(
            startup_cost=Cost(level=arg_user_code[ThermalNames.start_costs_col]),
            min_stable_load=Proportion(level=arg_user_code[ThermalNames.min_stable_load_col]),
            start_hours=Hours(level=arg_user_code[ThermalNames.start_hours_col]),
            part_load_efficiency=Efficiency(level=arg_user_code[ThermalNames.part_load_col]),
        )
    )

    voc = (
        None
        if arg_user_code[ThermalNames.voc_col] is None
        else Cost(
            level=arg_user_code[ThermalNames.voc_col],
            profile=None,
        )
    )

    min_capacity = (
        None
        if arg_user_code[ThermalNames.min_op_bound_col] is None
        else MaxFlowVolume(
            level=arg_user_code[ThermalNames.capacity_col],
            profile=arg_user_code[ThermalNames.min_op_bound_col],
        )
    )

    thermal = Thermal(
        power_node=row[indices[ThermalNames.power_node_col]],
        fuel_node=row[indices[ThermalNames.fuel_node_col]],
        efficiency=Efficiency(level=arg_user_code[ThermalNames.full_load_col]),
        emission_node=row[indices[ThermalNames.emission_node_col]],
        emission_coefficient=Conversion(level=arg_user_code[FuelNodesNames.emission_coefficient_col]),
        max_capacity=MaxFlowVolume(
            level=arg_user_code[ThermalNames.capacity_col],
            profile=arg_user_code[ThermalNames.max_op_bound_col],
        ),
        min_capacity=min_capacity,
        voc=voc,
        startupcost=start_up_cost,
    )
    ThermalNames._add_meta(thermal, row, indices, meta_columns)

    return {row[indices[ThermalNames.id_col]]: thermal}
get_attribute_data_schema() -> pa.DataFrameModel staticmethod

Get the Pandera DataFrameModel schema for attribute data in the Thermal.Generators file.

Returns:

Type Description
DataFrameModel

pa.DataFrameModel: Pandera DataFrameModel schema for Thermal attribute data.

Source code in framdata/database_names/ThermalNames.py
@staticmethod
def get_attribute_data_schema() -> pa.DataFrameModel:
    """
    Get the Pandera DataFrameModel schema for attribute data in the Thermal.Generators file.

    Returns:
        pa.DataFrameModel: Pandera DataFrameModel schema for Thermal attribute data.

    """
    return ThermalSchema
get_metadata_schema() -> pa.DataFrameModel staticmethod

Get the Pandera DataFrameModel schema for the metadata table in the Thermal.Generators file.

Returns:

Type Description
DataFrameModel

pa.DataFrameModel: Pandera DataFrameModel schema for the Thermal metadata.

Source code in framdata/database_names/ThermalNames.py
@staticmethod
def get_metadata_schema() -> pa.DataFrameModel:
    """
    Get the Pandera DataFrameModel schema for the metadata table in the Thermal.Generators file.

    Returns:
        pa.DataFrameModel: Pandera DataFrameModel schema for the Thermal metadata.

    """
    return ThermalMetadataSchema
ThermalSchema

Bases: DataFrameModel

Pandera DataFrameModel schema for attribute data in the Thermal.Generators file.

Source code in framdata/database_names/ThermalNames.py
class ThermalSchema(pa.DataFrameModel):
    """Pandera DataFrameModel schema for attribute data in the Thermal.Generators file."""

    ThermalID: Series[str] = pa.Field(unique=True, nullable=False)
    PowerNode: Series[str] = pa.Field(nullable=False)
    FuelNode: Series[str] = pa.Field(nullable=False)
    EmissionCoefficient: Series[Any] = pa.Field(nullable=True)
    EmissionNode: Series[str] = pa.Field(nullable=True)
    Capacity: Series[Any] = pa.Field(nullable=False)
    FullLoadEfficiency: Series[Any] = pa.Field(nullable=True)
    PartLoadEfficiency: Series[Any] = pa.Field(nullable=True)
    VOC: Series[Any] = pa.Field(nullable=True)
    StartCosts: Series[Any] = pa.Field(nullable=True)
    StartHours: Series[Any] = pa.Field(nullable=True)
    MinStableLoad: Series[Any] = pa.Field(nullable=True)
    MinOperationalBound: Series[Any] = pa.Field(nullable=True)
    MaxOperationalBound: Series[Any] = pa.Field(nullable=True)
    RampUp: Series[Any] = pa.Field(nullable=True)
    RampDown: Series[Any] = pa.Field(nullable=True)

    @pa.check(ThermalNames.capacity_col)
    @classmethod
    def dtype_str_int_float(cls, series: Series[Any]) -> Series[bool]:
        """Check if values in the series are of datatype: str, int or float."""
        return dtype_str_int_float(series)

    @pa.check(
        ThermalNames.emission_coeff_col,
        ThermalNames.full_load_col,
        ThermalNames.part_load_col,
        ThermalNames.voc_col,
        ThermalNames.start_costs_col,
        ThermalNames.start_hours_col,
        ThermalNames.min_stable_load_col,
        ThermalNames.max_op_bound_col,
        ThermalNames.min_op_bound_col,
        ThermalNames.ramp_up_col,
        ThermalNames.ramp_down_col,
    )
    @classmethod
    def dtype_str_int_float_none(cls, series: Series[Any]) -> Series[bool]:
        """Check if values in the series are of datatype: str, int, float or None."""
        return dtype_str_int_float_none(series)

    @pa.check(
        ThermalNames.capacity_col,
        ThermalNames.full_load_col,
        ThermalNames.part_load_col,
        ThermalNames.voc_col,
        ThermalNames.emission_coeff_col,
    )
    @classmethod
    def numeric_values_greater_than_or_equal_to_0(cls, series: Series[Any]) -> Series[bool]:
        """Check if numeric values in the series are greater than or equal to zero."""
        return numeric_values_greater_than_or_equal_to(series, 0)
dtype_str_int_float(series: Series[Any]) -> Series[bool] classmethod

Check if values in the series are of datatype: str, int or float.

Source code in framdata/database_names/ThermalNames.py
@pa.check(ThermalNames.capacity_col)
@classmethod
def dtype_str_int_float(cls, series: Series[Any]) -> Series[bool]:
    """Check if values in the series are of datatype: str, int or float."""
    return dtype_str_int_float(series)
dtype_str_int_float_none(series: Series[Any]) -> Series[bool] classmethod

Check if values in the series are of datatype: str, int, float or None.

Source code in framdata/database_names/ThermalNames.py
@pa.check(
    ThermalNames.emission_coeff_col,
    ThermalNames.full_load_col,
    ThermalNames.part_load_col,
    ThermalNames.voc_col,
    ThermalNames.start_costs_col,
    ThermalNames.start_hours_col,
    ThermalNames.min_stable_load_col,
    ThermalNames.max_op_bound_col,
    ThermalNames.min_op_bound_col,
    ThermalNames.ramp_up_col,
    ThermalNames.ramp_down_col,
)
@classmethod
def dtype_str_int_float_none(cls, series: Series[Any]) -> Series[bool]:
    """Check if values in the series are of datatype: str, int, float or None."""
    return dtype_str_int_float_none(series)
numeric_values_greater_than_or_equal_to_0(series: Series[Any]) -> Series[bool] classmethod

Check if numeric values in the series are greater than or equal to zero.

Source code in framdata/database_names/ThermalNames.py
@pa.check(
    ThermalNames.capacity_col,
    ThermalNames.full_load_col,
    ThermalNames.part_load_col,
    ThermalNames.voc_col,
    ThermalNames.emission_coeff_col,
)
@classmethod
def numeric_values_greater_than_or_equal_to_0(cls, series: Series[Any]) -> Series[bool]:
    """Check if numeric values in the series are greater than or equal to zero."""
    return numeric_values_greater_than_or_equal_to(series, 0)
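
For orientation, here is a minimal validation sketch against ThermalSchema. The table contents are hypothetical, and the import path assumes framdata is installed with the module layout shown in the source paths above:

import pandas as pd

from framdata.database_names.ThermalNames import ThermalSchema

# Hypothetical two-row thermal attribute table; optional columns are left empty.
optional = [None, None]
df = pd.DataFrame(
    {
        "ThermalID": ["gas_plant_1", "coal_plant_1"],
        "PowerNode": ["NO1", "NO1"],
        "FuelNode": ["NaturalGas", "Coal"],
        "EmissionCoefficient": [0.20, 0.34],
        "EmissionNode": ["CO2", "CO2"],
        "Capacity": [400.0, 800.0],
        "FullLoadEfficiency": [0.58, 0.45],
        "PartLoadEfficiency": optional,
        "VOC": [2.0, None],
        "StartCosts": optional,
        "StartHours": optional,
        "MinStableLoad": optional,
        "MinOperationalBound": optional,
        "MaxOperationalBound": optional,
        "RampUp": optional,
        "RampDown": optional,
    }
)

# lazy=True collects all failing checks instead of stopping at the first one.
validated = ThermalSchema.validate(df, lazy=True)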

TimeVectorMetadataNames

Contains names of fields in time vector metadata.

TimeVectorMetadataNames

Denote available fields in time vector metadata, and provide functionality for time vector metadata processing.

The processing is concerned with casting the metadata fields to correct types and decoding the fields and/or values if they are stored as bytes.

Source code in framdata/database_names/TimeVectorMetadataNames.py
class TimeVectorMetadataNames:
    """
    Denote available fields in time vector metadata, and provide functionality for time vector metadata processing.

    The processing is concerned with casting the metadata fields to correct types and decoding the fields and/or values if they are stored as bytes.

    """

    ENCODING = "utf-8"

    DATETIME_COL = "DateTime"
    # OBS! when adding new metadata entries, you also have to parse them in FileHandler.get_parquet_metadata
    # otherwise they will not be read.
    # Metadata fields

    # Id column name
    ID_COLUMN_NAME = "ID"

    # Required bools
    IS_MAX_LEVEL = "IsMaxLevel"
    IS_ZERO_ONE_PROFILE = "IsZeroOneProfile"
    IS_52_WEEK_YEARS = "Is52WeekYears"
    EXTRAPOLATE_FISRT_POINT = "ExtrapolateFirstPoint"
    EXTRAPOLATE_LAST_POINT = "ExtrapolateLastPoint"

    # reference period
    REF_PERIOD_START_YEAR = "RefPeriodStartYear"
    REF_PERIOD_NUM_YEARS = "RefPeriodNumberOfYears"

    START = "StartDateTime"
    FREQUENCY = "Frequency"
    NUM_POINTS = "NumberOfPoints"
    TIMEZONE = "TimeZone"

    UNIT = "Unit"

    METADATA_TYPES = bool | int | str | datetime | timedelta | tzinfo | None
    METADATA_TYPES_TUPLE = (bool, int, str, datetime, timedelta, tzinfo, type(None))

    # reference_period = "ReferencePeriod"

    B_IS_MAX_LEVEL = IS_MAX_LEVEL.encode(ENCODING)
    B_IS_ZERO_ONE_PROFILE = IS_ZERO_ONE_PROFILE.encode(ENCODING)
    B_IS_52_WEEK_YEARS = IS_52_WEEK_YEARS.encode(ENCODING)
    B_ID_COLUMN_NAME = ID_COLUMN_NAME.encode(ENCODING)
    B_EXTRAPOLATE_FISRT_POINT = EXTRAPOLATE_FISRT_POINT.encode(ENCODING)
    B_EXTRAPOLATE_LAST_POINT = EXTRAPOLATE_LAST_POINT.encode(ENCODING)

    # reference period
    B_REF_PERIOD_START_YEAR = REF_PERIOD_START_YEAR.encode(ENCODING)
    B_REF_PERIOD_NUM_YEARS = REF_PERIOD_NUM_YEARS.encode(ENCODING)

    B_START = START.encode(ENCODING)
    B_FREQUENCY = FREQUENCY.encode(ENCODING)
    B_NUM_POINTS = NUM_POINTS.encode(ENCODING)
    B_TIMEZONE = TIMEZONE.encode(ENCODING)
    B_UNIT = UNIT.encode(ENCODING)

    str_keys_to_bytes_map: ClassVar[dict[str, bytes]] = {
        ID_COLUMN_NAME: B_ID_COLUMN_NAME,
        IS_MAX_LEVEL: B_IS_MAX_LEVEL,
        IS_ZERO_ONE_PROFILE: B_IS_ZERO_ONE_PROFILE,
        IS_52_WEEK_YEARS: B_IS_52_WEEK_YEARS,
        EXTRAPOLATE_FISRT_POINT: B_EXTRAPOLATE_FISRT_POINT,
        EXTRAPOLATE_LAST_POINT: B_EXTRAPOLATE_LAST_POINT,
        REF_PERIOD_START_YEAR: B_REF_PERIOD_START_YEAR,
        REF_PERIOD_NUM_YEARS: B_REF_PERIOD_NUM_YEARS,
        START: B_START,
        FREQUENCY: B_FREQUENCY,
        NUM_POINTS: B_NUM_POINTS,
        TIMEZONE: B_TIMEZONE,
        UNIT: B_UNIT,
    }

    strict_bools_cast: ClassVar[set[str]] = {
        IS_52_WEEK_YEARS,
        EXTRAPOLATE_FISRT_POINT,
        EXTRAPOLATE_LAST_POINT,
    }
    keys_cast_methods: ClassVar[dict[str, Callable | type]] = {
        ID_COLUMN_NAME: str,
        IS_MAX_LEVEL: bool,
        IS_ZERO_ONE_PROFILE: bool,
        REF_PERIOD_START_YEAR: int,
        REF_PERIOD_NUM_YEARS: int,
        START: pd.to_datetime,
        FREQUENCY: pd.to_timedelta,
        NUM_POINTS: int,
        TIMEZONE: pytz.timezone,
        UNIT: str,
    }

    @staticmethod
    def cast_meta(
        raw_meta: dict[str | bytes, str | bytes | int | bool | None],
    ) -> tuple[dict[str, bool | int | str | datetime | timedelta | tzinfo | None], set[str]]:
        """
        Decode possible binary keys and values and cast values of metadata dict to their defined types.

        Args:
            raw_meta (dict[str  |  bytes, str  |  bytes  |  int  |  bool  |  None]): Dictionary to decode and cast.

        Returns:
            tuple[dict[str, Any], set[str]]: Decoded and cast dictionary, set of missing keys.

        """
        tvmn = TimeVectorMetadataNames
        str_bytes_map = tvmn.str_keys_to_bytes_map
        cast_meta = {key: raw_meta[key] for key in set(str_bytes_map.keys()) | set(str_bytes_map.values()) if key in raw_meta}
        str_to_bytes_meta = tvmn.bytes_keys_to_str(cast_meta)
        cast_meta = str_to_bytes_meta if str_to_bytes_meta else cast_meta  # Keys were bytes and we decode to str.

        missing_keys: set[str] = {key for key in str_bytes_map if key not in cast_meta}

        # Update with cast values for strict bools and others.
        cast_meta.update({key: tvmn.cast_strict_bool_value(cast_meta[key]) for key in tvmn.strict_bools_cast if key in cast_meta})
        cast_meta.update({key: tvmn.cast_value(cast_meta[key], cast_method) for key, cast_method in tvmn.keys_cast_methods.items() if key in cast_meta})

        return cast_meta, missing_keys

    @staticmethod
    def str_keys_to_bytes(raw_meta: dict[str, bytes]) -> dict[bytes, bytes]:
        return {bytes_name: raw_meta[str_name] for str_name, bytes_name in TimeVectorMetadataNames.str_keys_to_bytes_map.items() if str_name in raw_meta}

    @staticmethod
    def bytes_keys_to_str(raw_meta: dict[bytes, bytes]) -> dict[str, bytes]:
        return {str_name: raw_meta[bytes_name] for str_name, bytes_name in TimeVectorMetadataNames.str_keys_to_bytes_map.items() if bytes_name in raw_meta}

    @staticmethod
    def cast_value(value: str | bytes | None, cast_function: Callable | type) -> object | None:
        """
        Cast a string value into a new type, but always return None if the value is None or "None".

        Args:
            value (str | bytes | None): A string or bytes value, or None.
            cast_function (Callable | type): Function or type used to cast the value.

        Raises:
            RuntimeError: If anything goes wrong in the cast_function.

        Returns:
            object|None: Value as new type or None.

        """
        if isinstance(value, bytes):
            if cast_function is bool:
                return None if value == b"None" else value == b"True"
            value = value.decode(encoding=TimeVectorMetadataNames.ENCODING)

        if value is None or value in {"None", ""}:  # Handle missing values
            return None
        try:
            return cast_function(value)
        except Exception as e:
            msg = f"Could not cast metadata value: {value}. Casting method: {cast_function}"
            raise RuntimeError(msg) from e

    @staticmethod
    def cast_strict_bool_value(value: str | bool | bytes) -> bool:
        if isinstance(value, bytes):
            return value == b"True"
        return bool(value)
cast_meta(raw_meta: dict[str | bytes, str | bytes | int | bool | None]) -> tuple[dict[str, bool | int | str | datetime | timedelta | tzinfo | None], set[str]] staticmethod

Decode possible binary keys and values and cast values of metadata dict to their defined types.

Parameters:

Name Type Description Default
raw_meta dict[str | bytes, str | bytes | int | bool | None]

Dictionary to decode and cast.

required

Returns:

Type Description
tuple[dict[str, bool | int | str | datetime | timedelta | tzinfo | None], set[str]]

tuple[dict[str, Any], set[str]]: Decoded and cast dictionary, set of missing keys.

Source code in framdata/database_names/TimeVectorMetadataNames.py
@staticmethod
def cast_meta(
    raw_meta: dict[str | bytes, str | bytes | int | bool | None],
) -> tuple[dict[str, bool | int | str | datetime | timedelta | tzinfo | None], set[str]]:
    """
    Decode possible binary keys and values and cast values of metadata dict to their defined types.

    Args:
        raw_meta (dict[str  |  bytes, str  |  bytes  |  int  |  bool  |  None]): Dictionary to decode and cast.

    Returns:
        tuple[dict[str, Any], set[str]]: Decoded and cast dictionary, set of missing keys.

    """
    tvmn = TimeVectorMetadataNames
    str_bytes_map = tvmn.str_keys_to_bytes_map
    cast_meta = {key: raw_meta[key] for key in set(str_bytes_map.keys()) | set(str_bytes_map.values()) if key in raw_meta}
    str_to_bytes_meta = tvmn.bytes_keys_to_str(cast_meta)
    cast_meta = str_to_bytes_meta if str_to_bytes_meta else cast_meta  # Keys were bytes and we decode to str.

    missing_keys: set[str] = {key for key in str_bytes_map if key not in cast_meta}

    # Update with cast values for strict bools and others.
    cast_meta.update({key: tvmn.cast_strict_bool_value(cast_meta[key]) for key in tvmn.strict_bools_cast if key in cast_meta})
    cast_meta.update({key: tvmn.cast_value(cast_meta[key], cast_method) for key, cast_method in tvmn.keys_cast_methods.items() if key in cast_meta})

    return cast_meta, missing_keys
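
To illustrate the decode-and-cast step, a small sketch with a hypothetical byte-keyed metadata dictionary of the kind read back from a Parquet file:

from framdata.database_names.TimeVectorMetadataNames import TimeVectorMetadataNames

# Hypothetical metadata as stored on disk: both keys and values are bytes.
raw_meta = {
    b"ID": b"ProfileID",
    b"IsMaxLevel": b"False",
    b"IsZeroOneProfile": b"True",
    b"Is52WeekYears": b"True",
    b"StartDateTime": b"2025-01-01 00:00:00",
    b"Frequency": b"1h",
    b"NumberOfPoints": b"8760",
    b"TimeZone": b"Europe/Oslo",
    b"Unit": b"MWh/h",
}

meta, missing = TimeVectorMetadataNames.cast_meta(raw_meta)
# meta["StartDateTime"] is now a pandas Timestamp, meta["Frequency"] a Timedelta,
# meta["NumberOfPoints"] an int and meta["TimeZone"] a pytz timezone.
# `missing` holds the keys absent from raw_meta, here the extrapolation flags
# and the reference-period fields.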
cast_value(value: str | bytes | None, cast_function: Callable | type) -> object | None staticmethod

Cast a string value into a new type, but always return None if the value is None or "None".

Parameters:

Name Type Description Default
value str | bytes | None

A string or bytes value, or None.

required
cast_function Callable | type

Function or type used to cast the value.

required

Raises:

Type Description
RuntimeError

If anything goes wrong in the cast_function.

Returns:

Type Description
object | None

object|None: Value as new type or None.

Source code in framdata/database_names/TimeVectorMetadataNames.py
@staticmethod
def cast_value(value: str | bytes | None, cast_function: Callable | type) -> object | None:
    """
    Cast a string value into a new type, but always return None if the value is None or "None".

    Args:
        value (str | bytes | None): A string or bytes value, or None.
        cast_function (Callable | type): Function or type used to cast the value.

    Raises:
        RuntimeError: If anything goes wrong in the cast_function.

    Returns:
        object|None: Value as new type or None.

    """
    if isinstance(value, bytes):
        if cast_function is bool:
            return None if value == b"None" else value == b"True"
        value = value.decode(encoding=TimeVectorMetadataNames.ENCODING)

    if value is None or value in {"None", ""}:  # Handle missing values
        return None
    try:
        return cast_function(value)
    except Exception as e:
        msg = f"Could not cast metadata value: {value}. Casting method: {cast_function}"
        raise RuntimeError(msg) from e
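
A few illustrative calls (the values are made up) showing how bytes, missing markers and failed casts are handled:

from framdata.database_names.TimeVectorMetadataNames import TimeVectorMetadataNames as Tvmn

Tvmn.cast_value(b"8760", int)    # 8760: bytes are decoded before casting
Tvmn.cast_value("None", float)   # None: "None" and "" are treated as missing
Tvmn.cast_value(b"True", bool)   # True: bool gets dedicated handling for bytes
# Tvmn.cast_value("abc", int) would raise RuntimeError wrapping the ValueError from int().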

TransmissionNames

Defines the TransmissionNames class and related Pandera schemas.

These describe and validate the Transmission attribute and metadata tables in the energy model database.

TransmissionMetadataSchema

Bases: _AttributeMetadataSchema

Pandera DataFrameModel schema for metadata in the Transmission.Grid file.

Source code in framdata/database_names/TransmissionNames.py
class TransmissionMetadataSchema(_AttributeMetadataSchema):
    """Pandera DataFrameModel schema for metadata in the Transmission.Grid file."""

    @pa.dataframe_check
    @classmethod
    def check_unit_is_str_for_attributes(cls, df: pd.DataFrame) -> Series[bool]:
        """
        Check that the 'unit' value is a string for the rows where 'attribute' is 'Capacity' or 'Tariff'.

        Args:
            df (Dataframe): DataFrame used to check value for "unit".

        Returns:
            Series[bool]: Series of boolean values denoting if each element has passed the check.

        """
        return check_unit_is_str_for_attributes(df, [TransmissionNames.capacity_col, TransmissionNames.tariff_col])
check_unit_is_str_for_attributes(df: pd.DataFrame) -> Series[bool] classmethod

Check that the 'unit' value is a string for the rows where 'attribute' is 'Capacity' or 'Tariff'.

Parameters:

Name Type Description Default
df Dataframe

DataFrame used to check value for "unit".

required

Returns:

Type Description
Series[bool]

Series[bool]: Series of boolean values denoting if each element has passed the check.

Source code in framdata/database_names/TransmissionNames.py
@pa.dataframe_check
@classmethod
def check_unit_is_str_for_attributes(cls, df: pd.DataFrame) -> Series[bool]:
    """
    Check that the 'unit' value is a string for the rows where 'attribute' is 'Capacity' or 'Tariff'.

    Args:
        df (Dataframe): DataFrame used to check value for "unit".

    Returns:
        Series[bool]: Series of boolean values denoting if each element has passed the check.

    """
    return check_unit_is_str_for_attributes(df, [TransmissionNames.capacity_col, TransmissionNames.tariff_col])
TransmissionNames

Bases: _BaseComponentsNames

Container class for describing the Transmission attribute table's names and structure.

Source code in framdata/database_names/TransmissionNames.py
class TransmissionNames(_BaseComponentsNames):
    """Container class for describing the Transmission attribute table's names and structure."""

    id_col = "TransmissionID"
    from_node_col = "FromNode"
    to_node_col = "ToNode"
    capacity_col = "Capacity"
    loss_col = "Loss"
    tariff_col = "Tariff"
    max_op_bound_col = "MaxOperationalBound"
    min_op_bound_col = "MinOperationalBound"
    ramp_up_col = "RampUp"
    ramp_down_col = "RampDown"

    columns: ClassVar[list[str]] = [
        id_col,
        from_node_col,
        to_node_col,
        capacity_col,
        loss_col,
        tariff_col,
        max_op_bound_col,
        min_op_bound_col,
        ramp_up_col,
        ramp_down_col,
    ]

    ref_columns: ClassVar[list[str]] = [
        from_node_col,
        to_node_col,
        capacity_col,
        loss_col,
        tariff_col,
        max_op_bound_col,
        min_op_bound_col,
        ramp_up_col,
        ramp_down_col,
    ]

    @staticmethod
    def create_component(
        row: NDArray,
        indices: dict[str, int],
        meta_columns: set[str],
        meta_data: pd.DataFrame,
        attribute_objects: dict[str, tuple[object, dict[str, Meta]]] | None = None,
    ) -> dict[str, Transmission]:
        """
        Create a transmission unit component.

        Args:
            row (NDArray): Array containing the values of one table row, representing one Transmission object.
            indices (dict[str, int]): Mapping of the table's column names to the array's indices.
            meta_columns (set[str]): Set of columns used to tag object with memberships.
            meta_data (pd.DataFrame): DataFrame containing at least the unit of every column.
            attribute_objects (dict[str, tuple[object, dict[str, Meta]]] | None, optional): NOT USED

        Returns:
            dict[str, Transmission]: A dictionary with the transmission_id as key and the transmission unit as value.

        """
        columns_to_parse = [
            TransmissionNames.capacity_col,
            TransmissionNames.loss_col,
            TransmissionNames.tariff_col,
            TransmissionNames.max_op_bound_col,
            TransmissionNames.min_op_bound_col,
            TransmissionNames.ramp_up_col,
            TransmissionNames.ramp_down_col,
        ]

        arg_user_code = TransmissionNames._parse_args(row, indices, columns_to_parse, meta_data)

        ramp_up = None if arg_user_code[TransmissionNames.ramp_up_col] is None else Proportion(level=arg_user_code[TransmissionNames.ramp_up_col])
        ramp_down = None if arg_user_code[TransmissionNames.ramp_down_col] is None else Proportion(level=arg_user_code[TransmissionNames.ramp_down_col])
        loss = None if arg_user_code[TransmissionNames.loss_col] is None else Loss(level=arg_user_code[TransmissionNames.loss_col])

        tariff = None if arg_user_code[TransmissionNames.tariff_col] is None else Cost(level=arg_user_code[TransmissionNames.tariff_col])

        min_capacity = (
            None
            if arg_user_code[TransmissionNames.min_op_bound_col] is None
            else MaxFlowVolume(
                level=arg_user_code[TransmissionNames.capacity_col],
                profile=arg_user_code[TransmissionNames.min_op_bound_col],
            )
        )

        transmission = Transmission(
            from_node=row[indices[TransmissionNames.from_node_col]],
            to_node=row[indices[TransmissionNames.to_node_col]],
            max_capacity=MaxFlowVolume(
                level=arg_user_code[TransmissionNames.capacity_col],
                profile=arg_user_code[TransmissionNames.max_op_bound_col],
            ),
            min_capacity=min_capacity,
            loss=loss,
            tariff=tariff,
            ramp_up=ramp_up,
            ramp_down=ramp_down,
        )
        TransmissionNames._add_meta(transmission, row, indices, meta_columns)

        return {row[indices[TransmissionNames.id_col]]: transmission}

    @staticmethod
    def get_attribute_data_schema() -> pa.DataFrameModel:
        """
        Get the Pandera DataFrameModel schema for attribute data in the Transmission.Grid file.

        Returns:
            pa.DataFrameModel: Pandera DataFrameModel schema for Transmission attribute data.

        """
        return TransmissionSchema

    @staticmethod
    def get_metadata_schema() -> pa.DataFrameModel:
        """
        Get the Pandera DataFrameModel schema for the metadata table in the Transmission.Grid file.

        Returns:
            pa.DataFrameModel: Pandera DataFrameModel schema for the Transmission metadata.

        """
        return TransmissionMetadataSchema

    @staticmethod
    def _get_unique_check_descriptions() -> dict[str, tuple[str, bool]]:
        """
        Retrieve a dictionary with descriptions of validation checks that are specific to the Transmission schemas.

        Returns:
            dict[str, tuple[str, bool]]: A dictionary where:
                - Keys (str): The name of the validation check method.
                - Values (tuple[str, bool]):
                    - The first element (str) provides a concise and user-friendly description of the check. E.g. what
                      caused the validation error or what is required for the check to pass.
                    - The second element (bool) indicates whether the check is a warning (True) or an error (False).


        """
        return {
            "check_internal_line_error": ("Transmission line is internal (FromNode equals ToNode).", False),
        }

    @staticmethod
    def _format_unique_checks(errors: pd.DataFrame) -> pd.DataFrame:
        """
        Format the error DataFrame according to the validation checks that are specific to the Transmission schemas.

        Args:
            errors (pd.DataFrame): The error DataFrame containing validation errors.

        Returns:
            pd.DataFrame: The updated error DataFrame with formatted rows for unique validation checks.

        """
        check_name = "check_internal_line_error"
        if check_name in errors[TransmissionNames.COL_CHECK].to_numpy():
            check_rows = errors.loc[
                (errors[TransmissionNames.COL_CHECK] == check_name)
                & (
                    errors[TransmissionNames.COL_COLUMN].isin(
                        [TransmissionNames.from_node_col, TransmissionNames.to_node_col],
                    )
                )
            ]
            check_rows.loc[:, TransmissionNames.COL_COLUMN] = f"{TransmissionNames.from_node_col}, {TransmissionNames.to_node_col}"
            check_rows = check_rows.drop_duplicates()
            errors = errors[~(errors[TransmissionNames.COL_CHECK] == check_name)]
            errors = pd.concat([errors, check_rows], ignore_index=True)

        return errors
create_component(row: NDArray, indices: dict[str, int], meta_columns: set[str], meta_data: pd.DataFrame, attribute_objects: dict[str, tuple[object, dict[str, Meta]]] | None = None) -> dict[str, Transmission] staticmethod

Create a transmission unit component.

Parameters:

Name Type Description Default
row NDArray

Array containing the values of one table row, representing one Transmission object.

required
indices dict[str, int]

Mapping of the table's column names to the array's indices.

required
meta_columns set[str]

Set of columns used to tag object with memberships.

required
meta_data DataFrame

DataFrame containing at least the unit of every column.

required
attribute_objects dict[str, tuple[object, dict[str, Meta]]] | None

NOT USED

None

Returns:

Type Description
dict[str, Transmission]

dict[str, Transmission]: A dictionary with the transmission_id as key and the transmission unit as value.

Source code in framdata/database_names/TransmissionNames.py
@staticmethod
def create_component(
    row: NDArray,
    indices: dict[str, int],
    meta_columns: set[str],
    meta_data: pd.DataFrame,
    attribute_objects: dict[str, tuple[object, dict[str, Meta]]] | None = None,
) -> dict[str, Transmission]:
    """
    Create a transmission unit component.

    Args:
        row (NDArray): Array containing the values of one table row, representing one Transmission object.
        indices (dict[str, int]): Mapping of the table's column names to the array's indices.
        meta_columns (set[str]): Set of columns used to tag object with memberships.
        meta_data (pd.DataFrame): DataFrame containing at least the unit of every column.
        attribute_objects (dict[str, tuple[object, dict[str, Meta]]] | None, optional): NOT USED

    Returns:
        dict[str, Transmission]: A dictionary with the transmission_id as key and the transmission unit as value.

    """
    columns_to_parse = [
        TransmissionNames.capacity_col,
        TransmissionNames.loss_col,
        TransmissionNames.tariff_col,
        TransmissionNames.max_op_bound_col,
        TransmissionNames.min_op_bound_col,
        TransmissionNames.ramp_up_col,
        TransmissionNames.ramp_down_col,
    ]

    arg_user_code = TransmissionNames._parse_args(row, indices, columns_to_parse, meta_data)

    ramp_up = None if arg_user_code[TransmissionNames.ramp_up_col] is None else Proportion(level=arg_user_code[TransmissionNames.ramp_up_col])
    ramp_down = None if arg_user_code[TransmissionNames.ramp_down_col] is None else Proportion(level=arg_user_code[TransmissionNames.ramp_down_col])
    loss = None if arg_user_code[TransmissionNames.loss_col] is None else Loss(level=arg_user_code[TransmissionNames.loss_col])

    tariff = None if arg_user_code[TransmissionNames.tariff_col] is None else Cost(level=arg_user_code[TransmissionNames.tariff_col])

    min_capacity = (
        None
        if arg_user_code[TransmissionNames.min_op_bound_col] is None
        else MaxFlowVolume(
            level=arg_user_code[TransmissionNames.capacity_col],
            profile=arg_user_code[TransmissionNames.min_op_bound_col],
        )
    )

    transmission = Transmission(
        from_node=row[indices[TransmissionNames.from_node_col]],
        to_node=row[indices[TransmissionNames.to_node_col]],
        max_capacity=MaxFlowVolume(
            level=arg_user_code[TransmissionNames.capacity_col],
            profile=arg_user_code[TransmissionNames.max_op_bound_col],
        ),
        min_capacity=min_capacity,
        loss=loss,
        tariff=tariff,
        ramp_up=ramp_up,
        ramp_down=ramp_down,
    )
    TransmissionNames._add_meta(transmission, row, indices, meta_columns)

    return {row[indices[TransmissionNames.id_col]]: transmission}
get_attribute_data_schema() -> pa.DataFrameModel staticmethod

Get the Pandera DataFrameModel schema for attribute data in the Transmission.Grid file.

Returns:

Type Description
DataFrameModel

pa.DataFrameModel: Pandera DataFrameModel schema for Transmission attribute data.

Source code in framdata/database_names/TransmissionNames.py
@staticmethod
def get_attribute_data_schema() -> pa.DataFrameModel:
    """
    Get the Pandera DataFrameModel schema for attribute data in the Transmission.Grid file.

    Returns:
        pa.DataFrameModel: Pandera DataFrameModel schema for Transmission attribute data.

    """
    return TransmissionSchema
get_metadata_schema() -> pa.DataFrameModel staticmethod

Get the Pandera DataFrameModel schema for the metadata table in the Transmission.Grid file.

Returns:

Type Description
DataFrameModel

pa.DataFrameModel: Pandera DataFrameModel schema for the Transmission metadata.

Source code in framdata/database_names/TransmissionNames.py
@staticmethod
def get_metadata_schema() -> pa.DataFrameModel:
    """
    Get the Pandera DataFrameModel schema for the metadata table in the Transmission.Grid file.

    Returns:
        pa.DataFrameModel: Pandera DataFrameModel schema for the Transmission metadata.

    """
    return TransmissionMetadataSchema
TransmissionSchema

Bases: DataFrameModel

Pandera DataFrameModel schema for attribute data in the Transmission.Grid file.

Source code in framdata/database_names/TransmissionNames.py
class TransmissionSchema(pa.DataFrameModel):
    """Pandera DataFrameModel schema for attribute data in the Transmission.Grid file."""

    TransmissionID: Series[str] = pa.Field(unique=True, nullable=False)
    FromNode: Series[str] = pa.Field(nullable=False)
    ToNode: Series[str] = pa.Field(nullable=False)
    Capacity: Series[Any] = pa.Field(nullable=False)
    Loss: Series[Any] = pa.Field(nullable=True)
    Tariff: Series[Any] = pa.Field(nullable=True)
    MaxOperationalBound: Series[Any] = pa.Field(nullable=True)
    MinOperationalBound: Series[Any] = pa.Field(nullable=True)
    RampUp: Series[Any] = pa.Field(nullable=True)
    RampDown: Series[Any] = pa.Field(nullable=True)

    @pa.check(TransmissionNames.capacity_col)
    @classmethod
    def dtype_str_int_float(cls, series: Series[Any]) -> Series[bool]:
        """Check if values in the series are of datatype: str, int or float."""
        return dtype_str_int_float(series)

    @pa.check(
        TransmissionNames.loss_col,
        TransmissionNames.tariff_col,
        TransmissionNames.max_op_bound_col,
        TransmissionNames.min_op_bound_col,
        TransmissionNames.ramp_up_col,
        TransmissionNames.ramp_down_col,
    )
    @classmethod
    def dtype_str_int_float_none(cls, series: Series[Any]) -> Series[bool]:
        """Check if values in the series are of datatype: str, int, float or None."""
        return dtype_str_int_float_none(series)

    @pa.check(TransmissionNames.capacity_col)
    @classmethod
    def numeric_values_greater_than_or_equal_to_0(cls, series: Series[Any]) -> Series[bool]:
        """Check if numeric values in the series are greater than or equal to zero."""
        return numeric_values_greater_than_or_equal_to(series, 0)

    @pa.check(TransmissionNames.loss_col)
    @classmethod
    def numeric_values_are_between_or_equal_to_0_and_1(cls, series: Series[Any]) -> Series[bool]:
        """Check if numeric values in the series are between zero and one or equal to zero and one."""
        return numeric_values_are_between_or_equal_to(series, 0, 1)

    @pa.dataframe_check
    @classmethod
    def check_internal_line_error(cls, dataframe: pd.DataFrame) -> Series[bool]:
        """
        Check that the origin node differs from the destination node; equal nodes indicate an internal line.

        Args:
            dataframe (pd.DataFrame): DataFrame to check.

        Returns:
            Series[bool]: Series of boolean values denoting if each element has passed the check.

        """
        return dataframe[TransmissionNames.from_node_col] != dataframe[TransmissionNames.to_node_col]

    class Config:
        """Schema-wide configuration for the DemandSchema class."""

        unique_column_names = True
Config

Schema-wide configuration for the TransmissionSchema class.

Source code in framdata/database_names/TransmissionNames.py
class Config:
    """Schema-wide configuration for the DemandSchema class."""

    unique_column_names = True
check_internal_line_error(dataframe: pd.DataFrame) -> Series[bool] classmethod

Check that the origin node differs from the destination node; equal nodes indicate an internal line.

Parameters:

Name Type Description Default
dataframe DataFrame

DataFrame to check.

required

Returns:

Type Description
Series[bool]

Series[bool]: Series of boolean values denoting if each element has passed the check.

Source code in framdata/database_names/TransmissionNames.py
@pa.dataframe_check
@classmethod
def check_internal_line_error(cls, dataframe: pd.DataFrame) -> Series[bool]:
    """
    Check that the origin node differs from the destination node; equal nodes indicate an internal line.

    Args:
        dataframe (pd.DataFrame): DataFrame to check.

    Returns:
        Series[bool]: Series of boolean values denoting if each element has passed the check.

    """
    return dataframe[TransmissionNames.from_node_col] != dataframe[TransmissionNames.to_node_col]
dtype_str_int_float(series: Series[Any]) -> Series[bool] classmethod

Check if values in the series are of datatype: str, int or float.

Source code in framdata/database_names/TransmissionNames.py
@pa.check(TransmissionNames.capacity_col)
@classmethod
def dtype_str_int_float(cls, series: Series[Any]) -> Series[bool]:
    """Check if values in the series are of datatype: str, int or float."""
    return dtype_str_int_float(series)
dtype_str_int_float_none(series: Series[Any]) -> Series[bool] classmethod

Check if values in the series are of datatype: str, int, float or None.

Source code in framdata/database_names/TransmissionNames.py
@pa.check(
    TransmissionNames.loss_col,
    TransmissionNames.tariff_col,
    TransmissionNames.max_op_bound_col,
    TransmissionNames.min_op_bound_col,
    TransmissionNames.ramp_up_col,
    TransmissionNames.ramp_down_col,
)
@classmethod
def dtype_str_int_float_none(cls, series: Series[Any]) -> Series[bool]:
    """Check if values in the series are of datatype: str, int, float or None."""
    return dtype_str_int_float_none(series)
numeric_values_are_between_or_equal_to_0_and_1(series: Series[Any]) -> Series[bool] classmethod

Check if numeric values in the series are between zero and one or equal to zero and one.

Source code in framdata/database_names/TransmissionNames.py
@pa.check(TransmissionNames.loss_col)
@classmethod
def numeric_values_are_between_or_equal_to_0_and_1(cls, series: Series[Any]) -> Series[bool]:
    """Check if numeric values in the series are between zero and one or equal to zero and one."""
    return numeric_values_are_between_or_equal_to(series, 0, 1)
numeric_values_greater_than_or_equal_to_0(series: Series[Any]) -> Series[bool] classmethod

Check if numeric values in the series are greater than or equal to zero.

Source code in framdata/database_names/TransmissionNames.py
@pa.check(TransmissionNames.capacity_col)
@classmethod
def numeric_values_greater_than_or_equal_to_0(cls, series: Series[Any]) -> Series[bool]:
    """Check if numeric values in the series are greater than or equal to zero."""
    return numeric_values_greater_than_or_equal_to(series, 0)
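
As a sketch, validating a hypothetical table in which the second row is an internal line (FromNode equals ToNode) and therefore fails check_internal_line_error:

import pandas as pd
import pandera as pa

from framdata.database_names.TransmissionNames import TransmissionSchema

df = pd.DataFrame(
    {
        "TransmissionID": ["NO1_NO2", "NO1_NO1"],
        "FromNode": ["NO1", "NO1"],
        "ToNode": ["NO2", "NO1"],
        "Capacity": [1500.0, 500.0],
        "Loss": [0.02, None],
        "Tariff": [None, None],
        "MaxOperationalBound": [None, None],
        "MinOperationalBound": [None, None],
        "RampUp": [None, None],
        "RampDown": [None, None],
    }
)

try:
    TransmissionSchema.validate(df, lazy=True)
except pa.errors.SchemaErrors as err:
    # failure_cases lists the failing check (check_internal_line_error) and the offending row.
    print(err.failure_cases)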

WindSolarNames

Classes defining Wind and Solar tables and how to create Components from them.

SolarNames

Bases: WindSolarNames

Class representing the names and structure of Solar tables, and method for creating Solar Component objects.

Source code in framdata/database_names/WindSolarNames.py
class SolarNames(WindSolarNames):
    """Class representing the names and structure of Solar tables, and method for creating Solar Component objects."""

    id_col = "SolarID"

    columns: ClassVar[list[str]] = [
        id_col,
        WindSolarNames.power_node_col,
        WindSolarNames.profile_col,
        WindSolarNames.capacity_col,
    ]

    @staticmethod
    def create_component(
        row: NDArray,
        indices: dict[str, int],
        meta_columns: set[str],
        meta_data: pd.DataFrame,
        attribute_objects: dict[str, tuple[object, dict[str, Meta]]] | None = None,
    ) -> dict[str, Solar]:
        """
        Create a Solar Component from a row in the Solar.Generators table.

        Args:
            row (NDArray): Array containing the values of one table row, representing one Solar object.
            indices (dict[str, int]): Mapping of the table's column names to the array's indices.
            meta_columns (set[str]): Set of columns used to tag object with memberships.
            meta_data (pd.DataFrame): DataFrame containing at least the unit of every column.
            attribute_objects (dict[str, tuple[object, dict[str, Meta]]] | None, optional): NOT USED

        Returns:
            dict[str, Solar]: A dictionary with the id as key and the solar unit as value.

        """
        columns_to_parse = [
            SolarNames.profile_col,
            SolarNames.capacity_col,
        ]

        arg_user_code = SolarNames._parse_args(row, indices, columns_to_parse, meta_data)

        solar = Solar(
            power_node=row[indices[SolarNames.power_node_col]],
            max_capacity=MaxFlowVolume(
                level=arg_user_code[SolarNames.capacity_col],
                profile=arg_user_code[SolarNames.profile_col],
            ),
            voc=None,
        )

        SolarNames._add_meta(solar, row, indices, meta_columns)

        return {row[indices[SolarNames.id_col]]: solar}
create_component(row: NDArray, indices: dict[str, int], meta_columns: set[str], meta_data: pd.DataFrame, attribute_objects: dict[str, tuple[object, dict[str, Meta]]] | None = None) -> dict[str, Solar] staticmethod

Create a Solar Component from a row in the Solar.Generators table.

Parameters:

Name Type Description Default
row NDArray

Array containing the values of one table row, representing one Solar object.

required
indices dict[str, int]

Mapping of the table's column names to the array's indices.

required
meta_columns set[str]

Set of columns used to tag object with memberships.

required
meta_data DataFrame

DataFrame containing at least the unit of every column.

required
attribute_objects dict[str, tuple[object, dict[str, Meta]]] | None

NOT USED

None

Returns:

Type Description
dict[str, Solar]

dict[str, Solar]: A dictionary with the id as key and the solar unit as value.

Source code in framdata/database_names/WindSolarNames.py
@staticmethod
def create_component(
    row: NDArray,
    indices: dict[str, int],
    meta_columns: set[str],
    meta_data: pd.DataFrame,
    attribute_objects: dict[str, tuple[object, dict[str, Meta]]] | None = None,
) -> dict[str, Solar]:
    """
    Create a Solar Component from a row in the Solar.Generators table.

    Args:
        row (NDArray): Array containing the values of one table row, representing one Solar object.
        indices (dict[str, int]): Mapping of the table's column names to the array's indices.
        meta_columns (set[str]): Set of columns used to tag object with memberships.
        meta_data (pd.DataFrame): DataFrame containing at least the unit of every column.
        attribute_objects (dict[str, tuple[object, dict[str, Meta]]] | None, optional): NOT USED

    Returns:
        dict[str, Solar]: A dictionary with the id as key and the solar unit as value.

    """
    columns_to_parse = [
        SolarNames.profile_col,
        SolarNames.capacity_col,
    ]

    arg_user_code = SolarNames._parse_args(row, indices, columns_to_parse, meta_data)

    solar = Solar(
        power_node=row[indices[SolarNames.power_node_col]],
        max_capacity=MaxFlowVolume(
            level=arg_user_code[SolarNames.capacity_col],
            profile=arg_user_code[SolarNames.profile_col],
        ),
        voc=None,
    )

    SolarNames._add_meta(solar, row, indices, meta_columns)

    return {row[indices[SolarNames.id_col]]: solar}
WindNames

Bases: WindSolarNames

Class representing the names and structure of Wind tables, and method for creating Wind Component objects.

Source code in framdata/database_names/WindSolarNames.py
class WindNames(WindSolarNames):
    """Class representing the names and structure of Wind tables, and method for creating Wind Component objects."""

    id_col = "WindID"

    columns: ClassVar[list[str]] = [
        id_col,
        WindSolarNames.power_node_col,
        WindSolarNames.profile_col,
        WindSolarNames.capacity_col,
    ]

    @staticmethod
    def create_component(
        row: NDArray,
        indices: dict[str, int],
        meta_columns: set[str],
        meta_data: pd.DataFrame,
        attribute_objects: dict[str, tuple[object, dict[str, Meta]]] | None = None,
    ) -> dict[str, Wind]:
        """
        Create a Wind Component from a row in the Wind.Generators table.

        Args:
            row (NDArray): Array containing the values of one table row, representing one Wind object.
            indices (dict[str, int]): Mapping of the table's column names to the array's indices.
            meta_columns (set[str]): Set of columns used to tag object with memberships.
            meta_data (pd.DataFrame): DataFrame containing at least the unit of every column.
            attribute_objects (dict[str, tuple[object, dict[str, Meta]]] | None, optional): NOT USED

        Returns:
            dict[str, Wind]: A dictionary with the wind_id as key and the wind unit as value.

        """
        columns_to_parse = [
            WindNames.profile_col,
            WindNames.capacity_col,
        ]

        arg_user_code = WindNames._parse_args(row, indices, columns_to_parse, meta_data)

        wind = Wind(
            power_node=row[indices[WindNames.power_node_col]],
            max_capacity=MaxFlowVolume(
                level=arg_user_code[WindNames.capacity_col],
                profile=arg_user_code[WindNames.profile_col],
            ),
            voc=None,
        )
        WindNames._add_meta(wind, row, indices, meta_columns)

        return {row[indices[WindNames.id_col]]: wind}
create_component(row: NDArray, indices: dict[str, int], meta_columns: set[str], meta_data: pd.DataFrame, attribute_objects: dict[str, tuple[object, dict[str, Meta]]] | None = None) -> dict[str, Wind] staticmethod

Create a Wind Component from a row in the Wind.Generators table.

Parameters:

Name Type Description Default
row NDArray

Array containing the values of one table row, representing one Wind object.

required
indices dict[str, int]

Mapping of the table's column names to the array's indices.

required
meta_columns set[str]

Set of columns used to tag object with memberships.

required
meta_data DataFrame

DataFrame containing at least the unit of every column.

required
attribute_objects dict[str, tuple[object, dict[str, Meta]]] | None

NOT USED

None

Returns:

Type Description
dict[str, Wind]

dict[str, Wind]: A dictionary with the wind_id as key and the wind unit as value.

Source code in framdata/database_names/WindSolarNames.py
@staticmethod
def create_component(
    row: NDArray,
    indices: dict[str, int],
    meta_columns: set[str],
    meta_data: pd.DataFrame,
    attribute_objects: dict[str, tuple[object, dict[str, Meta]]] | None = None,
) -> dict[str, Wind]:
    """
    Create a Wind Component from a row in the Wind.Generators table.

    Args:
        row (NDArray): Array containing the values of one table row, representing one Wind object.
        indices (dict[str, int]): Mapping of the table's column names to the array's indices.
        meta_columns (set[str]): Set of columns used to tag object with memberships.
        meta_data (pd.DataFrame): DataFrame containing at least the unit of every column.
        attribute_objects (dict[str, tuple[object, dict[str, Meta]]] | None, optional): NOT USED

    Returns:
        dict[str, Wind]: A dictionary with the wind_id as key and the wind unit as value.

    """
    columns_to_parse = [
        WindNames.profile_col,
        WindNames.capacity_col,
    ]

    arg_user_code = WindNames._parse_args(row, indices, columns_to_parse, meta_data)

    wind = Wind(
        power_node=row[indices[WindNames.power_node_col]],
        max_capacity=MaxFlowVolume(
            level=arg_user_code[WindNames.capacity_col],
            profile=arg_user_code[WindNames.profile_col],
        ),
        voc=None,
    )
    WindNames._add_meta(wind, row, indices, meta_columns)

    return {row[indices[WindNames.id_col]]: wind}
WindSolarMetadataSchema

Bases: _AttributeMetadataSchema

Standard Pandera DataFrameModel schema for metadata in the Wind and Solar files.

Source code in framdata/database_names/WindSolarNames.py
class WindSolarMetadataSchema(_AttributeMetadataSchema):
    """Standard Pandera DataFrameModel schema for metadata in the Wind and Solar files."""

    @pa.dataframe_check
    @classmethod
    def check_unit_is_str_for_attributes(cls, df: pd.DataFrame) -> Series[bool]:
        """
        Check that the 'unit' value is a string for the row where 'attribute' is 'Capacity'.

        Args:
            df (Dataframe): DataFrame used to check value for "unit".

        Returns:
            Series[bool]: Series of boolean values denoting if each element has passed the check.

        """
        return check_unit_is_str_for_attributes(df, [WindSolarNames.capacity_col])
check_unit_is_str_for_attributes(df: pd.DataFrame) -> Series[bool] classmethod

Check that the 'unit' value is a string for the row where 'attribute' is 'Capacity'.

Parameters:

Name Type Description Default
df Dataframe

DataFrame used to check value for "unit".

required

Returns:

Type Description
Series[bool]

Series[bool]: Series of boolean values denoting if each element has passed the check.

Source code in framdata/database_names/WindSolarNames.py
@pa.dataframe_check
@classmethod
def check_unit_is_str_for_attributes(cls, df: pd.DataFrame) -> Series[bool]:
    """
    Check that the 'unit' value is a string for the row where 'attribute' is 'Capacity'.

    Args:
        df (Dataframe): DataFrame used to check value for "unit".

    Returns:
        Series[bool]: Series of boolean values denoting if each element has passed the check.

    """
    return check_unit_is_str_for_attributes(df, [WindSolarNames.capacity_col])
WindSolarNames

Bases: _BaseComponentsNames

Class representing the names and structure of Wind and Solar tables.

Source code in framdata/database_names/WindSolarNames.py
class WindSolarNames(_BaseComponentsNames):
    """Class representing the names and structure of Wind and Solar tables."""

    power_node_col = "PowerNode"
    profile_col = "Profile"
    type_col = "TechnologyType"
    capacity_col = "Capacity"

    ref_columns: ClassVar[list[str]] = [
        power_node_col,
        profile_col,
        capacity_col,
    ]

    @staticmethod
    def get_attribute_data_schema() -> pa.DataFrameModel:
        """
        Get the Pandera DataFrameModel schema for attribute data in a Wind and Solar file.

        Returns:
            pa.DataFrameModel: Pandera DataFrameModel schema for Wind and Solar attribute data.

        """
        return WindSolarSchema

    @staticmethod
    def get_metadata_schema() -> pa.DataFrameModel:
        """
        Get the Pandera DataFrameModel schema for the metadata table in a Wind and Solar file.

        Returns:
            pa.DataFrameModel: Pandera DataFrameModel schema for the Wind and Solar metadata.

        """
        return WindSolarMetadataSchema

    @staticmethod
    def _get_unique_check_descriptions() -> dict[str, tuple[str, bool]]:
        """
        Retrieve a dictionary with descriptions of validation checks that are specific to the Wind and Solar schemas.

        Returns:
            dict[str, tuple[str, bool]] | None: None when no unique checks are defined (as here), otherwise a dictionary where:
                - Keys (str): The name of the validation check method.
                - Values (tuple[str, bool]):
                    - The first element (str) provides a concise and user-friendly description of the check. E.g. what
                      caused the validation error or what is required for the check to pass.
                    - The second element (bool) indicates whether the check is a warning (True) or an error (False).


        """
        return None

    @staticmethod
    def _format_unique_checks(errors: pd.DataFrame) -> pd.DataFrame:
        """
        Format the error DataFrame according to the validation checks that are specific to the Wind and Solar schemas.

        Args:
            errors (pd.DataFrame): The error DataFrame containing validation errors.

        Returns:
            pd.DataFrame | None: The updated error DataFrame with formatted rows for unique validation checks, or None when the schema defines no unique checks.

        """
        return None
get_attribute_data_schema() -> pa.DataFrameModel staticmethod

Get the Pandera DataFrameModel schema for attribute data in a Wind and Solar file.

Returns:

Type Description
DataFrameModel

pa.DataFrameModel: Pandera DataFrameModel schema for Wind and Solar attribute data.

Source code in framdata/database_names/WindSolarNames.py
@staticmethod
def get_attribute_data_schema() -> pa.DataFrameModel:
    """
    Get the Pandera DataFrameModel schema for attribute data in a Wind and Solar file.

    Returns:
        pa.DataFrameModel: Pandera DataFrameModel schema for Wind and Solar attribute data.

    """
    return WindSolarSchema
get_metadata_schema() -> pa.DataFrameModel staticmethod

Get the Pandera DataFrameModel schema for the metadata table in a Wind and Solar file.

Returns:

Type Description
DataFrameModel

pa.DataFrameModel: Pandera DataFrameModel schema for the Wind and Solar metadata.

Source code in framdata/database_names/WindSolarNames.py
@staticmethod
def get_metadata_schema() -> pa.DataFrameModel:
    """
    Get the Pandera DataFrameModel schema for the metadata table in a Wind and Solar file.

    Returns:
        pa.DataFrameModel: Pandera DataFrameModel schema for the Wind and Solar metadata.

    """
    return WindSolarMetadataSchema
WindSolarSchema

Bases: DataFrameModel

Standard Pandera DataFrameModel schema for attribute data in the Wind and Solar files.

Source code in framdata/database_names/WindSolarNames.py
class WindSolarSchema(pa.DataFrameModel):
    """Standard Pandera DataFrameModel schema for attribute data in the Wind and Solar files."""

    ID: Series[str] = pa.Field(unique=True, nullable=False)
    Capacity: Series[Any] = pa.Field(nullable=False)
    PowerNode: Series[str] = pa.Field(nullable=False)
    Profile: Series[Any] = pa.Field(nullable=True)

    @pa.check(WindSolarNames.capacity_col)
    @classmethod
    def dtype_str_int_float(cls, series: Series[Any]) -> Series[bool]:
        """Check if values in the series are of datatype: str, int or float."""
        return dtype_str_int_float(series)

    @pa.check(WindSolarNames.profile_col)
    @classmethod
    def dtype_str_int_float_none(cls, series: Series[Any]) -> Series[bool]:
        """Check if values in the series are of datatype: str, int, float or None."""
        return dtype_str_int_float_none(series)
dtype_str_int_float(series: Series[Any]) -> Series[bool] classmethod

Check if values in the series are of datatype: str, int or float.

Source code in framdata/database_names/WindSolarNames.py
@pa.check(WindSolarNames.capacity_col)
@classmethod
def dtype_str_int_float(cls, series: Series[Any]) -> Series[bool]:
    """Check if values in the series are of datatype: str, int or float."""
    return dtype_str_int_float(series)
dtype_str_int_float_none(series: Series[Any]) -> Series[bool] classmethod

Check if values in the series are of datatype: str, int, float or None.

Source code in framdata/database_names/WindSolarNames.py
@pa.check(WindSolarNames.profile_col)
@classmethod
def dtype_str_int_float_none(cls, series: Series[Any]) -> Series[bool]:
    """Check if values in the series are of datatype: str, int, float or None."""
    return dtype_str_int_float_none(series)
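
A minimal sketch of validating a wind or solar attribute table; the reference strings below are hypothetical placeholders for capacity and profile time vectors:

import pandas as pd

from framdata.database_names.WindSolarNames import WindSolarSchema

df = pd.DataFrame(
    {
        "ID": ["NO1_wind_onshore", "NO2_solar_pv"],
        "PowerNode": ["NO1", "NO2"],
        "Capacity": [1200.0, "Some.Capacity.Reference"],  # number or reference string
        "Profile": ["Some.Profile.Reference", None],  # reference string or None
    }
)

validated = WindSolarSchema.validate(df)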

YamlNames

Define names and fields used in yaml files.

YamlNames

Contain names in yaml files.

Source code in framdata/database_names/YamlNames.py
class YamlNames:
    """Contain names in yaml files."""

    encoding = "utf-8"

    metadata_field = "Metadata"
    x_field = "X"
    y_field = "Y"

    # ========= Metadata fields =========
    attribute = "Attribute"
    description = "Description"
    dtype = "Dtype"
    unit = "Unit"

nodes_names

Define class for handling tables with Nodes.

EmissionNodesNames

Bases: NodesNames

Class representing the names and structure of emission nodes tables.

Source code in framdata/database_names/nodes_names.py
class EmissionNodesNames(NodesNames):
    """Class representing the names and structure of emission nodes tables."""

    filename = "Emission.Nodes"

    tax_col = "Tax"  # deprecated?
FuelNodesNames

Bases: NodesNames

Class representing the names and structure of fuel nodes tables.

Source code in framdata/database_names/nodes_names.py
class FuelNodesNames(NodesNames):
    """Class representing the names and structure of fuel nodes tables."""

    filename = "Fuel.Nodes"

    emission_coefficient_col = "EmissionCoefficient"
    tax_col = "Tax"  # deprecated?
NodesMetadataSchema

Bases: _AttributeMetadataSchema

Standard Pandera DataFrameModel schema for metadata in the Nodes files.

Source code in framdata/database_names/nodes_names.py
class NodesMetadataSchema(_AttributeMetadataSchema):
    """Standard Pandera DataFrameModel schema for metadata in the Nodes files."""

    @pa.dataframe_check
    @classmethod
    def check_unit_is_str_for_attributes(cls, df: pd.DataFrame) -> Series[bool]:
        """
        Check that the 'unit' value is a string for the row where 'attribute' is 'ExogenousPrice'.

        Args:
            df (Dataframe): DataFrame used to check value for "unit".

        Returns:
            Series[bool]: Series of boolean values denoting if each element has passed the check.

        """
        return check_unit_is_str_for_attributes(df, [NodesNames.price_col])
check_unit_is_str_for_attributes(df: pd.DataFrame) -> Series[bool] classmethod

Check that the 'unit' value is a string for the row where 'attribute' is 'ExogenousPrice'.

Parameters:

Name Type Description Default
df Dataframe

DataFrame used to check value for "unit".

required

Returns:

Type Description
Series[bool]

Series[bool]: Series of boolean values denoting if each element has passed the check.

Source code in framdata/database_names/nodes_names.py
@pa.dataframe_check
@classmethod
def check_unit_is_str_for_attributes(cls, df: pd.DataFrame) -> Series[bool]:
    """
    Check that the 'Unit' value is a string for rows where 'Attribute' is 'ExogenousPrice'.

    Args:
        df (pd.DataFrame): DataFrame used to check the value of "Unit".

    Returns:
        Series[bool]: Series of boolean values denoting if each element has passed the check.

    """
    return check_unit_is_str_for_attributes(df, [NodesNames.price_col])
NodesNames

Bases: _BaseComponentsNames

Class representing the names and structure of nodes tables, and the conversion of the table to Node objects.

Source code in framdata/database_names/nodes_names.py
class NodesNames(_BaseComponentsNames):
    """Class representing the names and structure of nodes tables, and the convertion of the table to Node objects."""

    id_col = "NodeID"

    commodity_col = "Commodity"
    nice_name = "NiceName"
    price_col = "ExogenousPrice"
    profile_col = "PriceProfile"
    exogenous_col = "IsExogenous"

    columns: ClassVar[list[str]] = [id_col, nice_name, commodity_col, price_col, profile_col, exogenous_col]

    ref_columns: ClassVar[list[str]] = [price_col, profile_col]

    @staticmethod
    def create_component(
        row: NDArray,
        indices: dict[str, int],
        meta_columns: set[str],
        meta_data: pd.DataFrame,
        attribute_objects: dict[str, tuple[object, dict[str, Meta]]] | None = None,
    ) -> tuple[dict[str, Node], list[str]]:
        """
        Create a node object from direct parameters.

        Args:
            row (NDArray): Array containing the values of one table row, representing one Node object.
            indices (dict[str, int]): Mapping of the table's column names to the array's indices.
            meta_columns (set[str]): Set of columns which define memberships in meta groups for aggregation.
            meta_data (pd.DataFrame): DataFrame containing at least the unit of every column.
            attribute_objects (dict[str, tuple[object, dict[str, Meta]]], optional): NOT USED

        Returns:
            dict[str, Node]: Dictionary of node id and the Node object.

        """
        columns_to_parse = [
            NodesNames.price_col,
            NodesNames.profile_col,
        ]

        arg_user_code = NodesNames._parse_args(row, indices, columns_to_parse, meta_data)
        price = None
        if arg_user_code[NodesNames.price_col] is not None:
            price = Price(
                level=arg_user_code[NodesNames.price_col],
                profile=arg_user_code[NodesNames.profile_col],
            )

        node = Node(
            row[indices[NodesNames.commodity_col]],
            is_exogenous=row[indices[NodesNames.exogenous_col]],
            price=price,
        )
        NodesNames._add_meta(node, row, indices, meta_columns)
        return {str(row[indices[NodesNames.id_col]]): node}

    @staticmethod
    def get_attribute_data_schema() -> pa.DataFrameModel:
        """
        Get the Pandera DataFrameModel schema for attribute data in a Nodes file.

        Returns:
            pa.DataFrameModel: Pandera DataFrameModel schema for Nodes attribute data.

        """
        return NodesSchema

    @staticmethod
    def get_metadata_schema() -> pa.DataFrameModel:
        """
        Get the Pandera DataFrameModel schema for the metadata table in a Nodes file.

        Returns:
            pa.DataFrameModel: Pandera DataFrameModel schema for the Nodes metadata.

        """
        return NodesMetadataSchema

    @staticmethod
    def _get_unique_check_descriptions() -> dict[str, tuple[str, bool]]:
        """
        Retrieve a dictionary with descriptions of validation checks that are specific to the Nodes schemas.

        Returns:
            dict[str, tuple[str, bool]]: A dictionary where:
                - Keys (str): The name of the validation check method.
                - Values (tuple[str, bool]):
                    - The first element (str) provides a concise and user-friendly description of the check. E.g. what
                      caused the validation error or what is required for the check to pass.
                    - The second element (bool) indicates whether the check is a warning (True) or an error (False).


        """
        return {
            NodesSchema.check_exogenous_price.__name__: ("Missing price value for exogenous Node.", True),
        }

    @staticmethod
    def _format_unique_checks(errors: pd.DataFrame) -> pd.DataFrame:
        """
        Format the error DataFrame according to the validation checks that are specific to the Nodes schemas.

        Args:
            errors (pd.DataFrame): The error DataFrame containing validation errors.

        Returns:
            pd.DataFrame: The updated error DataFrame with formatted rows for unique validation checks.

        """
        if NodesSchema.check_exogenous_price.__name__ in errors[NodesNames.COL_CHECK].to_numpy():
            check_rows = errors.loc[errors[NodesNames.COL_CHECK] == NodesSchema.check_exogenous_price.__name__]
            errors = errors[~(errors[NodesNames.COL_CHECK] == NodesSchema.check_exogenous_price.__name__)]
            price_exogenous_cols = [NodesNames.id_col, NodesNames.price_col, NodesNames.exogenous_col]
            check_description_str = check_rows[NodesNames.COL_CHECK_DESC].unique()[0]
            price_exogenous_rows = []

            for idx in check_rows[NodesNames.COL_IDX].unique():
                check_case = check_rows[check_rows[NodesNames.COL_IDX] == idx]

                for col in price_exogenous_cols:
                    if col not in list(check_case[NodesNames.COL_COLUMN].unique()):
                        price_exogenous_rows.append(
                            [
                                col,
                                NodesSchema.check_exogenous_price.__name__,
                                None,
                                idx,
                                check_description_str,
                                True,
                            ],
                        )
            errors = pd.concat([errors, pd.DataFrame(price_exogenous_rows, columns=errors.columns)], ignore_index=True)
        return errors
create_component(row: NDArray, indices: dict[str, int], meta_columns: set[str], meta_data: pd.DataFrame, attribute_objects: dict[str, tuple[object, dict[str, Meta]]] | None = None) -> tuple[dict[str, Node], list[str]] staticmethod

Create a node object from direct parameters.

Parameters:

Name Type Description Default
row NDArray

Array containing the values of one table row, representing one Node object.

required
indices dict[str, int]

Mapping of the table's column names to the array's indices.

required
meta_columns set[str]

Set of columns which define memberships in meta groups for aggregation.

required
meta_data DataFrame

DataFrame containing at least the unit of every column.

required
attribute_objects dict[str, tuple[object, dict[str, Meta]]]

NOT USED

None

Returns:

Type Description
tuple[dict[str, Node], list[str]]

dict[str, Node]: Dictionary of node id and the Node object.

Source code in framdata/database_names/nodes_names.py
@staticmethod
def create_component(
    row: NDArray,
    indices: dict[str, int],
    meta_columns: set[str],
    meta_data: pd.DataFrame,
    attribute_objects: dict[str, tuple[object, dict[str, Meta]]] | None = None,
) -> tuple[dict[str, Node], list[str]]:
    """
    Create a node object from direct parameters.

    Args:
        row (NDArray): Array containing the values of one table row, representing one Node object.
        indices (dict[str, int]): Mapping of the table's column names to the array's indices.
        meta_columns (set[str]): Set of columns which define memberships in meta groups for aggregation.
        meta_data (pd.DataFrame): DataFrame containing at least the unit of every column.
        attribute_objects (dict[str, tuple[object, dict[str, Meta]]], optional): NOT USED

    Returns:
        dict[str, Node]: Dictionary of node id and the Node object.

    """
    columns_to_parse = [
        NodesNames.price_col,
        NodesNames.profile_col,
    ]

    arg_user_code = NodesNames._parse_args(row, indices, columns_to_parse, meta_data)
    price = None
    if arg_user_code[NodesNames.price_col] is not None:
        price = Price(
            level=arg_user_code[NodesNames.price_col],
            profile=arg_user_code[NodesNames.profile_col],
        )

    node = Node(
        row[indices[NodesNames.commodity_col]],
        is_exogenous=row[indices[NodesNames.exogenous_col]],
        price=price,
    )
    NodesNames._add_meta(node, row, indices, meta_columns)
    return {str(row[indices[NodesNames.id_col]]): node}
get_attribute_data_schema() -> pa.DataFrameModel staticmethod

Get the Pandera DataFrameModel schema for attribute data in a Nodes file.

Returns:

Type Description
DataFrameModel

pa.DataFrameModel: Pandera DataFrameModel schema for Nodes attribute data.

Source code in framdata/database_names/nodes_names.py
@staticmethod
def get_attribute_data_schema() -> pa.DataFrameModel:
    """
    Get the Pandera DataFrameModel schema for attribute data in a Nodes file.

    Returns:
        pa.DataFrameModel: Pandera DataFrameModel schema for Nodes attribute data.

    """
    return NodesSchema
get_metadata_schema() -> pa.DataFrameModel staticmethod

Get the Pandera DataFrameModel schema for the metadata table in a Nodes file.

Returns:

Type Description
DataFrameModel

pa.DataFrameModel: Pandera DataFrameModel schema for the Nodes metadata.

Source code in framdata/database_names/nodes_names.py
@staticmethod
def get_metadata_schema() -> pa.DataFrameModel:
    """
    Get the Pandera DataFrameModel schema for the metadata table in a Nodes file.

    Returns:
        pa.DataFrameModel: Pandera DataFrameModel schema for the Nodes metadata.

    """
    return NodesMetadataSchema
NodesSchema

Bases: DataFrameModel

Standard Pandera DataFrameModel schema for attribute data in the Nodes files.

Source code in framdata/database_names/nodes_names.py
class NodesSchema(pa.DataFrameModel):
    """Standard Pandera DataFrameModel schema for attribute data in the Nodes files."""

    NodeID: Series[str] = pa.Field(unique=True, nullable=False)
    Commodity: Series[str] = pa.Field(nullable=False)
    ExogenousPrice: Series[Any] = pa.Field(nullable=True)
    PriceProfile: Series[Any] = pa.Field(nullable=True)
    IsExogenous: Series[bool] = pa.Field(nullable=False)

    @pa.check(NodesNames.price_col, NodesNames.profile_col)
    @classmethod
    def dtype_str_int_float_none(cls, series: Series[Any]) -> Series[bool]:
        """Check if values in the series are of datatype: str, int, float or None."""
        return dtype_str_int_float_none(series)

    @pa.dataframe_check
    @classmethod
    def check_exogenous_price(cls, df: DataFrame) -> Series[bool]:
        """Check that all elastic demand values are present if one or more is."""
        price = df[NodesNames.price_col]
        exogenous = df[NodesNames.exogenous_col]
        return ~pd.Series(price.isna() & exogenous)  # return should be false when price is None and Node is exogenous.
check_exogenous_price(df: DataFrame) -> Series[bool] classmethod

Check that a price is given for every exogenous node.

Source code in framdata/database_names/nodes_names.py
@pa.dataframe_check
@classmethod
def check_exogenous_price(cls, df: DataFrame) -> Series[bool]:
    """Check that all elastic demand values are present if one or more is."""
    price = df[NodesNames.price_col]
    exogenous = df[NodesNames.exogenous_col]
    return ~pd.Series(price.isna() & exogenous)  # return should be false when price is None and Node is exogenous.
dtype_str_int_float_none(series: Series[Any]) -> Series[bool] classmethod

Check if values in the series are of datatype: str, int, float or None.

Source code in framdata/database_names/nodes_names.py
@pa.check(NodesNames.price_col, NodesNames.profile_col)
@classmethod
def dtype_str_int_float_none(cls, series: Series[Any]) -> Series[bool]:
    """Check if values in the series are of datatype: str, int, float or None."""
    return dtype_str_int_float_none(series)
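
A minimal usage sketch of NodesSchema on a small, made-up attribute table. Validation raises a pandera SchemaError if, for example, an exogenous node is missing a price; the node rows below are illustrative only.

import pandas as pd

from framdata.database_names.nodes_names import NodesSchema

nodes = pd.DataFrame(
    {
        "NodeID": ["NO1", "DE"],
        "Commodity": ["Power", "Power"],
        "ExogenousPrice": [None, 45.0],
        "PriceProfile": [None, "Power.Nodes.profiles"],
        "IsExogenous": [False, True],
    }
)

validated = NodesSchema.validate(nodes)  # returns the DataFrame if all checks pass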
PowerNodesNames

Bases: NodesNames

Class representing the names and structure of power nodes tables.

Source code in framdata/database_names/nodes_names.py
class PowerNodesNames(NodesNames):
    """Class representing the names and structure of power nodes tables."""

    filename = "Power.Nodes"

validation_functions

Module containing registered custom check functions used by Pandera schema classes.

check_unit_is_str_for_attributes(df: pd.DataFrame, attribute_names: list[str]) -> Series[bool]

Check if 'Unit' column values are strings for the rows where the 'Attribute' column matches specific attributes.

This function checks whether the values in the 'Unit' column are strings for rows where the 'Attribute' column matches any of the specified attribute names. Rows that do not match the specified attributes are considered valid by default. This function is commonly used by subclasses of 'AttributeMetadataSchema' to validate that a unit is given for certain attributes in the metadata belonging to a Component.

Parameters:

Name Type Description Default
df DataFrame

The DataFrame containing the columns to validate.

required
attribute_names list[str]

A list with the names of the attributes to check in the 'Attribute' column.

required

Returns:

Type Description
Series[bool]

Series[bool]: A boolean Series indicating whether each row passes the validation. Rows where the 'Attribute' column does not match the specified attribute are automatically marked as valid.

Example

Given the following DataFrame:

| attribute   | unit |
|-------------|------|
| Volume      | MWh  |
| Temperature | None |
| Capacity    | None |

And attribute_names = ["Volume", "Capacity"], the method will validate that the 'Unit' column contains strings for rows where 'attribute' is "Volume" and "Capacity". The resulting Series will be:

| validation_result |
|-------------------|
| True              |
| True              |
| False             |
Source code in framdata/database_names/validation_functions.py
@extensions.register_check_method()
def check_unit_is_str_for_attributes(df: pd.DataFrame, attribute_names: list[str]) -> Series[bool]:
    """
    Check if 'Unit' column values are strings for the rows where the 'Attribute' column matches specific attributes.

    This function checks whether the values in the 'Unit' column are strings for rows where the 'Attribute' column
    matches any of the specified attribute names. Rows that do not match the specified attributes are considered valid
    by default. This function is commonly used by subclasses of 'AttributeMetadataSchema' to validate that a unit is
    given for certain attributes in the metadata belonging to a Component.

    Args:
        df (pd.DataFrame): The DataFrame containing the columns to validate.
        attribute_names (list[str]): A list with the names of the attributes to check in the 'Attribute' column.

    Returns:
        Series[bool]: A boolean Series indicating whether each row passes the validation. Rows where the 'Attribute'
        column does not match the specified attribute are automatically marked as valid.

    Example:
        Given the following DataFrame:

        | attribute   | unit       |
        |-------------|------------|
        | Volume      | MWh        |
        | Temperature | None       |
        | Capacity    | None       |

        And `attribute_names = ["Volume", "Capacity"]`, the method will validate that the 'Unit' column contains strings
        for rows where 'attribute' is "Volume" and "Capacity". The resulting Series will be:

        | validation_result |
        |-------------------|
        | True              |
        | True              |
        | False             |

    """
    is_attribute_rows = df[_AttributeMetadataNames.attribute].isin(attribute_names)
    unit_is_str = df[_AttributeMetadataNames.unit].apply(lambda x: isinstance(x, str))
    return ~is_attribute_rows | unit_is_str
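
The function can also be called directly, mirroring the example in the docstring. The column names "Attribute" and "Unit" are assumed here to match _AttributeMetadataNames; the rows are the ones from the example above.

import pandas as pd

from framdata.database_names.validation_functions import check_unit_is_str_for_attributes

meta = pd.DataFrame(
    {
        "Attribute": ["Volume", "Temperature", "Capacity"],
        "Unit": ["MWh", None, None],
    }
)

# Only the 'Capacity' row fails: it is listed in attribute_names but has no string unit.
result = check_unit_is_str_for_attributes(meta, ["Volume", "Capacity"])
print(result.tolist())  # [True, True, False]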
dtype_str_int_float(series: Series[Any]) -> Series[bool]

Check if the series contains only str, int or float values.

Parameters:

Name Type Description Default
series Series[Any]

Series to check.

required

Returns:

Type Description
Series[bool]

Series[bool]: Series of boolean values denoting if each element has passed the check.

Source code in framdata/database_names/validation_functions.py
@extensions.register_check_method()
def dtype_str_int_float(series: Series[Any]) -> Series[bool]:
    """
    Check if the series contains only str, int or float values.

    Args:
        series (Series[Any]): Series to check.

    Returns:
        Series[bool]: Series of boolean values denoting if each element has passed the check.

    """
    return series.apply(lambda value: isinstance(value, str | int | float))
dtype_str_int_float_none(series: Series[Any]) -> Series[bool]

Check if the series contains only str, int, float or None values.

Parameters:

Name Type Description Default
series Series[Any]

Series to check.

required

Returns:

Type Description
Series[bool]

Series[bool]: Series of boolean values denoting if each element has passed the check.

Source code in framdata/database_names/validation_functions.py
@extensions.register_check_method()
def dtype_str_int_float_none(series: Series[Any]) -> Series[bool]:
    """
    Check if the series contains only str, int, float or None values.

    Args:
        series (Series[Any]): Series to check.

    Returns:
        Series[bool]: Series of boolean values denoting if each element has passed the check.

    """
    return series.apply(lambda value: isinstance(value, str | int | float | type(None)))
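
For illustration, the check can be evaluated directly on a mixed-type series; the values below are made up.

import pandas as pd

from framdata.database_names.validation_functions import dtype_str_int_float_none

values = pd.Series(["Power.Nodes.profiles", 42, 3.14, None, {"not": "allowed"}])
print(dtype_str_int_float_none(values).tolist())  # [True, True, True, True, False]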
numeric_values_are_between_or_equal_to(series: Series[Any], min_value: int | float, max_value: int | float) -> Series[bool]

Check if values are between or equal to a min and max value if they are of type int or float.

Parameters:

Name Type Description Default
series Series[Any]

Series to check.

required
min_value int | float

Value that the elements in the series should be greater than or equal to.

required
max_value int | float

Value that the elements in the series should be less than or equal to.

required

Returns:

Type Description
Series[bool]

Series[bool]: Series of boolean values denoting if each element has passed the check.

Source code in framdata/database_names/validation_functions.py
@extensions.register_check_method()
def numeric_values_are_between_or_equal_to(
    series: Series[Any],
    min_value: int | float,
    max_value: int | float,
) -> Series[bool]:
    """
    Check if values are between or equal to a min and max value if they are of type int or float.

    Args:
        series (Series[Any]): Series to check.
        min_value (int | float): Value that the elements in the series should be greater than or equal to.
        max_value (int | float): Value that the elements in the series should be less than or equal to.

    Returns:
        Series[bool]: Series of boolean values denoting if each element has passed the check.

    """
    if not isinstance(min_value, (int | float)) or not isinstance(max_value, (int | float)):
        message = "min and max value must be of type int or float."
        raise ValueError(message)
    return series.apply(lambda x: min_value <= x <= max_value if isinstance(x, (int | float)) else True)
numeric_values_greater_than(series: Series[Any], min_value: int | float) -> Series[bool]

Check if values are strictly greater than min_value if they are of type int or float.

Parameters:

Name Type Description Default
series Series[Any]

Series to check.

required
min_value int | float

Value that the elements in the series should be strictly greater than.

required

Returns:

Type Description
Series[bool]

Series[bool]: Series of boolean values denoting if each element has passed the check.

Source code in framdata/database_names/validation_functions.py
@extensions.register_check_method()
def numeric_values_greater_than(series: Series[Any], min_value: int | float) -> Series[bool]:
    """
    Check if values are strictly greater than min_value if they are of type int or float.

    Args:
        series (Series[Any]): Series to check.
        min_value (int | float): Value that the elements in the series should be strictly greater than.

    Returns:
        Series[bool]: Series of boolean values denoting if each element has passed the check.

    """
    if not isinstance(min_value, (int | float)):
        message = "min_value must be of type int or float."
        raise ValueError(message)
    return series.apply(lambda x: x > min_value if isinstance(x, (int | float)) else True)
numeric_values_greater_than_or_equal_to(series: Series[Any], min_value: int | float) -> Series[bool]

Check if values are greater than or equal to min_value if they are of type int or float.

Parameters:

Name Type Description Default
series Series[Any]

Series to check.

required
min_value int | float

Value that the elements in the series should be greater than or equal to.

required

Returns:

Type Description
Series[bool]

Series[bool]: Series of boolean values denoting if each element has passed the check.

Source code in framdata/database_names/validation_functions.py
@extensions.register_check_method()
def numeric_values_greater_than_or_equal_to(series: Series[Any], min_value: int | float) -> Series[bool]:
    """
    Check if values are greater than or equal to min_value if they are of type int or float.

    Args:
        series (Series[Any]): Series to check.
        min_value (int | float): Value that the elements in the series should be greater than or equal to.

    Returns:
        Series[bool]: Series of boolean values denoting if each element has passed the check.

    """
    if not isinstance(min_value, (int | float)):
        message = "min_value must be of type int or float."
        raise ValueError(message)
    return series.apply(lambda x: x >= min_value if isinstance(x, (int | float)) else True)
numeric_values_less_than_or_equal_to(series: Series[Any], max_value: int | float) -> Series[bool]

Check if values are less than or equal to max_value if they are of type int or float.

Parameters:

Name Type Description Default
series Series[Any]

Series to check.

required
max_value int | float

Value that the elements in the series should be less than or equal to.

required

Returns:

Type Description
Series[bool]

Series[bool]: Series of boolean values denoting if each element has passed the check.

Source code in framdata/database_names/validation_functions.py
@extensions.register_check_method()
def numeric_values_less_than_or_equal_to(series: Series[Any], max_value: int | float) -> Series[bool]:
    """
    Check if values are less than or equal to max_value if they are of type int or float.

    Args:
        series (Series[Any]): Series to check.
        max_value (int | float): Value that the elements in the series should be less than or equal to.

    Returns:
        Series[bool]: Series of boolean values denoting if each element has passed the check.

    """
    if not isinstance(max_value, (int | float)):
        message = "max_value must be of type int or float."
        raise ValueError(message)
    return series.apply(lambda x: x <= max_value if isinstance(x, (int | float)) else True)
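
Because these functions are registered through pandera's extensions module, they can also be attached to a Field by keyword in a DataFrameModel, as an alternative to the @pa.check decorator style used by the schemas above. This is a sketch that assumes the framdata checks behave like standard registered pandera check methods; the column name is made up.

from typing import Any

import pandera as pa
from pandera.typing import Series

import framdata.database_names.validation_functions  # noqa: F401  # importing registers the checks

class CapacityColumnSchema(pa.DataFrameModel):
    # Hypothetical column: when a value is numeric it must be greater than or equal to 0.
    Capacity: Series[Any] = pa.Field(nullable=True, numeric_values_greater_than_or_equal_to=0)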

file_editors

NVEFileEditor

Contain class with common functionality for editing files.

NVEFileEditor

Bases: Base

Parent class with common functionality for classes concerned with editing FRAM files.

Source code in framdata/file_editors/NVEFileEditor.py
class NVEFileEditor(Base):
    """Parent class with common functionality for classes concerned with editing FRAM files."""

    def __init__(self, source: Path | str | None = None) -> None:
        """
        Set the source file path if supplied.

        Args:
            source (Path | str | None, optional): Path to the source file to edit. Defaults to None.

        """
        super().__init__()

        self._check_type(source, (Path, str, type(None)))
        self._source = None if source is None else Path(source)

    def get_source(self) -> Path:
        """Get the source file path of the editor."""
        return self._source

    def set_source(self, source: Path) -> None:
        """Set the source file path of the editor."""
        self._check_type(source, (Path, str))
        self._source = Path(source)
__init__(source: Path | str | None = None) -> None

Set the source file path if supplied.

Parameters:

Name Type Description Default
source Path | str | None

Path to the source file to edit. Defaults to None.

None
Source code in framdata/file_editors/NVEFileEditor.py
def __init__(self, source: Path | str | None = None) -> None:
    """
    Set the source file path if supplied.

    Args:
        source (Path | str | None, optional): Path to the source file to edit. Defaults to None.

    """
    super().__init__()

    self._check_type(source, (Path, str, type(None)))
    self._source = None if source is None else Path(source)
get_source() -> Path

Get the source file path of the editor.

Source code in framdata/file_editors/NVEFileEditor.py
def get_source(self) -> Path:
    """Get the source file path of the editor."""
    return self._source
set_source(source: Path) -> None

Set the source file path of the editor.

Source code in framdata/file_editors/NVEFileEditor.py
def set_source(self, source: Path) -> None:
    """Set the source file path of the editor."""
    self._check_type(source, (Path, str))
    self._source = Path(source)

NVEH5TimeVectorEditor

Contains class for editing time vectors in H5 files.

NVEH5TimeVectorEditor

Bases: NVEFileEditor

Class with functionality concerned with editing time vectors and their metadata in H5 files.

Structure of the NVE h5 files
  • common_index dataset: Contains a numpy array with index applied to all vectors missing a specific index.
  • index group of datasets: Contains indexes coupled to specific vectors by the vector IDs.
  • common_metadata group: Contains dictionary with metadata applied to all vectors missing a specific metadata dictionary.
  • metadata group of groups: Contains metadata dictionaries coupled to specific vectors by the vector IDs.
  • vectors group of datasets: Contains numpy arrays with the vector values.
Source code in framdata/file_editors/NVEH5TimeVectorEditor.py
class NVEH5TimeVectorEditor(NVEFileEditor):
    """
    Class with functionality concerned with editing time vectors and their metadata in H5 files.

    Structure of the NVE h5 files:
     - common_index dataset: Contains a numpy array with index applied to all vectors missing a specific index.
     - index group of datasets: Contains indexes coupled to specific vectors by the vector IDs.
     - common_metadata group: Contains dictionary with metadata applied to all vectors missing a specific metadata dictionary.
     - metadata group of groups: Contains metadata dictionaries coupled to specific vectors by the vector IDs.
     - vectors group of datasets: Contains numpy arrays with the vector values.

    """

    def __init__(self, source: Path | str | None = None) -> None:
        """
        Set path to h5 file if supplied, and load/initialize vectors, indexes and metadata from it.

        Args:
            source (Path | str | None, optional): Path to h5 file with time vectors. Defaults to None.

        """
        super().__init__(source)

        meta_tuple = ({}, None) if self._source is None or not self._source.exists() else self._read_data(H5Names.METADATA_GROUP, True)
        self._metadata, self._common_metadata = meta_tuple
        index_tuple = (defaultdict(NDArray), None) if self._source is None or not self._source.exists() else self._read_data(H5Names.INDEX_GROUP, False)
        self._index, self._common_index = index_tuple
        self._index = {k: v.astype(str) for k, v in self._index.items()}

        vectors_tuple = (defaultdict(NDArray), None) if self._source is None or not self._source.exists() else self._read_data(H5Names.VECTORS_GROUP, False)
        self._vectors, __ = vectors_tuple

    def get_metadata(self, vector_id: str) -> None | dict:
        """Get a copy of the metadata of all vectors in the h5 file."""
        try:
            return self._metadata[vector_id].copy()
        except KeyError as e:
            f"Found no ID '{vector_id}' in metadata."
            raise KeyError from e

    def set_metadata(self, vector_id: str, metadata: dict[str, TvMn.METADATA_TYPES]) -> None:
        """Set the metadata dictionary of a specific vector (overwrites existing)."""
        self._check_type(vector_id, str)
        self._check_type(metadata, dict)
        for key, value in metadata.items():
            self._check_type(key, str)
            self._check_type(value, TvMn.METADATA_TYPES_TUPLE)
        self._metadata[vector_id] = metadata

    def set_metadata_by_key(self, vector_id: str, key: str, value: TvMn.METADATA_TYPES) -> None:
        """Set a field (new or overwrite) in the metadata of a vector."""
        self._check_type(key, str)
        self._check_type(vector_id, str)
        self._check_type(value, TvMn.METADATA_TYPES_TUPLE)
        if vector_id not in self._metadata or not isinstance(self._metadata[vector_id], dict):
            self._metadata[vector_id] = {}
        self._metadata[vector_id][key] = value

    def get_common_metadata(self) -> None | dict:
        """Get a copy of the common metadata of vectors in the h5 file."""
        return self._common_metadata if self._common_metadata is None else self._common_metadata.copy()

    def set_common_metadata(self, metadata: dict[str, TvMn.METADATA_TYPES]) -> None:
        """Set the common metadata dictionary (overwrites existing)."""
        self._check_type(metadata, dict)
        self._common_metadata = metadata

    def set_common_metadata_by_key(self, key: str, value: TvMn.METADATA_TYPES) -> None:
        """Set a field (new or overwrite) in the common metadata."""
        self._check_type(key, str)
        self._check_type(value, TvMn.METADATA_TYPES_TUPLE)
        if self._common_metadata is None:
            self._common_metadata = {}
        self._common_metadata[key] = value

    def set_index(self, vector_id: str, index: NDArray) -> None:
        """
        Set the index of a vector.

        Index is paired with a vector of the same vector_id.

        """
        self._check_type(vector_id, str)
        self._check_type(index, np.ndarray)
        self._index[vector_id] = index

    def get_index(self, vector_id: str) -> NDArray:
        """Return a copy of a given index as a pandas series from the table."""
        try:
            return self._index[vector_id]
        except KeyError as e:
            f"Found no ID '{vector_id}' among indexes."
            raise KeyError from e

    def set_common_index(self, values: NDArray) -> None:
        """Set the common index which will be used for vectors which have not specified their own index by its ID."""
        self._check_type(values, np.ndarray)
        self._common_index = values

    def get_common_index(self) -> NDArray | None:
        """Return a copy of a given index as a pandas series from the table."""
        return self._common_index

    def set_vector(self, vector_id: str, values: NDArray) -> None:
        """Set vector values."""
        self._check_type(vector_id, str)
        self._check_type(values, np.ndarray)
        self._vectors[vector_id] = values

    def get_vector(self, vector_id: str) -> NDArray:
        """Return a copy of a given vector as a pandas series from the table."""
        try:
            return self._vectors[vector_id]
        except KeyError as e:
            msg = f"Found no ID '{vector_id}' among vectors."
            raise KeyError(msg) from e

    def get_vector_ids(self) -> list[str]:
        """Get the IDs of all vectors available in the file."""
        return list(self._vectors.keys())

    def save_to_h5(self, path: Path | str) -> None:
        """
        Store the data to h5 file.

        Args:
            path (Path | str): Path to save the file. Overwrites existing files.

        Raises:
            KeyError: If common index is None and there are vectors missing specific index.
            KeyError: If common metadata is None and there are vectors missing specific metadata.

        """
        self._check_type(path, (Path, str))
        path = Path(path)

        self._check_missing_indexes()
        self._check_missing_metadata()

        with h5py.File(path, mode="w") as f:
            if self._common_metadata is not None:
                common_meta_group = f.create_group(H5Names.COMMON_PREFIX + H5Names.METADATA_GROUP)
                self._write_meta_to_group(common_meta_group, self._common_metadata)
            if self._common_index is not None:
                f.create_dataset(H5Names.COMMON_PREFIX + H5Names.INDEX_GROUP, data=self._common_index.astype(bytes))

            if self._metadata:
                meta_group = f.create_group(H5Names.METADATA_GROUP)
                for vector_id, meta in self._metadata.items():
                    vm_group = meta_group.create_group(vector_id)
                    self._write_meta_to_group(vm_group, meta)

            if self._index:
                index_group = f.create_group(H5Names.INDEX_GROUP)
                for vector_id, index in self._index.items():
                    index_group.create_dataset(vector_id, data=index.astype(bytes))

            if self._vectors:
                vector_group = f.create_group(H5Names.VECTORS_GROUP)
                for vector_id, vector in self._vectors.items():
                    vector_group.create_dataset(vector_id, data=vector)

    def _check_missing_indexes(self) -> None:
        missing_index = {v for v in self._vectors if v not in self._index}
        if self._common_index is None and len(missing_index) != 0:
            msg = f"Found vectors missing indexes and common index is not set: {missing_index}."
            raise KeyError(msg)

    def _check_missing_metadata(self) -> None:
        missing_meta = {v for v in self._vectors if v not in self._metadata}
        if self._common_metadata is None and len(missing_meta) != 0:
            msg = f"Found vectors missing metadata and common metadata is not set: {missing_meta}."
            raise KeyError(msg)

    def _write_meta_to_group(self, meta_group: h5py.Group, metadata: dict) -> None:
        for k, v in metadata.items():
            meta_group.create_dataset(k, data=str(v).encode(TvMn.ENCODING))

    def _read_data(
        self,
        group_name: str,
        cast_meta: bool,
    ) -> tuple[dict[str, dict[str, TvMn.METADATA_TYPES]] | dict[str, dict[str, NDArray]], dict[str, TvMn.METADATA_TYPES] | dict[str, NDArray]]:
        common_field = H5Names.COMMON_PREFIX + group_name
        data = {}
        common_data = None
        with h5py.File(self._source, mode="r") as f:
            if group_name in f and isinstance(f[group_name], h5py.Group):
                group = f[group_name]
                data.update(
                    {
                        vector_id: TvMn.cast_meta(self._read_datasets(vector_data)) if cast_meta else self._read_datasets(vector_data)
                        for vector_id, vector_data in group.items()
                    },
                )

            if common_field in f and isinstance(f[common_field], h5py.Group):
                datasets = self._read_datasets(f[common_field])
                common_data, __ = TvMn.cast_meta(datasets) if cast_meta else (datasets, None)
            elif common_field in f and isinstance(f[common_field], h5py.Dataset):
                common_data = f[common_field][()]

        return data, common_data

    def _read_datasets(self, field: h5py.Group | h5py.Dataset) -> dict | NDArray | bytes:
        if isinstance(field, h5py.Dataset):
            return field[()]
        datasets = {}
        for key, val in field.items():
            if isinstance(val, h5py.Dataset):
                datasets[key] = val[()]
            else:
                msg = f"Expected only {h5py.Dataset} in field, but found {type(val)}"
                raise TypeError(msg)

        return datasets
__init__(source: Path | str | None = None) -> None

Set path to h5 file if supplied, and load/initialize vectors, indexes and metadata from it.

Parameters:

Name Type Description Default
source Path | str | None

Path to h5 file with time vectors. Defaults to None.

None
Source code in framdata/file_editors/NVEH5TimeVectorEditor.py
def __init__(self, source: Path | str | None = None) -> None:
    """
    Set path to h5 file if supplied, and load/initialize vectors, indexes and metadata from it.

    Args:
        source (Path | str | None, optional): Path to h5 file with time vectors. Defaults to None.

    """
    super().__init__(source)

    meta_tuple = ({}, None) if self._source is None or not self._source.exists() else self._read_data(H5Names.METADATA_GROUP, True)
    self._metadata, self._common_metadata = meta_tuple
    index_tuple = (defaultdict(NDArray), None) if self._source is None or not self._source.exists() else self._read_data(H5Names.INDEX_GROUP, False)
    self._index, self._common_index = index_tuple
    self._index = {k: v.astype(str) for k, v in self._index.items()}

    vectors_tuple = (defaultdict(NDArray), None) if self._source is None or not self._source.exists() else self._read_data(H5Names.VECTORS_GROUP, False)
    self._vectors, __ = vectors_tuple
get_common_index() -> NDArray | None

Return the common index as a numpy array, or None if not set.

Source code in framdata/file_editors/NVEH5TimeVectorEditor.py
def get_common_index(self) -> NDArray | None:
    """Return a copy of a given index as a pandas series from the table."""
    return self._common_index
get_common_metadata() -> None | dict

Get a copy of the common metadata of vectors in the h5 file.

Source code in framdata/file_editors/NVEH5TimeVectorEditor.py
def get_common_metadata(self) -> None | dict:
    """Get a copy of the common metadata of vectors in the h5 file."""
    return self._common_metadata if self._common_metadata is None else self._common_metadata.copy()
get_index(vector_id: str) -> NDArray

Return the index of a given vector as a numpy array.

Source code in framdata/file_editors/NVEH5TimeVectorEditor.py
def get_index(self, vector_id: str) -> NDArray:
    """Return a copy of a given index as a pandas series from the table."""
    try:
        return self._index[vector_id]
    except KeyError as e:
        f"Found no ID '{vector_id}' among indexes."
        raise KeyError from e
get_metadata(vector_id: str) -> None | dict

Get a copy of the metadata of a specific vector in the h5 file.

Source code in framdata/file_editors/NVEH5TimeVectorEditor.py
def get_metadata(self, vector_id: str) -> None | dict:
    """Get a copy of the metadata of all vectors in the h5 file."""
    try:
        return self._metadata[vector_id].copy()
    except KeyError as e:
        f"Found no ID '{vector_id}' in metadata."
        raise KeyError from e
get_vector(vector_id: str) -> NDArray

Return the values of a given vector as a numpy array.

Source code in framdata/file_editors/NVEH5TimeVectorEditor.py
def get_vector(self, vector_id: str) -> NDArray:
    """Return a copy of a given vector as a pandas series from the table."""
    try:
        return self._vectors[vector_id]
    except KeyError as e:
        msg = f"Found no ID '{vector_id}' among vectors."
        raise KeyError(msg) from e
get_vector_ids() -> list[str]

Get the IDs of all vectors available in the file.

Source code in framdata/file_editors/NVEH5TimeVectorEditor.py
def get_vector_ids(self) -> list[str]:
    """Get the IDs of all vectors available in the file."""
    return list(self._vectors.keys())
save_to_h5(path: Path | str) -> None

Store the data to h5 file.

Parameters:

Name Type Description Default
path Path | str

Path to save the file. Overwrites existing files.

required

Raises:

Type Description
KeyError

If common index is None and there are vectors missing specific index.

KeyError

If common metadata is None and there are vectors missing specific metadata.

Source code in framdata/file_editors/NVEH5TimeVectorEditor.py
def save_to_h5(self, path: Path | str) -> None:
    """
    Store the data to h5 file.

    Args:
        path (Path | str): Path to save the file. Overwrites existing files.

    Raises:
        KeyError: If common index is None and there are vectors missing specific index.
        KeyError: If common metadata is None and there are vectors missing specific metadata.

    """
    self._check_type(path, (Path, str))
    path = Path(path)

    self._check_missing_indexes()
    self._check_missing_metadata()

    with h5py.File(path, mode="w") as f:
        if self._common_metadata is not None:
            common_meta_group = f.create_group(H5Names.COMMON_PREFIX + H5Names.METADATA_GROUP)
            self._write_meta_to_group(common_meta_group, self._common_metadata)
        if self._common_index is not None:
            f.create_dataset(H5Names.COMMON_PREFIX + H5Names.INDEX_GROUP, data=self._common_index.astype(bytes))

        if self._metadata:
            meta_group = f.create_group(H5Names.METADATA_GROUP)
            for vector_id, meta in self._metadata.items():
                vm_group = meta_group.create_group(vector_id)
                self._write_meta_to_group(vm_group, meta)

        if self._index:
            index_group = f.create_group(H5Names.INDEX_GROUP)
            for vector_id, index in self._index.items():
                index_group.create_dataset(vector_id, data=index.astype(bytes))

        if self._vectors:
            vector_group = f.create_group(H5Names.VECTORS_GROUP)
            for vector_id, vector in self._vectors.items():
                vector_group.create_dataset(vector_id, data=vector)
set_common_index(values: NDArray) -> None

Set the common index which will be used for vectors which have not specified their own index by its ID.

Source code in framdata/file_editors/NVEH5TimeVectorEditor.py
def set_common_index(self, values: NDArray) -> None:
    """Set the common index which will be used for vectors which have not specified their own index by its ID."""
    self._check_type(values, np.ndarray)
    self._common_index = values
set_common_metadata(metadata: dict[str, TvMn.METADATA_TYPES]) -> None

Set the common metadata dictionary (overwrites existing).

Source code in framdata/file_editors/NVEH5TimeVectorEditor.py
def set_common_metadata(self, metadata: dict[str, TvMn.METADATA_TYPES]) -> None:
    """Set the common metadata dictionary (overwrites existing)."""
    self._check_type(metadata, dict)
    self._common_metadata = metadata
set_common_metadata_by_key(key: str, value: TvMn.METADATA_TYPES) -> None

Set a field (new or overwrite) in the common metadata.

Source code in framdata/file_editors/NVEH5TimeVectorEditor.py
def set_common_metadata_by_key(self, key: str, value: TvMn.METADATA_TYPES) -> None:
    """Set a field (new or overwrite) in the common metadata."""
    self._check_type(key, str)
    self._check_type(value, TvMn.METADATA_TYPES_TUPLE)
    if self._common_metadata is None:
        self._common_metadata = {}
    self._common_metadata[key] = value
set_index(vector_id: str, index: NDArray) -> None

Set the index of a vector.

Index is paired with a vector of the same vector_id.

Source code in framdata/file_editors/NVEH5TimeVectorEditor.py
def set_index(self, vector_id: str, index: NDArray) -> None:
    """
    Set the index of a vector.

    Index is paired with a vector of the same vector_id.

    """
    self._check_type(vector_id, str)
    self._check_type(index, np.ndarray)
    self._index[vector_id] = index
set_metadata(vector_id: str, metadata: dict[str, TvMn.METADATA_TYPES]) -> None

Set the metadata dictionary of a specific vector (overwrites existing).

Source code in framdata/file_editors/NVEH5TimeVectorEditor.py
def set_metadata(self, vector_id: str, metadata: dict[str, TvMn.METADATA_TYPES]) -> None:
    """Set the metadata dictionary of a specific vector (overwrites existing)."""
    self._check_type(vector_id, str)
    self._check_type(metadata, dict)
    for key, value in metadata.items():
        self._check_type(key, str)
        self._check_type(value, TvMn.METADATA_TYPES_TUPLE)
    self._metadata[vector_id] = metadata
set_metadata_by_key(vector_id: str, key: str, value: TvMn.METADATA_TYPES) -> None

Set a field (new or overwrite) in the metadata of a vector.

Source code in framdata/file_editors/NVEH5TimeVectorEditor.py
def set_metadata_by_key(self, vector_id: str, key: str, value: TvMn.METADATA_TYPES) -> None:
    """Set a field (new or overwrite) in the metadata of a vector."""
    self._check_type(key, str)
    self._check_type(vector_id, str)
    self._check_type(value, TvMn.METADATA_TYPES_TUPLE)
    if vector_id not in self._metadata or not isinstance(self._metadata[vector_id], dict):
        self._metadata[vector_id] = {}
    self._metadata[vector_id][key] = value
set_vector(vector_id: str, values: NDArray) -> None

Set vector values.

Source code in framdata/file_editors/NVEH5TimeVectorEditor.py
def set_vector(self, vector_id: str, values: NDArray) -> None:
    """Set vector values."""
    self._check_type(vector_id, str)
    self._check_type(values, np.ndarray)
    self._vectors[vector_id] = values
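
As a rough usage sketch, the editor can be built from scratch, filled with vectors, and written to a new h5 file. The vector id, metadata key and file name below are made up; save_to_h5 raises a KeyError if a vector lacks both a specific and a common index or metadata.

import numpy as np

from framdata.file_editors.NVEH5TimeVectorEditor import NVEH5TimeVectorEditor

editor = NVEH5TimeVectorEditor()  # start from an empty editor (no source file)

# Common index shared by every vector that does not set its own index.
hours = np.arange("2030-01-01", "2030-01-08", dtype="datetime64[h]").astype(str)
editor.set_common_index(hours)

editor.set_vector("NO1.inflow", np.random.default_rng(0).random(hours.size))
editor.set_metadata("NO1.inflow", {"Unit": "m3/s"})  # hypothetical metadata key

editor.save_to_h5("Hydropower.Inflow.profiles.h5")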

NVEParquetTimeVectorEditor

Contains class for editing time vectors in parquet files.

NVEParquetTimeVectorEditor

Bases: NVEFileEditor

Class for managing time vectors and their metadata stored in parquet files.

Source code in framdata/file_editors/NVEParquetTimeVectorEditor.py
class NVEParquetTimeVectorEditor(NVEFileEditor):
    """Class for managing time vectors and their metadata stored in parquet files."""

    def __init__(self, source: Path | str | None = None) -> None:
        """
        Set path to parquet file if supplied, load/initialize table and metadata as pd.DataFrame and dictionary respectively.

        Args:
            source (Path | str | None, optional): Path to parquet file with timevectors. Defaults to None.

        """
        super().__init__(source)
        self._metadata = {} if self._source is None or not self._source.exists() else self._read_metadata()
        self._data = pd.DataFrame() if self._source is None or not self._source.exists() else pd.read_parquet(self._source)

    def save_to_parquet(self, path: Path | str) -> None:
        """
        Save the edited dataframe and metadata to parquet file.

        Args:
            path (Path): Path to save the file to. Must be defined to force the user to explicitly overwrite the original file if they want.

        """
        self._check_type(path, (Path, str))
        path = Path(path)
        table = pa.Table.from_pandas(self._data)

        # ensure binary strings with defined encoding, since parquet encodes metadata anyway
        schema_with_meta = table.schema.with_metadata({str(k).encode(TvMn.ENCODING): str(v).encode(TvMn.ENCODING) for k, v in self._metadata.items()})
        table = pa.Table.from_pandas(self._data, schema=schema_with_meta)

        pq.write_table(table, path)

    def get_metadata(self) -> dict:
        """Get a copy of the metadata of the vectors in the parquet file."""
        return self._metadata.copy()

    def set_metadata(self, metadata: dict[str, TvMn.METADATA_TYPES]) -> None:
        """Set the metadata dictionary (overwrites existing)."""
        self._check_type(metadata, dict)
        for key, value in metadata.items():
            self._check_type(key, str)
            self._check_type(value, TvMn.METADATA_TYPES_TUPLE)
        self._metadata = metadata

    def set_metadata_by_key(self, key: str, value: TvMn.METADATA_TYPES) -> None:
        """Set a field (new or overwrite) in the metadata."""
        self._check_type(key, str)
        self._check_type(value, TvMn.METADATA_TYPES_TUPLE)
        self._metadata[key] = value

    def set_vector(self, vector_id: str, values: NDArray | pd.Series) -> None:
        """Set a whole vector in the time vector table."""
        self._check_type(vector_id, str)
        self._check_type(values, (np.ndarray, pd.Series))
        if not self._data.empty and len(values) != len(self._data):
            message = f"Series values has different size than the other vectors in the table.\nLength values: {len(values)}\nLength vectors: {len(self._data)}"
            raise IndexError(message)
        self._data[vector_id] = values

    def get_vector(self, vector_id: str) -> pd.Series:
        """Return a copy of a given vector as a pandas series from the table."""
        try:
            return self._data[vector_id].copy()
        except KeyError as e:
            f"Found no vector named '{vector_id}' in table at {self._source}."
            raise KeyError from e

    def get_dataframe(self) -> pd.DataFrame:
        """Return a copy of all of the vector table as a pandas dataframe."""
        return self._data.copy()

    def set_dataframe(self, dataframe: pd.DataFrame) -> None:
        """Set the dataframe of the editor."""
        self._check_type(dataframe, pd.DataFrame)
        self._data = dataframe

    def get_vector_ids(self) -> list[str]:
        """Get the IDs of all vectors."""
        return [c for c in self._data.columns if c != TvMn.DATETIME_COL]

    def set_index_column(self, index: NDArray | pd.Series) -> None:
        """Set the index column."""
        self._check_type(index, (np.ndarray, pd.Series))
        if not self._data.empty and len(index) != len(self._data):
            message = f"Series index has different size than the other vectors in the table.\nLength index: {len(index)}\nLength vectors: {len(self._data)}"
            raise IndexError(message)
        self._data[TvMn.DATETIME_COL] = index

    def get_index_column(self) -> pd.Series:
        """Get the datetime column of the dataframe."""
        if TvMn.DATETIME_COL not in self._data:
            message = f"Table at {self._source} does not have an index column. Index column must exist and be named '{TvMn.DATETIME_COL}'."
            raise KeyError(message)
        return self._data[TvMn.DATETIME_COL].copy()

    def _read_metadata(self) -> dict[str, bool | int | str | datetime | timedelta | tzinfo | None]:
        if self._source is None:
            message = "Must set a source before reading file."
            raise ValueError(message)
        metadata = pq.ParquetFile(self._source).schema_arrow.metadata

        cast_meta, __ = TvMn.cast_meta(metadata)  # ignore missing keys
        return cast_meta
__init__(source: Path | str | None = None) -> None

Set path to parquet file if supplied, load/initialize table and metadata as pd.DataFrame and dictionary respectively.

Parameters:

Name Type Description Default
source Path | str | None

Path to parquet file with timevectors. Defaults to None.

None
Source code in framdata/file_editors/NVEParquetTimeVectorEditor.py
def __init__(self, source: Path | str | None = None) -> None:
    """
    Set path to parquet file if supplied, load/initialize table and metadata as pd.DataFrame and dictionary respectively.

    Args:
        source (Path | str | None, optional): Path to parquet file with timevectors. Defaults to None.

    """
    super().__init__(source)
    self._metadata = {} if self._source is None or not self._source.exists() else self._read_metadata()
    self._data = pd.DataFrame() if self._source is None or not self._source.exists() else pd.read_parquet(self._source)
get_dataframe() -> pd.DataFrame

Return a copy of all of the vector table as a pandas dataframe.

Source code in framdata/file_editors/NVEParquetTimeVectorEditor.py
def get_dataframe(self) -> pd.DataFrame:
    """Return a copy of all of the vector table as a pandas dataframe."""
    return self._data.copy()
get_index_column() -> pd.Series

Get the datetime column of the dataframe.

Source code in framdata/file_editors/NVEParquetTimeVectorEditor.py
def get_index_column(self) -> pd.Series:
    """Get the datetime column of the dataframe."""
    if TvMn.DATETIME_COL not in self._data:
        message = f"Table at {self._source} does not have an index column. Index column must exist and be named '{TvMn.DATETIME_COL}'."
        raise KeyError(message)
    return self._data[TvMn.DATETIME_COL].copy()
get_metadata() -> dict

Get a copy of the metadata of the vectors in the parquet file.

Source code in framdata/file_editors/NVEParquetTimeVectorEditor.py
def get_metadata(self) -> dict:
    """Get a copy of the metadata of the vectors in the parquet file."""
    return self._metadata.copy()
get_vector(vector_id: str) -> pd.Series

Return a copy of a given vector as a pandas series from the table.

Source code in framdata/file_editors/NVEParquetTimeVectorEditor.py
def get_vector(self, vector_id: str) -> pd.Series:
    """Return a copy of a given vector as a pandas series from the table."""
    try:
        return self._data[vector_id].copy()
    except KeyError as e:
        f"Found no vector named '{vector_id}' in table at {self._source}."
        raise KeyError from e
get_vector_ids() -> list[str]

Get the IDs of all vectors.

Source code in framdata/file_editors/NVEParquetTimeVectorEditor.py
def get_vector_ids(self) -> list[str]:
    """Get the IDs of all vectors."""
    return [c for c in self._data.columns if c != TvMn.DATETIME_COL]
save_to_parquet(path: Path | str) -> None

Save the edited dataframe and metadata to parquet file.

Parameters:

Name Type Description Default
path Path

Path to save the file to. Must be defined to force the user to explicitly overwrite the original file if they want.

required
Source code in framdata/file_editors/NVEParquetTimeVectorEditor.py
def save_to_parquet(self, path: Path | str) -> None:
    """
    Save the edited dataframe and metadata to parquet file.

    Args:
        path (Path): Path to save the file to. Must be defined to force the user to explicitly overwrite the original file if they want.

    """
    self._check_type(path, (Path, str))
    path = Path(path)
    table = pa.Table.from_pandas(self._data)

    # ensure binary strings with defined encoding, since parquet encodes metadata anyway
    schema_with_meta = table.schema.with_metadata({str(k).encode(TvMn.ENCODING): str(v).encode(TvMn.ENCODING) for k, v in self._metadata.items()})
    table = pa.Table.from_pandas(self._data, schema=schema_with_meta)

    pq.write_table(table, path)
set_dataframe(dataframe: pd.DataFrame) -> None

Set the dataframe of the editor.

Source code in framdata/file_editors/NVEParquetTimeVectorEditor.py
def set_dataframe(self, dataframe: pd.DataFrame) -> None:
    """Set the dataframe of the editor."""
    self._check_type(dataframe, pd.DataFrame)
    self._data = dataframe
set_index_column(index: NDArray | pd.Series) -> None

Set the index column.

Source code in framdata/file_editors/NVEParquetTimeVectorEditor.py
def set_index_column(self, index: NDArray | pd.Series) -> None:
    """Set the index column."""
    self._check_type(index, (np.ndarray, pd.Series))
    if not self._data.empty and len(index) != len(self._data):
        message = f"Series index has different size than the other vectors in the table.\nLength index: {len(index)}\nLength vectors: {len(self._data)}"
        raise IndexError(message)
    self._data[TvMn.DATETIME_COL] = index
set_metadata(metadata: dict[str, TvMn.METADATA_TYPES]) -> None

Set the metadata dictionary (overwrites existing).

Source code in framdata/file_editors/NVEParquetTimeVectorEditor.py
def set_metadata(self, metadata: dict[str, TvMn.METADATA_TYPES]) -> None:
    """Set the metadata dictionary (overwrites existing)."""
    self._check_type(metadata, dict)
    for key, value in metadata.items():
        self._check_type(key, str)
        self._check_type(value, TvMn.METADATA_TYPES_TUPLE)
    self._metadata = metadata
set_metadata_by_key(key: str, value: TvMn.METADATA_TYPES) -> None

Set a field (new or overwrite) in the metadata.

Source code in framdata/file_editors/NVEParquetTimeVectorEditor.py
def set_metadata_by_key(self, key: str, value: TvMn.METADATA_TYPES) -> None:
    """Set a field (new or overwrite) in the metadata."""
    self._check_type(key, str)
    self._check_type(value, TvMn.METADATA_TYPES_TUPLE)
    self._metadata[key] = value
set_vector(vector_id: str, values: NDArray | pd.Series) -> None

Set a whole vector in the time vector table.

Source code in framdata/file_editors/NVEParquetTimeVectorEditor.py
def set_vector(self, vector_id: str, values: NDArray | pd.Series) -> None:
    """Set a whole vector in the time vector table."""
    self._check_type(vector_id, str)
    self._check_type(values, (np.ndarray, pd.Series))
    if not self._data.empty and len(values) != len(self._data):
        message = f"Series values has different size than the other vectors in the table.\nLength values: {len(values)}\nLength vectors: {len(self._data)}"
        raise IndexError(message)
    self._data[vector_id] = values
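
A minimal usage sketch of the editor methods documented above. The constructor call, the file paths, and the "Comment" metadata key are assumptions for illustration; only the methods shown above are taken from the source.

from pathlib import Path

from framdata.file_editors.NVEParquetTimeVectorEditor import NVEParquetTimeVectorEditor

# Assumed constructor: the editor tracks a source path, a dataframe and a metadata dict internally.
editor = NVEParquetTimeVectorEditor("db01_nodes_time_vectors/Power.Nodes.prices.parquet")

ids = editor.get_vector_ids()                          # every column except the DateTime column
series = editor.get_vector(ids[0])                     # copy of one vector as a pandas Series
editor.set_vector(ids[0], series.to_numpy() * 1.05)    # e.g. scale the vector by 5 %

editor.set_metadata_by_key("Comment", "Scaled by 1.05")
editor.save_to_parquet(Path("Power.Nodes.prices_edited.parquet"))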

loaders

NVEExcelTimeVectorLoader

Bases: NVETimeVectorLoader

Class for loading time vector data from NVE excel file sources.

Meant for short time vectors (e.g. yearly volumes or installed capacities) which are desirable to view and edit easily through Excel. Supports the following formats: - 'Horizontal': one column contains the IDs, the remaining column names form the index, and vector values are stored as rows. - 'Vertical': one column is the index (DateTime), the remaining column names are vector IDs, and vectors are stored as column values.
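
To make the two layouts concrete, here is a small illustration using pandas. The column names "ID" and "DateTime" are placeholders for the TvMn.ID_COLUMN_NAME and TvMn.DATETIME_COL constants used in the source, and the vector IDs are made up.

import pandas as pd

# 'Horizontal': one row per vector, index values as column names.
horizontal = pd.DataFrame(
    {"ID": ["NO1.Price", "NO2.Price"], "2025": [35.0, 33.0], "2026": [37.0, 34.0]}
)

# 'Vertical': one DateTime index column, one column per vector.
vertical = pd.DataFrame(
    {
        "DateTime": ["2025-01-01", "2026-01-01"],
        "NO1.Price": [35.0, 37.0],
        "NO2.Price": [33.0, 34.0],
    }
)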

Source code in framdata/loaders/time_vector_loaders.py
class NVEExcelTimeVectorLoader(NVETimeVectorLoader):
    """
    Class for loading time vector data from NVE excel file sources.

    Meant for short time vectors (e.g. yearly volumes or installed capacities) which are desirable to view and edit easily through Excel.
    Supports the following formats:
        - 'Horizontal': One column containing IDs, the other column names represent the index. Vector values as rows.
        - 'Vertical': One column as index (DateTime), the other column names are vector IDs. Vectors as column values.

    """

    _SUPPORTED_SUFFIXES: ClassVar[list] = [".xlsx"]
    _DATA_SHEET = "Data"
    _METADATA_SHEET = "Metadata"

    def __init__(self, source: Path | str, require_whole_years: bool, relative_loc: Path | str | None = None, validate: bool = True) -> None:
        """
        Initialize loader instance and connect it to an Excel file containing time vector data.

        Args:
            source (Path | str): Absolute Path to database or excel file.
            require_whole_years (bool): Flag for validating that the time vectors in the source contain data for complete years.
            relative_loc (Path | str | None, optional): Path to excel file relative to source. Defaults to None.
            validate (bool, optional): Flag to turn on validation of timevectors. NB! Loads all data into memory at once. Defaults to True.

        """
        super().__init__(source, require_whole_years, relative_loc)
        self._index: TimeIndex = None

        if validate:
            self.validate_vectors()

    def get_unit(self, vector_id: str) -> str:
        """
        Get the unit of the given time vector.

        Args:
            vector_id (str): ID of a time vector. Not used since all time vectors in the NVE excel files have the same
                             unit.

        Returns:
            str: Unit of the time vector.

        """
        return self.get_metadata("")[TvMn.UNIT]

    def get_values(self, vector_id: str) -> NDArray:
        """
        Get numpy array with all the values of a given vector in the Loader's excel file.

        Args:
            vector_id (str): Unique id of the vector in the file.

        Returns:
            NDArray: Numpy array with values.

        """
        if self._data is None:
            self._data = pd.DataFrame()
        if vector_id not in self._data.columns:
            is_horizontal = self._is_horizontal_format()
            column_filter = [vector_id]
            usecols = None
            if not is_horizontal:
                usecols = column_filter

            values_df = pd.read_excel(self.get_source(), sheet_name=self._DATA_SHEET, usecols=usecols)

            if is_horizontal:  # Convert the table to large time series format
                values_df = self._process_horizontal_format(values_df)
                values_df = self._enforce_dtypes(values_df, is_horizontal)
                self._data = values_df
            else:
                values_df = self._enforce_dtypes(values_df, is_horizontal)
                self._data[vector_id] = values_df
        return self._data[vector_id].to_numpy()

    def get_index(self, vector_id: str) -> ListTimeIndex:
        """
        Get the TimeIndex describing the time dimension of the vectors in the file.

        Args:
            vector_id (str): Not used since all vectors in the NVE excel files have the same index.

        Returns:
            TimeIndex: TimeIndex object describing the excel file's index.

        """
        meta = self.get_metadata("")
        if self._index is None:
            self._index = self._create_index(
                self.get_values(TvMn.DATETIME_COL),
                is_52_week_years=meta[TvMn.IS_52_WEEK_YEARS],
                extrapolate_first_point=meta[TvMn.EXTRAPOLATE_FISRT_POINT],
                extrapolate_last_point=meta[TvMn.EXTRAPOLATE_LAST_POINT],
            )
        return self._index

    def get_metadata(self, vector_id: str) -> dict[str, bool | int | str | datetime | timedelta | tzinfo | None]:
        """
        Read Excel file metadata.

        Args:
            vector_id (str): Not used.

        Raises:
            KeyError: If an expected metadata key is missing.

        Returns:
            dict[str, bool|int|str|datetime|timedelta|tzinfo|None]: Metadata dictionary.

        """
        if self._meta is None:
            path = self.get_source()
            raw_meta = pd.read_excel(path, sheet_name=self._METADATA_SHEET, na_values=[""]).replace([np.nan], [None]).to_dict("records")[0]

            self._meta = self._process_meta(raw_meta)
        return self._meta

    def _enforce_dtypes(self, values_df: pd.DataFrame | pd.Series, issmallformat: bool) -> pd.DataFrame:
        set_dtypes = "float"
        if isinstance(values_df, pd.DataFrame):
            set_dtypes = {c: "float" for c in values_df.columns if c != TvMn.DATETIME_COL}

        # ensure correct dtypes
        try:
            return values_df.astype(set_dtypes)
        except ValueError as e:
            index_column = TvMn.ID_COLUMN_NAME if issmallformat else TvMn.DATETIME_COL
            message = f"Error in {self} while reading file. All columns except '{index_column}' must consist of only float or integer numbers."
            raise RuntimeError(message) from e

    def _process_horizontal_format(self, horizontal_format_df: pd.DataFrame) -> pd.DataFrame:
        # We have to read the whole file to find the correct series

        # Rename the id column name and then transpose to get the correct format
        # Since the columns are counted as indices when transposing, we need to reset the index (but keep the DateTime
        # column)
        reformat_df = horizontal_format_df.rename(columns={TvMn.ID_COLUMN_NAME: TvMn.DATETIME_COL}).T.reset_index(drop=False)

        # after transposing, column names are set as the first row, which is DateTime, IDs
        reformat_df.columns = reformat_df.iloc[0]
        # We reindex by dropping the first row, thus removing the row of DateTime, IDs
        reformat_df = reformat_df.reindex(reformat_df.index.drop(0)).reset_index(drop=True)

        # Since it is possible to write only year or year-month as a timestamp in the table,
        # we need to reformat to the correct datetime format
        reformat_df[TvMn.DATETIME_COL] = self._to_iso_datetimes(reformat_df[TvMn.DATETIME_COL])

        return reformat_df

    def _to_iso_datetimes(self, series: pd.Series) -> list[datetime]:
        """
        Convert a series of dates to ISO datetime format.

        Args:
            series (pd.Series): Series whose values will be converted to ISO format.

        Raises:
            RuntimeError: When an input value which cannot be converted is encountered.

        Returns:
            list[datetime]: List of formatted datetimes.

        """
        reformatted = []
        three_segments = 3
        two_segments = 2
        one_segment = 1
        for i in series:
            new_i = str(i)
            date_split = len(new_i.split("-"))
            space_split = len(new_i.split(" "))
            time_split = len(new_i.split(":"))
            try:
                if date_split == one_segment:  # Only year is defined
                    # get datetime for first week first day
                    new_i = datetime.fromisocalendar(int(new_i), 1, 1)
                elif date_split == two_segments:
                    # Year and month is defined
                    new_i = datetime.strptime(new_i + "-01", "%Y-%m-%d")  # Add first day
                elif date_split == three_segments and space_split == one_segment and time_split == one_segment:
                    # days defined but not time
                    new_i = datetime.strptime(new_i, "%Y-%m-%d")
                elif date_split == three_segments and space_split == two_segments and time_split == one_segment:
                    new_i = datetime.strptime(new_i, "%Y-%m-%d %H")
                elif date_split == three_segments and space_split == two_segments and time_split == two_segments:
                    new_i = datetime.strptime(new_i, "%Y-%m-%d %H:%M")
                elif date_split == three_segments and space_split == two_segments and time_split == three_segments:
                    # Assume time is defined
                    new_i = datetime.strptime(new_i, "%Y-%m-%d %H:%M:%S")
                else:
                    msg = f"Could not convert value '{new_i}' to datetime format."
                    raise ValueError(msg)
            except Exception as e:
                msg = f"Loader {self} could not convert value '{new_i}' to datetime format. Check formatting, for example number of spaces."
                raise RuntimeError(msg) from e
            reformatted.append(new_i)
        return sorted(reformatted)

    def _is_horizontal_format(self) -> bool:
        """Determine if the file strucure is the NVE small format."""
        column_names = pd.read_excel(self.get_source(), nrows=0, sheet_name=self._DATA_SHEET).columns.tolist()
        return TvMn.ID_COLUMN_NAME in column_names

    def _get_ids(self) -> list[str]:
        if self._content_ids is not None:
            return self._content_ids
        try:
            if self._is_horizontal_format():
                self._content_ids = pd.read_excel(
                    self.get_source(),
                    usecols=[TvMn.ID_COLUMN_NAME],
                    sheet_name=self._DATA_SHEET,
                )[TvMn.ID_COLUMN_NAME].tolist()
            else:
                columns_list = pd.read_excel(self.get_source(), nrows=0, sheet_name=self._DATA_SHEET).columns.tolist()
                columns_list.remove(TvMn.DATETIME_COL)
                self._content_ids = columns_list
        except ValueError as e:
            message = f"{self}: found problem with TimeVector IDs."
            raise RuntimeError(message) from e

        return self._content_ids

    def clear_cache(self) -> None:
        """Clear cached data."""
        self._data = None
        self._meta = None
        self._index = None
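
A minimal usage sketch of the Excel loader, assuming a hypothetical file location and vector ID; the constructor arguments match the __init__ documented below.

from framdata.loaders.time_vector_loaders import NVEExcelTimeVectorLoader

loader = NVEExcelTimeVectorLoader(
    source="db30_hydropower",                         # database folder or file (assumed layout)
    require_whole_years=False,
    relative_loc="Hydropower.Inflow.YearVolume.xlsx",
    validate=True,                                    # NB: reads and validates all vectors up front
)

values = loader.get_values("SomeModule.YearVolume")   # numpy array for one vector (hypothetical ID)
index = loader.get_index("")                          # shared index for every vector in the file
unit = loader.get_unit("")                            # single unit for the whole file
loader.clear_cache()                                  # drop cached data and metadata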
__init__(source: Path | str, require_whole_years: bool, relative_loc: Path | str | None = None, validate: bool = True) -> None

Initialize loader instance and connect it to an Excel file containing time vector data.

Parameters:

Name Type Description Default
source Path | str

Absolute Path to database or excel file.

required
require_whole_years bool

Flag for validating that the time vectors in the source contain data for complete years.

required
relative_loc Path | str | None

Path to excel file relative to source. Defaults to None.

None
validate bool

Flag to turn on validation of timevectors. NB! Loads all data into memory at once. Defaults to True.

True
Source code in framdata/loaders/time_vector_loaders.py
def __init__(self, source: Path | str, require_whole_years: bool, relative_loc: Path | str | None = None, validate: bool = True) -> None:
    """
    Initialize loader instance and connect it to an Excel file containing time vector data.

    Args:
        source (Path | str): Absolute Path to database or excel file.
        require_whole_years (bool): Flag for validating that the time vectors in the source contain data for complete years.
        relative_loc (Path | str | None, optional): Path to excel file relative to source. Defaults to None.
        validate (bool, optional): Flag to turn on validation of timevectors. NB! Loads all data into memory at once. Defaults to True.

    """
    super().__init__(source, require_whole_years, relative_loc)
    self._index: TimeIndex = None

    if validate:
        self.validate_vectors()
clear_cache() -> None

Clear cached data.

Source code in framdata/loaders/time_vector_loaders.py
def clear_cache(self) -> None:
    """Clear cached data."""
    self._data = None
    self._meta = None
    self._index = None
get_index(vector_id: str) -> ListTimeIndex

Get the TimeIndex describing the time dimension of the vectors in the file.

Parameters:

Name Type Description Default
vector_id str

Not used since all vectors in the NVE excel files have the same index.

required

Returns:

Name Type Description
TimeIndex ListTimeIndex

TimeIndex object describing the excel file's index.

Source code in framdata/loaders/time_vector_loaders.py
def get_index(self, vector_id: str) -> ListTimeIndex:
    """
    Get the TimeIndex describing the time dimension of the vectors in the file.

    Args:
        vector_id (str): Not used since all vectors in the NVE excel files have the same index.

    Returns:
        TimeIndex: TimeIndex object describing the excel file's index.

    """
    meta = self.get_metadata("")
    if self._index is None:
        self._index = self._create_index(
            self.get_values(TvMn.DATETIME_COL),
            is_52_week_years=meta[TvMn.IS_52_WEEK_YEARS],
            extrapolate_first_point=meta[TvMn.EXTRAPOLATE_FISRT_POINT],
            extrapolate_last_point=meta[TvMn.EXTRAPOLATE_LAST_POINT],
        )
    return self._index
get_metadata(vector_id: str) -> dict[str, bool | int | str | datetime | timedelta | tzinfo | None]

Read Excel file metadata.

Parameters:

Name Type Description Default
vector_id str

Not used.

required

Raises:

Type Description
KeyError

If an expected metadata key is missing.

Returns:

Type Description
dict[str, bool | int | str | datetime | timedelta | tzinfo | None]

dict[str, bool|int|str|datetime|timedelta|tzinfo|None]: Metadata dictionary.

Source code in framdata/loaders/time_vector_loaders.py
def get_metadata(self, vector_id: str) -> dict[str, bool | int | str | datetime | timedelta | tzinfo | None]:
    """
    Read Excel file metadata.

    Args:
        vector_id (str): Not used.

    Raises:
        KeyError: If an expected metadata key is missing.

    Returns:
        dict[str, bool|int|str|datetime|timedelta|tzinfo|None]: Metadata dictionary.

    """
    if self._meta is None:
        path = self.get_source()
        raw_meta = pd.read_excel(path, sheet_name=self._METADATA_SHEET, na_values=[""]).replace([np.nan], [None]).to_dict("records")[0]

        self._meta = self._process_meta(raw_meta)
    return self._meta
get_unit(vector_id: str) -> str

Get the unit of the given time vector.

Parameters:

Name Type Description Default
vector_id str

ID of a time vector. Not used since all time vectors in the NVE excel files have the same unit.

required

Returns:

Name Type Description
str str

Unit of the time vector.

Source code in framdata/loaders/time_vector_loaders.py
def get_unit(self, vector_id: str) -> str:
    """
    Get the unit of the given time vector.

    Args:
        vector_id (str): ID of a time vector. Not used since all time vectors in the NVE excel files have the same
                         unit.

    Returns:
        str: Unit of the time vector.

    """
    return self.get_metadata("")[TvMn.UNIT]
get_values(vector_id: str) -> NDArray

Get numpy array with all the values of a given vector in the Loader's excel file.

Parameters:

Name Type Description Default
vector_id str

Unique id of the vector in the file.

required

Returns:

Name Type Description
NDArray NDArray

Numpy array with values.

Source code in framdata/loaders/time_vector_loaders.py
def get_values(self, vector_id: str) -> NDArray:
    """
    Get numpy array with all the values of a given vector in the Loader's excel file.

    Args:
        vector_id (str): Unique id of the vector in the file.

    Returns:
        NDArray: Numpy array with values.

    """
    if self._data is None:
        self._data = pd.DataFrame()
    if vector_id not in self._data.columns:
        is_horizontal = self._is_horizontal_format()
        column_filter = [vector_id]
        usecols = None
        if not is_horizontal:
            usecols = column_filter

        values_df = pd.read_excel(self.get_source(), sheet_name=self._DATA_SHEET, usecols=usecols)

        if is_horizontal:  # Convert the table to large time series format
            values_df = self._process_horizontal_format(values_df)
            values_df = self._enforce_dtypes(values_df, is_horizontal)
            self._data = values_df
        else:
            values_df = self._enforce_dtypes(values_df, is_horizontal)
            self._data[vector_id] = values_df
    return self._data[vector_id].to_numpy()

NVEH5TimeVectorLoader

Bases: NVETimeVectorLoader

Class for loading time vector data from NVE HDF5 file sources.

Meant for large time vectors (e.g. hourly data over multiple years). Supports differing lengths and metadata of vectors stored in the file.

Specialized to the following format:
  • index (h5py.Group, optional): Used to define indexes for vectors when an index should apply only to that vector.
  • common_index (h5py.Dataset): Contains one numpy array for all vectors. This is a fallback index for vectors which have not defined their own index in the index group. Also used on purpose if many or all vectors have the same index.
  • metadata (h5py.Group): Used to connect a specific set of metadata to a particular vector.
  • common_metadata (h5py.Group): Contains one set of metadata fields for all vectors. Used in a similar way as common_index.
  • vectors (h5py.Group): Contains numpy arrays with the vector values connected to a unique ID. The same ID is used to connect the vector to an index or metadata.
Source code in framdata/loaders/time_vector_loaders.py
class NVEH5TimeVectorLoader(NVETimeVectorLoader):
    """
    Class for loading time vector data from NVE HDF5 file sources.

    Meant for large time vectors (e.g. hourly data over multiple years). Supports differing lengths and metadata of vectors stored in the file.

    Specialized to the following format:
        - index (h5py.Group, optional): Used to define indexes for vectors when an index should apply only to that vector.
        - common_index (h5py.Dataset): Contains one numpy array for all vectors. This is a fallback index for vectors which have not defined their own index in
                                       the index group. Also used on purpose if many or all vectors have the same index.
        - metadata (h5py.Group): Used to connect a specific set of metadata to a particular vector.
        - common_metadata (h5py.Group): Contains one set of metadata fields for all vectors. Used in a similar way as common_index.
        - vectors (h5py.Group): Contains numpy arrays containing the vector values connected to a unique ID. The same ID is used to connect the vector to an
                                index or metadata.

    """

    _SUPPORTED_SUFFIXES: ClassVar[list] = [".h5", ".hdf5"]

    def __init__(self, source: Path | str, require_whole_years: bool, relative_loc: Path | str | None = None, validate: bool = True) -> None:
        """
        Initialize loader instance and connect it to an H5 file containing time vector data.

        Args:
            source (Path | str): Absolute Path to database or HDF5 file.
            require_whole_years (bool): Flag for validating that the time vectors in the source contain data for complete years.
            relative_loc (Path | str | None, optional): Path to HDF5 file relative to source. Defaults to None.
            validate (bool, optional): Whether to validate vectors after loading. NB! Loads all data into memory at once. Defaults to True.

        """
        super().__init__(source, require_whole_years, relative_loc)
        self._index: TimeIndex = None
        self._file_pointer = None

        if validate:
            self.validate_vectors()

    def get_values(self, vector_id: str) -> NDArray:
        """
        Get numpy array with all the values of a given vector in the Loader's HDF5 file.

        Args:
            vector_id (str): Unique id of the vector in the file.

        Returns:
            NDArray: Numpy array with values.

        """
        if self._data is None:
            self._data = dict()
        if vector_id not in self._data:
            with h5py.File(self.get_source(), mode="r") as h5f:
                self._data[vector_id] = self._read_vector_field(h5f, H5Names.VECTORS_GROUP, vector_id, field_type=h5py.Dataset, use_fallback=False)[()]
        return self._data[vector_id]

    def get_index(self, vector_id: str) -> TimeIndex:
        """
        Get the TimeIndex describing the time dimension of the vectors in the file.

        Args:
            vector_id (str): Not used since all vectors in the NVE HDF5 files have the same index.

        Returns:
            TimeIndex: TimeIndex object describing the HDF5 file's index.

        """
        if self._index is None:
            meta = self.get_metadata("")

            if TvMn.FREQUENCY not in meta or (TvMn.FREQUENCY in meta and meta[TvMn.FREQUENCY] is None):
                self._index = self._create_index(
                    datetimes=self._read_index(vector_id),
                    is_52_week_years=meta[TvMn.IS_52_WEEK_YEARS],
                    extrapolate_first_point=meta[TvMn.EXTRAPOLATE_FISRT_POINT],
                    extrapolate_last_point=meta[TvMn.EXTRAPOLATE_LAST_POINT],
                )
                return self._index
            index_array = self._read_index(vector_id) if meta[TvMn.START] is None or meta[TvMn.NUM_POINTS] is None else None
            start = meta[TvMn.START] if index_array is None else index_array[0].item()
            num_points = meta[TvMn.NUM_POINTS] if index_array is None else index_array.size

            self._index = FixedFrequencyTimeIndex(
                start,
                meta[TvMn.FREQUENCY],
                num_points,
                is_52_week_years=meta[TvMn.IS_52_WEEK_YEARS],
                extrapolate_first_point=meta[TvMn.EXTRAPOLATE_FISRT_POINT],
                extrapolate_last_point=meta[TvMn.EXTRAPOLATE_LAST_POINT],
            )

        return self._index

    def _read_index(self, vector_id: str) -> NDArray[np.datetime64]:
        with h5py.File(self.get_source(), mode="r") as h5f:
            decoded_index = np.char.decode(self._read_vector_field(h5f, H5Names.INDEX_GROUP, vector_id, h5py.Dataset)[()].astype(np.bytes_), encoding="utf-8")
            return decoded_index.astype(np.datetime64)

    def _read_vector_field(
        self,
        h5file: h5py.File,
        field_name: str,
        vector_id: str,
        field_type: type[h5py.Dataset | h5py.Group],
        use_fallback: bool = True,
    ) -> h5py.Dataset | h5py.Group:
        error = ""
        if field_name in h5file:  # check if group_name exists
            main_group = h5file[field_name]
            if not isinstance(main_group, h5py.Group):
                message = f"{self} expected '{field_name}' to be a {h5py.Group} in {h5file}. Got {type(main_group)}."
                raise TypeError(message)

            if vector_id in main_group:
                vector_field = main_group[vector_id]
                if not isinstance(vector_field, field_type):
                    message = f"{self} expected '{vector_id}' to be a {field_type} in {h5file}. Got {type(vector_field)}"
                    raise TypeError(message)
                return vector_field
            error = f"'{vector_id}' was not found in '{field_name}' group"
        else:
            error = f"'{field_name}' was not found in file"

        no_fallback_message = f"{self} expected '{vector_id}' in {h5py.Group} '{field_name}' "
        if not use_fallback:
            no_fallback_message += f"but {error}."
            raise KeyError(no_fallback_message)

        fallback_name = H5Names.COMMON_PREFIX + field_name
        if fallback_name in h5file:  # check if common_ + group_name exists
            fallback_field = h5file[fallback_name]
            if not isinstance(fallback_field, field_type):
                message = f"{self} expected '{fallback_field}' to be a {field_type} in {h5file}. Got {type(fallback_field)}."
                raise TypeError(message)
            return fallback_field

        message = (
            no_fallback_message
            + f"or a fallback {field_type} '{fallback_name}' in H5 file but "
            + f"{error},"
            + f" and fallback {field_type} '{fallback_name}' not found in file."
        )
        raise KeyError(message)

    def get_metadata(self, vector_id: str) -> dict[str, bool | int | str | datetime | timedelta | tzinfo | None]:
        """
        Retrieve and decode custom metadata from the HDF5 file.

        Args:
            vector_id (str): Not used

        Raises:
            KeyError: If any of the expected metadata keys is not found in file.

        Returns:
            dict: Dictionary with decoded metadata.

        """
        if self._meta is None:
            errors = set()
            meta = {}
            with h5py.File(self.get_source(), mode="r") as h5f:
                meta_group = self._read_vector_field(h5f, H5Names.METADATA_GROUP, vector_id, h5py.Group)
                for k, m in meta_group.items():
                    if isinstance(m, h5py.Dataset):
                        meta[k] = m[()]
                    else:
                        errors.add(f"Improper metadata format: Metadata key {k} exists but is a h5 group when it should be a h5 dataset.")
            self._report_errors(errors)
            self._meta = self._process_meta(meta)
        return self._meta

    def _get_ids(self) -> list[str]:
        with h5py.File(self.get_source(), mode="r") as h5f:
            if H5Names.VECTORS_GROUP in h5f:
                return list(h5f[H5Names.VECTORS_GROUP].keys())
            message = f"{self} required key '{H5Names.VECTORS_GROUP}' was not found in file."
            raise KeyError(message)

    def clear_cache(self) -> None:
        """Clear cached data."""
        self._data = None
        self._meta = None
        self._index = None
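
A sketch of an HDF5 file in the layout described above, written with h5py. The literal group and dataset names are assumptions; in framdata they come from the H5Names constants (VECTORS_GROUP, INDEX_GROUP, METADATA_GROUP and the COMMON_PREFIX fallbacks), whose values are not shown in this section.

import h5py
import numpy as np

with h5py.File("example_timevectors.h5", "w") as h5f:
    vectors = h5f.create_group("vectors")
    vectors.create_dataset("NO1.inflow", data=np.random.rand(8760))
    vectors.create_dataset("NO2.inflow", data=np.random.rand(8760))

    # Fallback index shared by every vector that does not define its own index.
    hours = np.arange("2025-01-01", "2026-01-01", dtype="datetime64[h]")
    h5f.create_dataset("common_index", data=hours.astype("S"))

    # Metadata shared by all vectors (stored as one dataset per field).
    common_meta = h5f.create_group("common_metadata")
    common_meta.create_dataset("Unit", data="m3/s")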
__init__(source: Path | str, require_whole_years: bool, relative_loc: Path | str | None = None, validate: bool = True) -> None

Initialize loader instance and connect it to an H5 file containing time vector data.

Parameters:

Name Type Description Default
source Path | str

Absolute Path to database or HDF5 file.

required
require_whole_years bool

Flag for validating that the time vectors in the source contain data for complete years.

required
relative_loc Path | str | None

Path to HDF5 file relative to source. Defaults to None.

None
validate bool

Whether to validate vectors after loading. NB! Loads all data into memory at once. Defaults to True.

True
Source code in framdata/loaders/time_vector_loaders.py
def __init__(self, source: Path | str, require_whole_years: bool, relative_loc: Path | str | None = None, validate: bool = True) -> None:
    """
    Initialize loader instance and connect it to an H5 file containing time vector data.

    Args:
        source (Path | str): Absolute Path to database or HDF5 file.
        require_whole_years (bool): Flag for validating that the time vectors in the source contain data for complete years.
        relative_loc (Path | str | None, optional): Path to HDF5 file relative to source. Defaults to None.
        validate (bool, optional): Whether to validate vectors after loading. NB! Loads all data into memory at once. Defaults to True.

    """
    super().__init__(source, require_whole_years, relative_loc)
    self._index: TimeIndex = None
    self._file_pointer = None

    if validate:
        self.validate_vectors()
clear_cache() -> None

Clear cached data.

Source code in framdata/loaders/time_vector_loaders.py
def clear_cache(self) -> None:
    """Clear cached data."""
    self._data = None
    self._meta = None
    self._index = None
get_index(vector_id: str) -> TimeIndex

Get the TimeIndex describing the time dimension of the vectors in the file.

Parameters:

Name Type Description Default
vector_id str

Not used since all vectors in the NVE HDF5 files have the same index.

required

Returns:

Name Type Description
TimeIndex TimeIndex

TimeIndex object describing the HDF5 file's index.

Source code in framdata/loaders/time_vector_loaders.py
def get_index(self, vector_id: str) -> TimeIndex:
    """
    Get the TimeIndex describing the time dimension of the vectors in the file.

    Args:
        vector_id (str): Not used since all vectors in the NVE HDF5 files have the same index.

    Returns:
        TimeIndex: TimeIndex object describing the HDF5 file's index.

    """
    if self._index is None:
        meta = self.get_metadata("")

        if TvMn.FREQUENCY not in meta or (TvMn.FREQUENCY in meta and meta[TvMn.FREQUENCY] is None):
            self._index = self._create_index(
                datetimes=self._read_index(vector_id),
                is_52_week_years=meta[TvMn.IS_52_WEEK_YEARS],
                extrapolate_first_point=meta[TvMn.EXTRAPOLATE_FISRT_POINT],
                extrapolate_last_point=meta[TvMn.EXTRAPOLATE_LAST_POINT],
            )
            return self._index
        index_array = self._read_index(vector_id) if meta[TvMn.START] is None or meta[TvMn.NUM_POINTS] is None else None
        start = meta[TvMn.START] if index_array is None else index_array[0].item()
        num_points = meta[TvMn.NUM_POINTS] if index_array is None else index_array.size

        self._index = FixedFrequencyTimeIndex(
            start,
            meta[TvMn.FREQUENCY],
            num_points,
            is_52_week_years=meta[TvMn.IS_52_WEEK_YEARS],
            extrapolate_first_point=meta[TvMn.EXTRAPOLATE_FISRT_POINT],
            extrapolate_last_point=meta[TvMn.EXTRAPOLATE_LAST_POINT],
        )

    return self._index
get_metadata(vector_id: str) -> dict[str, bool | int | str | datetime | timedelta | tzinfo | None]

Retrieve and decode custom metadata from the HDF5 file.

Parameters:

Name Type Description Default
vector_id str

Not used

required

Raises:

Type Description
KeyError

If any of the expected metadata keys is not found in file.

Returns:

Name Type Description
dict dict[str, bool | int | str | datetime | timedelta | tzinfo | None]

Dictionary with decoded metadata.

Source code in framdata/loaders/time_vector_loaders.py
def get_metadata(self, vector_id: str) -> dict[str, bool | int | str | datetime | timedelta | tzinfo | None]:
    """
    Retrieve and decode custom metadata from the HDF5 file.

    Args:
        vector_id (str): Not used

    Raises:
        KeyError: If any of the expected metadata keys is not found in file.

    Returns:
        dict: Dictionary with decoded metadata.

    """
    if self._meta is None:
        errors = set()
        meta = {}
        with h5py.File(self.get_source(), mode="r") as h5f:
            meta_group = self._read_vector_field(h5f, H5Names.METADATA_GROUP, vector_id, h5py.Group)
            for k, m in meta_group.items():
                if isinstance(m, h5py.Dataset):
                    meta[k] = m[()]
                else:
                    errors.add(f"Improper metadata format: Metadata key {k} exists but is a h5 group when it should be a h5 dataset.")
        self._report_errors(errors)
        self._meta = self._process_meta(meta)
    return self._meta
get_values(vector_id: str) -> NDArray

Get numpy array with all the values of a given vector in the Loader's HDF5 file.

Parameters:

Name Type Description Default
vector_id str

Unique id of the vector in the file.

required

Returns:

Name Type Description
NDArray NDArray

Numpy array with values.

Source code in framdata/loaders/time_vector_loaders.py
def get_values(self, vector_id: str) -> NDArray:
    """
    Get numpy array with all the values of a given vector in the Loader's HDF5 file.

    Args:
        vector_id (str): Unique id of the vector in the file.

    Returns:
        NDArray: Numpy array with values.

    """
    if self._data is None:
        self._data = dict()
    if vector_id not in self._data:
        with h5py.File(self.get_source(), mode="r") as h5f:
            self._data[vector_id] = self._read_vector_field(h5f, H5Names.VECTORS_GROUP, vector_id, field_type=h5py.Dataset, use_fallback=False)[()]
    return self._data[vector_id]

NVEParquetTimeVectorLoader

Bases: NVETimeVectorLoader

Class for loading time vector data from NVE parquet file sources.

Meant for large time vectors. All vectors in the file must have the same lengths and metadata. Supports the format: - 'Vertical': one index column (DateTime), with the other columns containing vector values.

Source code in framdata/loaders/time_vector_loaders.py
class NVEParquetTimeVectorLoader(NVETimeVectorLoader):
    """
    Class for loading time vector data from NVE parquet file sources.

    Meant for large time vectors. All vectors in the file must have the same lengths and metadata.
    Supports the format:
        - 'Vertical': one index column (DateTime), with the other columns containing vector values.

    """

    _SUPPORTED_SUFFIXES: ClassVar[list] = [".parquet"]

    def __init__(self, source: Path | str, require_whole_years: bool, relative_loc: Path | str | None = None, validate: bool = True) -> None:
        """
        Initialize loader instance and connect it to a Parquet file containing time vector data.

        Args:
            source (Path | str): Absolute Path to database or parquet file.
            require_whole_years (bool): Flag for validating that the time vectors in the source contain data for complete years.
            relative_loc (Path | str | None, optional): Path to parquet file relative to source. Defaults to None.
            validate (bool, optional): Flag to turn on validation of timevectors. NB! Loads all data into memory at once. Defaults to True.

        """
        super().__init__(source, require_whole_years, relative_loc)
        self._index: TimeIndex = None
        if validate:
            self.validate_vectors()

    def get_values(self, vector_id: str) -> NDArray:
        """
        Get numpy array with all the values of a given vector in the Loader's parquet file.

        Args:
            vector_id (str): Unique id of the vector in the file.

        Returns:
            NDArray: Numpy array with values.

        """
        if self._data is None:
            self._data = dict()
        if vector_id not in self._data:
            table = pq.read_table(self.get_source(), columns=[vector_id])
            self._data[vector_id] = table[vector_id].to_numpy()
        return self._data[vector_id]

    def get_index(self, vector_id: str) -> TimeIndex:  # Could be more types of indexes?
        """
        Get the TimeIndex describing the time dimension of the vectors in the file.

        Args:
            vector_id (str): Not used since all vectors in the NVE parquet files have the same index.

        Returns:
            TimeIndex: TimeIndex object describing the parquet file's index.

        """
        if self._index is None:
            meta = self.get_metadata("")

            if TvMn.FREQUENCY not in meta or (TvMn.FREQUENCY in meta and meta[TvMn.FREQUENCY] is None):
                datetime_index = pd.DatetimeIndex(
                    pd.read_parquet(self.get_source(), columns=[TvMn.DATETIME_COL])[TvMn.DATETIME_COL],
                    tz=meta[TvMn.TIMEZONE],
                ).tolist()
                self._index = self._create_index(
                    datetimes=datetime_index,
                    is_52_week_years=meta[TvMn.IS_52_WEEK_YEARS],
                    extrapolate_first_point=meta[TvMn.EXTRAPOLATE_FISRT_POINT],
                    extrapolate_last_point=meta[TvMn.EXTRAPOLATE_LAST_POINT],
                )
                return self._index

            parquet_file = None
            if TvMn.START not in meta or (TvMn.START in meta and meta[TvMn.START] is None):
                parquet_file = pq.ParquetFile(self.get_source())
                # read the first DateTime value to use as the start of the index
                first_batch = next(parquet_file.iter_batches(batch_size=1, columns=[TvMn.DATETIME_COL]))
                start = pd.to_datetime(first_batch.to_pandas()[TvMn.DATETIME_COL].iloc[0])
            else:
                start = meta[TvMn.START]

            if TvMn.NUM_POINTS not in meta or (TvMn.NUM_POINTS in meta and meta[TvMn.NUM_POINTS] is None):
                if parquet_file is None:
                    parquet_file = pq.ParquetFile(self.get_source())
                num_points = parquet_file.metadata.num_rows
            else:
                num_points = meta[TvMn.NUM_POINTS]
            self._index = FixedFrequencyTimeIndex(
                start,
                meta[TvMn.FREQUENCY],
                num_points,
                is_52_week_years=meta[TvMn.IS_52_WEEK_YEARS],
                extrapolate_first_point=meta[TvMn.EXTRAPOLATE_FISRT_POINT],
                extrapolate_last_point=meta[TvMn.EXTRAPOLATE_LAST_POINT],
            )

        return self._index

    def get_metadata(self, vector_id: str) -> dict[str, bool | int | str | datetime | timedelta | tzinfo | None]:
        """
        Retrieve and decode custom metadata from the parquet file.

        Args:
            vector_id (str): Not used

        Raises:
            KeyError: If any of the expected metadata keys is not found in file.

        Returns:
            dict: Dictionary with decoded metadata.

        """
        if self._meta is None:
            path = self.get_source()
            raw_meta = pq.ParquetFile(path).schema_arrow.metadata

            self._meta = self._process_meta(raw_meta)
        return self._meta

    def _get_ids(self) -> list[str]:
        parquet_file = pq.ParquetFile(self.get_source())
        time_vector_ids: list[str] = parquet_file.schema_arrow.names
        time_vector_ids.remove(TvMn.DATETIME_COL)
        return time_vector_ids

    def clear_cache(self) -> None:
        """Clear cached data."""
        self._data = None
        self._meta = None
        self._index = None
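
A minimal usage sketch of the parquet loader, with hypothetical paths and vector IDs; the constructor arguments match the __init__ documented below.

from framdata.loaders.time_vector_loaders import NVEParquetTimeVectorLoader

loader = NVEParquetTimeVectorLoader(
    source="db01_nodes_time_vectors/Power.Nodes.profiles.parquet",
    require_whole_years=True,
    validate=False,   # skip up-front validation so vectors are read lazily, column by column
)

profile = loader.get_values("NO1.profile")   # reads only that column from the parquet file
index = loader.get_index("")                 # fixed-frequency or list index, built from the metadata
meta = loader.get_metadata("")               # decoded schema metadata shared by all vectors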
__init__(source: Path | str, require_whole_years: bool, relative_loc: Path | str | None = None, validate: bool = True) -> None

Initialize loader instance and connect it to a Parquet file containing time vector data.

Parameters:

Name Type Description Default
source Path | str

Absolute Path to database or parquet file.

required
require_whole_years bool

Flag for validating that the time vectors in the source contain data for complete years.

required
relative_loc Path | str | None

Path to parquet file relative to source. Defaults to None.

None
validate bool

Flag to turn on validation of timevectors. NB! Loads all data into memory at once. Defaults to True.

True
Source code in framdata/loaders/time_vector_loaders.py
def __init__(self, source: Path | str, require_whole_years: bool, relative_loc: Path | str | None = None, validate: bool = True) -> None:
    """
    Initialize loader instance and connect it to a Parquet file containing time vector data.

    Args:
        source (Path | str): Absolute Path to database or parquet file.
        require_whole_years (bool): Flag for validating that the time vectors in the source contain data for complete years.
        relative_loc (Path | str | None, optional): Path to parquet file relative to source. Defaults to None.
        validate (bool, optional): Flag to turn on validation of timevectors. NB! Loads all data into memory at once. Defaults to True.

    """
    super().__init__(source, require_whole_years, relative_loc)
    self._index: TimeIndex = None
    if validate:
        self.validate_vectors()
clear_cache() -> None

Clear cached data.

Source code in framdata/loaders/time_vector_loaders.py
def clear_cache(self) -> None:
    """Clear cached data."""
    self._data = None
    self._meta = None
    self._index = None
get_index(vector_id: str) -> TimeIndex

Get the TimeIndex describing the time dimension of the vectors in the file.

Parameters:

Name Type Description Default
vector_id str

Not used since all vectors in the NVE parquet files have the same index.

required

Returns:

Name Type Description
TimeIndex TimeIndex

TimeIndex object describing the parquet file's index.

Source code in framdata/loaders/time_vector_loaders.py
def get_index(self, vector_id: str) -> TimeIndex:  # Could be more types of indexes?
    """
    Get the TimeIndex describing the time dimension of the vectors in the file.

    Args:
        vector_id (str): Not used since all vectors in the NVE parquet files have the same index.

    Returns:
        TimeIndex: TimeIndex object describing the parquet file's index.

    """
    if self._index is None:
        meta = self.get_metadata("")

        if TvMn.FREQUENCY not in meta or (TvMn.FREQUENCY in meta and meta[TvMn.FREQUENCY] is None):
            datetime_index = pd.DatetimeIndex(
                pd.read_parquet(self.get_source(), columns=[TvMn.DATETIME_COL])[TvMn.DATETIME_COL],
                tz=meta[TvMn.TIMEZONE],
            ).tolist()
            self._index = self._create_index(
                datetimes=datetime_index,
                is_52_week_years=meta[TvMn.IS_52_WEEK_YEARS],
                extrapolate_first_point=meta[TvMn.EXTRAPOLATE_FISRT_POINT],
                extrapolate_last_point=meta[TvMn.EXTRAPOLATE_LAST_POINT],
            )
            return self._index

        parquet_file = None
        if TvMn.START not in meta or (TvMn.START in meta and meta[TvMn.START] is None):
            parquet_file = pq.ParquetFile(self.get_source())
            # read the first DateTime value to use as the start of the index
            first_batch = next(parquet_file.iter_batches(batch_size=1, columns=[TvMn.DATETIME_COL]))
            start = pd.to_datetime(first_batch.to_pandas()[TvMn.DATETIME_COL].iloc[0])
        else:
            start = meta[TvMn.START]

        if TvMn.NUM_POINTS not in meta or (TvMn.NUM_POINTS in meta and meta[TvMn.NUM_POINTS] is None):
            if parquet_file is None:
                parquet_file = pq.ParquetFile(self.get_source())
            num_points = parquet_file.metadata.num_rows
        else:
            num_points = meta[TvMn.NUM_POINTS]
        self._index = FixedFrequencyTimeIndex(
            start,
            meta[TvMn.FREQUENCY],
            num_points,
            is_52_week_years=meta[TvMn.IS_52_WEEK_YEARS],
            extrapolate_first_point=meta[TvMn.EXTRAPOLATE_FISRT_POINT],
            extrapolate_last_point=meta[TvMn.EXTRAPOLATE_LAST_POINT],
        )

    return self._index
get_metadata(vector_id: str) -> dict[str, bool | int | str | datetime | timedelta | tzinfo | None]

Retrieve and decode custom metadata from the parquet file.

Parameters:

Name Type Description Default
vector_id str

Not used

required

Raises:

Type Description
KeyError

If any of the expected metadata keys is not found in file.

Returns:

Name Type Description
dict dict[str, bool | int | str | datetime | timedelta | tzinfo | None]

Dictionary with decoded metadata.

Source code in framdata/loaders/time_vector_loaders.py
def get_metadata(self, vector_id: str) -> dict[str, bool | int | str | datetime | timedelta | tzinfo | None]:
    """
    Retrieve and decode custom metadata from the parquet file.

    Args:
        vector_id (str): Not used

    Raises:
        KeyError: If any of the expected metadata keys is not found in file.

    Returns:
        dict: Dictionary with decoded metadata.

    """
    if self._meta is None:
        path = self.get_source()
        raw_meta = pq.ParquetFile(path).schema_arrow.metadata

        self._meta = self._process_meta(raw_meta)
    return self._meta
get_values(vector_id: str) -> NDArray

Get numpy array with all the values of a given vector in the Loader's parquet file.

Parameters:

Name Type Description Default
vector_id str

Unique id of the vector in the file.

required

Returns:

Name Type Description
NDArray NDArray

Numpy array with values.

Source code in framdata/loaders/time_vector_loaders.py
def get_values(self, vector_id: str) -> NDArray:
    """
    Get numpy array with all the values of a given vector in the Loader's parquet file.

    Args:
        vector_id (str): Unique id of the vector in the file.

    Returns:
        NDArray: Numpy array with values.

    """
    if self._data is None:
        self._data = dict()
    if vector_id not in self._data:
        table = pq.read_table(self.get_source(), columns=[vector_id])
        self._data[vector_id] = table[vector_id].to_numpy()
    return self._data[vector_id]

NVEYamlTimeVectoroader

Bases: NVETimeVectorLoader

Class for loading time vector data from NVE YAML file sources.

Meant for very sparse time vector data, where the vectors have varying lengths and indexes. Currently all vectors must have the same metadata within each file. Supported format: - Metadata: a field containing a dictionary with metadata for all vectors. - Other fields are vector IDs, each with lists for the x and y axes.

Source code in framdata/loaders/time_vector_loaders.py
class NVEYamlTimeVectoroader(NVETimeVectorLoader):
    """
    Class for loading time vector data from NVE YAML file sources.

    Meant for very sparse time vector data, where the vectors have varying lengths and indexes. Currently all vectors must have the same metadata within each
    file.
    Supported format:
        - Metadata: a field containing a dictionary with metadata for all vectors.
        - Other fields are vector IDs, each with lists for the x and y axes.

    """

    _SUPPORTED_SUFFIXES: ClassVar[list] = [".yaml", ".yml"]

    def __init__(self, source: Path | str, require_whole_years: bool, relative_loc: Path | str | None = None, validate: bool = True) -> None:
        """
        Initialize loader instance and connect it to a YAML file containing time vector data.

        Args:
            source (Path | str): Absolute Path to database or YAML file.
            require_whole_years (bool): Flag for validating that the time vectors in the source contain data for complete years.
            relative_loc (Path | str | None, optional): Path to YAML file relative to source. Defaults to None.
            validate (bool, optional): Flag to turn on validation of timevectors. NB! Loads all data into memory at once. Defaults to True.

        """
        super().__init__(source, require_whole_years, relative_loc)
        self._content_ids: list[str] = None

        self._values_label: str = None
        self._index_label: str = None

        if validate:
            self.validate_vectors()

    def get_values(self, vector_id: str) -> NDArray:
        """
        Get values of vector.

        Args:
            vector_id (str): Unique id of the curve in the Loader source.

        Returns:
            NDArray: Numpy array with values of vector.

        """
        if self._data is None:
            self._parse_file()
        values_list = self._data[vector_id][self._values_label]
        if len(values_list) == 0:
            message = f"Time vector {vector_id} in {self} contains no points."
            raise ValueError(message)
        return np.asarray(values_list)

    def get_index(self, vector_id: str) -> TimeIndex:
        """
        Get index of vector.

        Args:
            vector_id (str): Unique id of the curve in the Loader source.

        Returns:
            NDArray: Numpy array with index of vector.

        """
        meta = self.get_metadata(vector_id)  # also parses data
        try:
            datetime_list = [self._date_to_datetime(index_val) for index_val in self._data[vector_id][self._index_label]]
        except ValueError as e:
            message = f"{self} got non date or none datetime values in index field of vector {vector_id}."
            raise ValueError(message) from e

        if len(datetime_list) == 0:
            message = f"Index of {vector_id} in {self} contains no points."
            raise ValueError(message)

        if (len(datetime_list) == 1 or self.get_values(vector_id).size == 1) and meta[TvMn.EXTRAPOLATE_FISRT_POINT] and meta[TvMn.EXTRAPOLATE_LAST_POINT]:
            # Even though _create_index can now handle ConstantTimeIndexes,
            # we need to consider that YAML time vectors can have the extra end date for its final period stored in its index.
            # That would lead to _create_index not creating a constant one when it should.
            # We may remove this feature in the future.
            return ConstantTimeIndex()

        args = (
            datetime_list,
            meta[TvMn.IS_52_WEEK_YEARS],
            meta[TvMn.EXTRAPOLATE_FISRT_POINT],
            meta[TvMn.EXTRAPOLATE_LAST_POINT],
        )

        if len(datetime_list) == len(self.get_values(vector_id)) + 1:
            return ListTimeIndex(*args)
        # create index with added end datetime
        return self._create_index(*args)

    def get_metadata(self, vector_id: str) -> dict[str, bool | int | str | datetime | timedelta | tzinfo | None]:
        """
        Read YAML file metadata.

        Args:
            vector_id (str): Not used.

        Raises:
            KeyError: If an expected metadata key is missing.

        Returns:
            dict[str, bool|int|str|datetime|timedelta|tzinfo|None]: Metadata dictionary.

        """
        if self._meta is None:
            raw_meta = self._data[YamlNames.metadata_field][YamlNames.x_field]

            self._meta = self._process_meta(raw_meta)
        return self._meta

    def _get_ids(self) -> list[str]:
        if self._content_ids is None:
            if self._data is None:
                self._parse_file()
            ids_list = list(self._data.keys())
            ids_list.remove(YamlNames.metadata_field)
            self._content_ids = ids_list
        return self._content_ids

    def _parse_file(self) -> None:
        with self.get_source().open(encoding=YamlNames.encoding) as f:
            d = yaml.safe_load(f)
            self._x_meta = d[YamlNames.metadata_field][YamlNames.x_field]
            self._y_meta = d[YamlNames.metadata_field][YamlNames.y_field]

            self._values_label = self._x_meta[YamlNames.attribute]
            self._index_label = self._y_meta[YamlNames.attribute]

            self._data = d

    def _date_to_datetime(self, value: date | datetime) -> datetime:
        if isinstance(value, date):
            value = datetime(value.year, value.month, value.day)
        elif not isinstance(value, datetime):
            message = "Value must be date or datetime."
            raise ValueError(message)
        return value

    def clear_cache(self) -> None:
        """Clear cached data."""
        self._data = None
        self._meta = None

        self._content_ids = None

        self._values_label = None
        self._index_label = None
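
A heavily assumption-laden sketch of the YAML layout described above, parsed with the standard yaml module. The real field names come from the YamlNames constants (metadata_field, x_field, y_field, attribute), whose values are not shown in this section; the field names and vector below are placeholders.

import yaml

text = """
Metadata:
  x: {Attribute: Values, Unit: EUR/MWh}
  y: {Attribute: Index}
SomeArea.Price:                      # one top-level field per vector ID
  Index: [2025-01-01, 2030-01-01]    # two index points ...
  Values: [45.0]                     # ... spanning one value (the final period's end date is included)
"""
data = yaml.safe_load(text)
vector = data["SomeArea.Price"]      # {'Index': [...], 'Values': [45.0]}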
__init__(source: Path | str, require_whole_years: bool, relative_loc: Path | str | None = None, validate: bool = True) -> None

Initialize loader instance and connect it to a YAML file containing time vector data.

Parameters:

Name Type Description Default
source Path | str

Absolute Path to database or YAML file.

required
require_whole_years bool

Flag for validating that the time vectors in the source contain data for complete years.

required
relative_loc Path | str | None

Path to YAML file relative to source. Defaults to None.

None
validate bool

Flag to turn on validation of timevectors. NB! Loads all data into memory at once. Defaults to True.

True
Source code in framdata/loaders/time_vector_loaders.py
def __init__(self, source: Path | str, require_whole_years: bool, relative_loc: Path | str | None = None, validate: bool = True) -> None:
    """
    Initialize loader instance and connect it to a YAML file containing time vector data.

    Args:
        source (Path | str): Absolute Path to database or YAML file.
        require_whole_years (bool): Flag for validating that the time vectors in the source contain data for complete years.
        relative_loc (Path | str | None, optional): Path to YAML file relative to source. Defaults to None.
        validate (bool, optional): Flag to turn on validation of timevectors. NB! Loads all data into memory at once. Defaults to True.

    """
    super().__init__(source, require_whole_years, relative_loc)
    self._content_ids: list[str] = None

    self._values_label: str = None
    self._index_label: str = None

    if validate:
        self.validate_vectors()
clear_cache() -> None

Clear cached data.

Source code in framdata/loaders/time_vector_loaders.py
def clear_cache(self) -> None:
    """Clear cached data."""
    self._data = None
    self._meta = None

    self._content_ids = None

    self._values_label = None
    self._index_label = None
get_index(vector_id: str) -> TimeIndex

Get index of vector.

Parameters:

Name Type Description Default
vector_id str

Unique id of the curve in the Loader source.

required

Returns:

Name Type Description
NDArray TimeIndex

Numpy array with index of vector.

Source code in framdata/loaders/time_vector_loaders.py
def get_index(self, vector_id: str) -> TimeIndex:
    """
    Get index of vector.

    Args:
        vector_id (str): Unique id of the curve in the Loader source.

    Returns:
        NDArray: Numpy array with index of vector.

    """
    meta = self.get_metadata(vector_id)  # also parses data
    try:
        datetime_list = [self._date_to_datetime(index_val) for index_val in self._data[vector_id][self._index_label]]
    except ValueError as e:
        message = f"{self} got non date or none datetime values in index field of vector {vector_id}."
        raise ValueError(message) from e

    if len(datetime_list) == 0:
        message = f"Index of {vector_id} in {self} contains no points."
        raise ValueError(message)

    if (len(datetime_list) == 1 or self.get_values(vector_id).size == 1) and meta[TvMn.EXTRAPOLATE_FISRT_POINT] and meta[TvMn.EXTRAPOLATE_LAST_POINT]:
        # Even though _create_index can now handle ConstantTimeIndexes,
        # we need to consider that YAML time vectors can have the extra end date for its final period stored in its index.
        # That would lead to _create_index not creating a constant one when it should.
        # We may remove this feature in the future.
        return ConstantTimeIndex()

    args = (
        datetime_list,
        meta[TvMn.IS_52_WEEK_YEARS],
        meta[TvMn.EXTRAPOLATE_FISRT_POINT],
        meta[TvMn.EXTRAPOLATE_LAST_POINT],
    )

    if len(datetime_list) == len(self.get_values(vector_id)) + 1:
        return ListTimeIndex(*args)
    # create index with added end datetime
    return self._create_index(*args)
get_metadata(vector_id: str) -> dict[str, bool | int | str | datetime | timedelta | tzinfo | None]

Read YAML file metadata.

Parameters:

Name Type Description Default
vector_id str

Not used.

required

Raises:

Type Description
KeyError

If an expected metadata key is missing.

Returns:

Type Description
dict[str, bool | int | str | datetime | timedelta | tzinfo | None]

dict[str, bool|int|str|datetime|timedelta|tzinfo|None]: Metadata dictionary.

Source code in framdata/loaders/time_vector_loaders.py
def get_metadata(self, vector_id: str) -> dict[str, bool | int | str | datetime | timedelta | tzinfo | None]:
    """
    Read YAML file metadata.

    Args:
        vector_id (str): Not used.

    Raises:
        KeyError: If an expected metadata key is missing.

    Returns:
        dict[str, bool|int|str|datetime|timedelta|tzinfo|None]: Metadata dictionary.

    """
    if self._meta is None:
        raw_meta = self._data[YamlNames.metadata_field][YamlNames.x_field]

        self._meta = self._process_meta(raw_meta)
    return self._meta
get_values(vector_id: str) -> NDArray

Get values of vector.

Parameters:

Name Type Description Default
vector_id str

Unique id of the vector in the Loader source.

required

Returns:

Name Type Description
NDArray NDArray

Numpy array with values of vector.

Source code in framdata/loaders/time_vector_loaders.py
def get_values(self, vector_id: str) -> NDArray:
    """
    Get values of vector.

    Args:
        vector_id (str): Unique id of the vector in the Loader source.

    Returns:
        NDArray: Numpy array with values of vector.

    """
    if self._data is None:
        self._parse_file()
    values_list = self._data[vector_id][self._values_label]
    if len(values_list) == 0:
        message = f"Time vector {vector_id} in {self} contains no points."
        raise ValueError(message)
    return np.asarray(values_list)
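
A minimal usage sketch of this loader, assuming it is NVEYamlTimeVectorLoader from framdata/loaders/time_vector_loaders.py (the file name and vector id below are illustrative):

from framdata.loaders.time_vector_loaders import NVEYamlTimeVectorLoader

loader = NVEYamlTimeVectorLoader("some_vectors.yaml", require_whole_years=False)
values = loader.get_values("some_vector_id")  # numpy array with the vector's values
index = loader.get_index("some_vector_id")    # TimeIndex describing the vector's time dimension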

NVETimeVectorLoader

Loader for NVE time vector data.

This module provides the NVETimeVectorLoader class, which extends FileLoader and TimeVectorLoader to handle metadata and validation for time vector data from NVE file sources.

NVETimeVectorLoader

Bases: FileLoader, TimeVectorLoader

Common interface for metadata in NVE TimeVectorLoaders.

Source code in framdata/loaders/NVETimeVectorLoader.py
class NVETimeVectorLoader(FileLoader, TimeVectorLoader):
    """Common interface for metadata in NVE TimeVectorLoaders."""

    def __init__(self, source: Path | str, require_whole_years: bool, relative_loc: Path | str | None = None) -> None:
        """
        Initialize NVETimeVectorLoader with source and optional relative location.

        Args:
            source (Path | str): Path or string to the source file.
            require_whole_years (bool): Flag for validating that the time vectors in the source contain data for complete years.
            relative_loc (Path | str | None, optional): Relative location, defaults to None.

        """
        super().__init__(source, relative_loc)

        self._data: dict[str, NDArray] = None
        self._meta: dict[str, bool | int | str | datetime | timedelta | tzinfo] = None

        self._require_whole_years = require_whole_years

    def is_max_level(self, vector_id: str) -> bool | None:
        """
        Check if the time vector is classified as a max level vector.

        Args:
            vector_id (str): ID of the time vector.

        Returns:
            bool | None: True if max level, False otherwise, or None if not specified.

        """
        return self.get_metadata(vector_id)[TvMn.IS_MAX_LEVEL]

    def is_zero_one_profile(self, vector_id: str) -> bool | None:
        """
        Check if the time vector is classified as a zero-one profile vector.

        Args:
            vector_id (str): ID of the time vector.

        Returns:
            bool | None: True if zero-one profile, False otherwise, or None if not specified.

        """
        return self.get_metadata(vector_id)[TvMn.IS_ZERO_ONE_PROFILE]

    def get_unit(self, vector_id: str) -> str:
        """
        Get the unit of the given time vector.

        Args:
            vector_id (str): ID of a time vector. Not used since all time vectors in the NVE parquet files have the same
                             unit.

        Returns:
            str: Unit of the time vector.

        """
        return self.get_metadata(vector_id)[TvMn.UNIT]

    def get_reference_period(self, vector_id: str) -> ReferencePeriod | None:
        """
        Get reference period from metadata.

        Args:
            vector_id (str): Not used.

        Raises:
            ValueError: If only one of start year or number of years is set in metadata.

        Returns:
            ReferencePeriod | None

        """
        start_year = self.get_metadata(vector_id)[TvMn.REF_PERIOD_START_YEAR]
        num_years = self.get_metadata(vector_id)[TvMn.REF_PERIOD_NUM_YEARS]

        ref_period = None
        if start_year and num_years:
            ref_period = ReferencePeriod(start_year=start_year, num_years=num_years)
        elif start_year or num_years:
            message = (
                f"{self}: Both {TvMn.REF_PERIOD_START_YEAR} and {TvMn.REF_PERIOD_NUM_YEARS} must be provided for a valid reference period."
                "Alternatively, both must be None for undefined reference period."
            )
            raise ValueError(message)
        return ref_period

    def validate_vectors(self) -> None:
        """
        Validate data in all vectors contained in the Loader.

        Conditions validated:
            - If vector contains negative values.
            (- If vector is a zero one profile and contains values outside the unit interval.) * not in use currently

        Raises:
            ValueError: When conditions are violated.

        """
        errors = set()
        for vector_id in self.get_ids():
            errors |= self._validate_vector(vector_id)

        if errors:
            message = f"Found errors in {self}:"
            for e in errors:
                message += f"\n - {e}."

            raise ValueError(message)

    def _process_meta(self, raw_meta: dict[str | bytes, str | bytes | int | bool | None]) -> dict[str, Any]:
        processed_meta, missing_keys = TvMn.cast_meta(raw_meta)

        optional_keys = {TvMn.ID_COLUMN_NAME, TvMn.FREQUENCY, TvMn.NUM_POINTS, TvMn.START}
        missing_keys -= optional_keys

        if missing_keys:
            msg = f"{self} could not find keys: {missing_keys} in metadata of file {self.get_source()}. Metadata: {processed_meta}"
            raise KeyError(msg)

        return processed_meta

    def _validate_vector(self, vector_id: str) -> set[str]:
        index = self.get_index(vector_id)
        values = self.get_values(vector_id)

        errors = set()

        # validate index length
        if index.get_num_periods() not in range(values.size - 1, values.size + 1):  # Since ListTimeIndex objects' num_periods can vary.
            errors.add(f"{vector_id} - {type(index)} with {index.get_num_periods()} periods and vector with size ({values.size}) do not match.")

        # validate negative and missing values
        negatives = values < 0
        if np.any(negatives):
            errors.add(f"{vector_id} contains {negatives.sum()} negative values.")
        nans = np.isnan(values)
        if np.any(nans):
            errors.add(f"{vector_id} contains {nans.sum()} nan values.")

        # validate that index is whole years if required
        if self._require_whole_years and not index.is_whole_years():
            errors.add(f"{vector_id} is required to contain whole years but its index ({index}) is not classified as is_whole_years.")

        # outside_unit_interval = ((0 <= values) & (values <= 1))
        # if self.is_zero_one_profile(vector_id) and outside_unit_interval.any():
        #     num_outside_range = outside_unit_interval.sum()
        #     errors.add(f"{vector_id} is classified as a zero one vector but contains {num_outside_range} values outside the range 0, 1.")

        # if not self.is_zero_one_profile(vector_id):
        #     ref_period = self.get_reference_period(vector_id)
        #     ref_start_date = ref_period.get_start_year()

        #     index = self.get_index(vector_id)

        return errors

    def _create_index(
        self,
        datetimes: list[datetime] | NDArray[np.datetime64],
        is_52_week_years: bool,
        extrapolate_first_point: bool,
        extrapolate_last_point: bool,
    ) -> ListTimeIndex | FixedFrequencyTimeIndex:
        """
        Check whether the index has a fixed frequency and create either a FixedFrequencyTimeIndex or a ListTimeIndex based on this.

        If a list index is created, the first datetime of the year following the actual final index is added as the end of the final period.
        For example:
            - Actual input index: [2023-1-2, 2029-12-31, 2035-1-1, 2040-1-2, 2050-1-3]
            - Output ListTimeIndex: [2023-1-2, 2029-12-31, 2035-1-1, 2040-1-2, 2050-1-3, 2051-1-2]

        """
        dt64_array = np.array(datetimes).astype("datetime64[us]")  # convert to microseconds to match resolution of python datetime

        if dt64_array.size == 1 and extrapolate_first_point and extrapolate_last_point:
            return ConstantTimeIndex()

        diff_array = np.diff(dt64_array)  # get period durations between points
        unique_array = np.unique(diff_array)  # get unique durations

        if unique_array.size == 1 and dt64_array.size > 1:  # Fixed frequency and more than one value
            dt64_start: np.datetime64 = dt64_array[0]
            td64_period_duration: np.timedelta64 = unique_array[0]
            return FixedFrequencyTimeIndex(
                start_time=dt64_start.item(),
                period_duration=td64_period_duration.item(),
                num_periods=dt64_array.size,
                is_52_week_years=is_52_week_years,
                extrapolate_first_point=extrapolate_first_point,
                extrapolate_last_point=extrapolate_last_point,
            )

        # add end date to final period
        dt_list = datetimes if isinstance(datetimes, list) else datetimes.astype("datetime64[us]").astype(datetime).tolist()
        end_year = dt_list[-1].isocalendar().year + 1
        end_dt = datetime.fromisocalendar(end_year, 1, 1)

        if len(dt_list) == 1:
            start_dt = dt_list[0]
            period_duration = end_dt - start_dt
            return SinglePeriodTimeIndex(
                start_time=start_dt,
                period_duration=period_duration,
                is_52_week_years=is_52_week_years,
                extrapolate_first_point=extrapolate_first_point,
                extrapolate_last_point=extrapolate_last_point,
            )

        return ListTimeIndex(
            datetime_list=[*dt_list, end_dt],
            is_52_week_years=is_52_week_years,
            extrapolate_first_point=extrapolate_first_point,
            extrapolate_last_point=extrapolate_last_point,
        )
__init__(source: Path | str, require_whole_years: bool, relative_loc: Path | str | None = None) -> None

Initialize NVETimeVectorLoader with source and optional relative location.

Parameters:

Name Type Description Default
source Path | str

Path or string to the source file.

required
require_whole_years bool

Flag for validating that the time vectors in the source contain data for complete years.

required
relative_loc Path | str | None

Relative location, defaults to None.

None
Source code in framdata/loaders/NVETimeVectorLoader.py
def __init__(self, source: Path | str, require_whole_years: bool, relative_loc: Path | str | None = None) -> None:
    """
    Initialize NVETimeVectorLoader with source and optional relative location.

    Args:
        source (Path | str): Path or string to the source file.
        require_whole_years (bool): Flag for validating that the time vectors in the source contain data for complete years.
        relative_loc (Path | str | None, optional): Relative location, defaults to None.

    """
    super().__init__(source, relative_loc)

    self._data: dict[str, NDArray] = None
    self._meta: dict[str, bool | int | str | datetime | timedelta | tzinfo] = None

    self._require_whole_years = require_whole_years
get_reference_period(vector_id: str) -> ReferencePeriod | None

Get reference period from metadata.

Parameters:

Name Type Description Default
vector_id str

Not used.

required

Raises:

Type Description
ValueError

If only one of start year or number of years is set in metadata.

Returns:

Type Description
ReferencePeriod | None

ReferencePeriod | None

Source code in framdata/loaders/NVETimeVectorLoader.py
def get_reference_period(self, vector_id: str) -> ReferencePeriod | None:
    """
    Get reference period from metadata.

    Args:
        vector_id (str): Not used.

    Raises:
        ValueError: If only one of start year or number of years is set in metadata.

    Returns:
        ReferencePeriod | None

    """
    start_year = self.get_metadata(vector_id)[TvMn.REF_PERIOD_START_YEAR]
    num_years = self.get_metadata(vector_id)[TvMn.REF_PERIOD_NUM_YEARS]

    ref_period = None
    if start_year and num_years:
        ref_period = ReferencePeriod(start_year=start_year, num_years=num_years)
    elif start_year or num_years:
        message = (
            f"{self}: Both {TvMn.REF_PERIOD_START_YEAR} and {TvMn.REF_PERIOD_NUM_YEARS} must be provided for a valid reference period."
            "Alternatively, both must be None for undefined reference period."
        )
        raise ValueError(message)
    return ref_period
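
The rule above means a reference period needs both the start year and the number of years; if only one is present the call fails. A minimal sketch of the possible outcomes (the parquet-backed loader defined later on this page is used only as an example; the file name and vector id are illustrative):

from framdata.loaders.time_vector_loaders import NVEParquetTimeVectorLoader

loader = NVEParquetTimeVectorLoader("some_vectors.parquet", require_whole_years=True, validate=False)
ref = loader.get_reference_period("some_vector_id")
if ref is None:
    # neither start year nor number of years was set in the file's metadata
    print("No reference period defined.")
# If only one of the two metadata fields is set, the call raises ValueError instead.
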
get_unit(vector_id: str) -> str

Get the unit of the given time vector.

Parameters:

Name Type Description Default
vector_id str

ID of a time vector. Not used since all time vectors in the NVE parquet files have the same unit.

required

Returns:

Name Type Description
str str

Unit of the time vector.

Source code in framdata/loaders/NVETimeVectorLoader.py
def get_unit(self, vector_id: str) -> str:
    """
    Get the unit of the given time vector.

    Args:
        vector_id (str): ID of a time vector. Not used since all time vectors in the NVE parquet files have the same
                         unit.

    Returns:
        str: Unit of the time vector.

    """
    return self.get_metadata(vector_id)[TvMn.UNIT]
is_max_level(vector_id: str) -> bool | None

Check if the time vector is classified as a max level vector.

Parameters:

Name Type Description Default
vector_id str

ID of the time vector.

required

Returns:

Type Description
bool | None

bool | None: True if max level, False otherwise, or None if not specified.

Source code in framdata/loaders/NVETimeVectorLoader.py
def is_max_level(self, vector_id: str) -> bool | None:
    """
    Check if the time vector is classified as a max level vector.

    Args:
        vector_id (str): ID of the time vector.

    Returns:
        bool | None: True if max level, False otherwise, or None if not specified.

    """
    return self.get_metadata(vector_id)[TvMn.IS_MAX_LEVEL]
is_zero_one_profile(vector_id: str) -> bool | None

Check if the time vector is classified as a zero-one profile vector.

Parameters:

Name Type Description Default
vector_id str

ID of the time vector.

required

Returns:

Type Description
bool | None

bool | None: True if zero-one profile, False otherwise, or None if not specified.

Source code in framdata/loaders/NVETimeVectorLoader.py
def is_zero_one_profile(self, vector_id: str) -> bool | None:
    """
    Check if the time vector is classified as a zero-one profile vector.

    Args:
        vector_id (str): ID of the time vector.

    Returns:
        bool | None: True if zero-one profile, False otherwise, or None if not specified.

    """
    return self.get_metadata(vector_id)[TvMn.IS_ZERO_ONE_PROFILE]
validate_vectors() -> None

Validate data in all vectors contained in the Loader.

Conditions validated
  • If vector contains negative values.
  • (If vector is a zero-one profile and contains values outside the unit interval.) * not in use currently

Raises:

Type Description
ValueError

When conditions are violated.

Source code in framdata/loaders/NVETimeVectorLoader.py
def validate_vectors(self) -> None:
    """
    Validate data in all vectors contained in the Loader.

    Conditions validated:
        - If vector contains negative values.
        (- If vector is a zero one profile and contains values outside the unit interval.) * not in use currently

    Raises:
        ValueError: When conditions are violated.

    """
    errors = set()
    for vector_id in self.get_ids():
        errors |= self._validate_vector(vector_id)

    if errors:
        message = f"Found errors in {self}:"
        for e in errors:
            message += f"\n - {e}."

        raise ValueError(message)
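
Because per-vector problems are collected before raising, a single ValueError lists every issue found. A minimal sketch of deferring validation and inspecting that message (the file name is illustrative; the parquet-backed loader defined later on this page is used only as an example):

from framdata.loaders.time_vector_loaders import NVEParquetTimeVectorLoader

loader = NVEParquetTimeVectorLoader("some_vectors.parquet", require_whole_years=True, validate=False)
try:
    loader.validate_vectors()
except ValueError as err:
    print(err)  # one message listing every negative value, NaN and index-length mismatch found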

curve_loaders

Contains the class for loading Curve data from NVE YAML files.

NVEYamlCurveLoader

Bases: FileLoader, CurveLoader

Handle reading of Curve data from a YAML file in the NVE-specific format.
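
A minimal usage sketch (the file name and curve id are illustrative):

from framdata.loaders.curve_loaders import NVEYamlCurveLoader

loader = NVEYamlCurveLoader("some_curves.yaml")
x = loader.get_x_axis("some_curve_id")  # numpy array with the curve's x values
y = loader.get_y_axis("some_curve_id")  # numpy array with the curve's y values
print(loader.get_x_unit("some_curve_id"), loader.get_y_unit("some_curve_id"))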

Source code in framdata/loaders/curve_loaders.py
class NVEYamlCurveLoader(FileLoader, CurveLoader):
    """Handle reading of Curve data from a yaml File of NVE specific format."""

    _SUPPORTED_SUFFIXES: ClassVar[list[str]] = [".yaml", ".yml"]

    def __init__(self, source: Path | str, relative_loc: Path | str | None = None) -> None:
        """
        Handle reading of curves from a single yaml file.

        Args:
            source (Path | str): Absolute Path to database or yaml file path.
            relative_loc (Optional[Union[Path, str]], optional): Path to yaml file relative to source. Defaults to None.

        """
        super().__init__(source, relative_loc)

        self._data = None
        self._x_meta: str = None
        self._y_meta: str = None

        self._x_label: str = None
        self._y_label: str = None

    def get_x_axis(self, curve_id: str) -> NDArray:
        """
        Get values of x axis.

        Args:
            curve_id (str): Unique id of the curve in the Loader source.

        Returns:
            NDArray: Numpy array with values of x axis.

        """
        if self._data is None:
            self._parse_file()
        return np.asarray(self._data[curve_id][self._x_label])

    def get_y_axis(self, curve_id: str) -> NDArray:
        """
        Get values of y axis.

        Args:
            curve_id (str): Unique id of the curve in the Loader source.

        Returns:
            NDArray: Numpy array with values of y axis.

        """
        if self._data is None:
            self._parse_file()
        return np.asarray(self._data[curve_id][self._y_label])

    def get_x_unit(self, curve_id: str) -> str:
        """
        Get the unit of the x axis for the specified curve.

        Args:
            curve_id (str): Unique id of the curve in the Loader source.

        Returns:
            str: Unit of the x axis.

        """
        if self._data is None:
            self._parse_file()
        return self._x_meta[YamlNames.unit]

    def get_y_unit(self, curve_id: str) -> str:
        """
        Get the unit of the y axis for the specified curve.

        Args:
            curve_id (str): Unique id of the curve in the Loader source.

        Returns:
            str: Unit of the y axis.

        """
        if self._data is None:
            self._parse_file()
        return self._y_meta[YamlNames.unit]

    def get_metadata(self, content_id: str) -> dict:
        """
        Retrieve metadata for the specified content ID.

        Args:
            content_id (str): Unique identifier for the content.

        Returns:
            dict: Metadata associated with the content.

        """
        if self._data is None:
            self._parse_file()
        return self._data[YamlNames.metadata_field]

    def _get_ids(self) -> list[str]:
        if self._content_ids is None:
            if self._data is None:
                self._parse_file()
            ids_list = list(self._data.keys())
            ids_list.remove(YamlNames.metadata_field)
            self._content_ids = ids_list
        return self._content_ids

    def _parse_file(self) -> None:
        with self.get_source().open(encoding=YamlNames.encoding) as f:
            d = yaml.safe_load(f)
            self._x_meta = d[YamlNames.metadata_field][YamlNames.x_field]
            self._y_meta = d[YamlNames.metadata_field][YamlNames.y_field]

            self._x_label = self._x_meta[YamlNames.attribute]
            self._y_label = self._y_meta[YamlNames.attribute]

            self._data = d

    def clear_cache(self) -> None:
        """Clear cached data."""
        self._data = None
        self._x_meta = None
        self._y_meta = None

        self._x_label = None
        self._y_label = None
__init__(source: Path | str, relative_loc: Path | str | None = None) -> None

Handle reading of curves from a single yaml file.

Parameters:

Name Type Description Default
source Path | str

Absolute Path to database or yaml file path.

required
relative_loc Optional[Union[Path, str]]

Path to yaml file relative to source. Defaults to None.

None
Source code in framdata/loaders/curve_loaders.py
def __init__(self, source: Path | str, relative_loc: Path | str | None = None) -> None:
    """
    Handle reading of curves from a single yaml file.

    Args:
        source (Path | str): Absolute Path to database or yaml file path.
        relative_loc (Optional[Union[Path, str]], optional): Path to yaml file relative to source. Defaults to None.

    """
    super().__init__(source, relative_loc)

    self._data = None
    self._x_meta: str = None
    self._y_meta: str = None

    self._x_label: str = None
    self._y_label: str = None
clear_cache() -> None

Clear cached data.

Source code in framdata/loaders/curve_loaders.py
def clear_cache(self) -> None:
    """Clear cached data."""
    self._data = None
    self._x_meta = None
    self._y_meta = None

    self._x_label = None
    self._y_label = None
get_metadata(content_id: str) -> dict

Retrieve metadata for the specified content ID.

Parameters:

Name Type Description Default
content_id str

Unique identifier for the content.

required

Returns:

Name Type Description
dict dict

Metadata associated with the content.

Source code in framdata/loaders/curve_loaders.py
def get_metadata(self, content_id: str) -> dict:
    """
    Retrieve metadata for the specified content ID.

    Args:
        content_id (str): Unique identifier for the content.

    Returns:
        dict: Metadata associated with the content.

    """
    if self._data is None:
        self._parse_file()
    return self._data[YamlNames.metadata_field]
get_x_axis(curve_id: str) -> NDArray

Get values of x axis.

Parameters:

Name Type Description Default
curve_id str

Unique id of the curve in the Loader source.

required

Returns:

Name Type Description
NDArray NDArray

Numpy array with values of x axis.

Source code in framdata/loaders/curve_loaders.py
def get_x_axis(self, curve_id: str) -> NDArray:
    """
    Get values of x axis.

    Args:
        curve_id (str): Unique id of the curve in the Loader source.

    Returns:
        NDArray: Numpy array with values of x axis.

    """
    if self._data is None:
        self._parse_file()
    return np.asarray(self._data[curve_id][self._x_label])
get_x_unit(curve_id: str) -> str

Get the unit of the x axis for the specified curve.

Parameters:

Name Type Description Default
curve_id str

Unique id of the curve in the Loader source.

required

Returns:

Name Type Description
str str

Unit of the x axis.

Source code in framdata/loaders/curve_loaders.py
def get_x_unit(self, curve_id: str) -> str:
    """
    Get the unit of the x axis for the specified curve.

    Args:
        curve_id (str): Unique id of the curve in the Loader source.

    Returns:
        str: Unit of the x axis.

    """
    if self._data is None:
        self._parse_file()
    return self._x_meta[YamlNames.unit]
get_y_axis(curve_id: str) -> NDArray

Get values of y axis.

Parameters:

Name Type Description Default
curve_id str

Unique id of the curve in the Loader source.

required

Returns:

Name Type Description
NDArray NDArray

Numpy array with values of y axis.

Source code in framdata/loaders/curve_loaders.py
def get_y_axis(self, curve_id: str) -> NDArray:
    """
    Get values of y axis.

    Args:
        curve_id (str): Unique id of the curve in the Loader source.

    Returns:
        NDArray: Numpy array with values of y axis.

    """
    if self._data is None:
        self._parse_file()
    return np.asarray(self._data[curve_id][self._y_label])
get_y_unit(curve_id: str) -> str

Get the unit of the y axis for the specified curve.

Parameters:

Name Type Description Default
curve_id str

Unique id of the curve in the Loader source.

required

Returns:

Name Type Description
str str

Unit of the y axis.

Source code in framdata/loaders/curve_loaders.py
def get_y_unit(self, curve_id: str) -> str:
    """
    Get the unit of the y axis for the specified curve.

    Args:
        curve_id (str): Unique id of the curve in the Loader source.

    Returns:
        str: Unit of the y axis.

    """
    if self._data is None:
        self._parse_file()
    return self._y_meta[YamlNames.unit]

time_vector_loaders

Contain classes for reading time vector data from various file types with formats specific to NVE.

This module provides
  • NVEExcelTimeVectorLoader: Handle time vectors in Excel files.
  • NVEH5TimeVectorLoader: Handle time vectors in HDF5 files.
  • NVEYamlTimeVectorLoader: Handle time vectors in YAML files.
  • NVEParquetTimeVectorLoader: Handle time vectors in Parquet files (a loader-selection sketch follows this list).
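
A hypothetical helper (not part of this module) that picks a loader class by file suffix via each class's _SUPPORTED_SUFFIXES attribute. It assumes all four loaders define that attribute, as the classes documented on this page do:

from pathlib import Path

from framdata.loaders.time_vector_loaders import (
    NVEExcelTimeVectorLoader,
    NVEH5TimeVectorLoader,
    NVEParquetTimeVectorLoader,
    NVEYamlTimeVectorLoader,
)


def pick_loader_class(path: Path) -> type:
    """Return the loader class whose supported suffixes include the file's suffix."""
    loaders = (NVEExcelTimeVectorLoader, NVEH5TimeVectorLoader, NVEYamlTimeVectorLoader, NVEParquetTimeVectorLoader)
    for cls in loaders:
        if path.suffix.lower() in cls._SUPPORTED_SUFFIXES:
            return cls
    msg = f"No time vector loader supports suffix '{path.suffix}'."
    raise ValueError(msg)
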
NVEExcelTimeVectorLoader

Bases: NVETimeVectorLoader

Class for loading time vector data from NVE excel file sources.

Meant for short time vectors (e.g. yearly volumes or installed capacities) that are convenient to view and edit through Excel. Supports the following formats (illustrated in the sketch below):
  • 'Horizontal': one column contains IDs and the other column names represent the index; vector values are stored as rows.
  • 'Vertical': one column is the index (DateTime) and the other column names are vector IDs; vectors are stored as column values.
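
For illustration, the two 'Data' sheet layouts could look roughly like the DataFrames below. The literal header names 'ID' and 'DateTime' and the example vector ids are assumptions; the real column names come from the TvMn constants used in the source code.

import pandas as pd

# 'Horizontal': one ID column, the remaining column names form the index, values stored as rows.
horizontal = pd.DataFrame(
    {"ID": ["Area1.capacity", "Area2.capacity"], "2025": [100.0, 80.0], "2030": [120.0, 90.0]},
)

# 'Vertical': one DateTime index column, the remaining column names are vector IDs.
vertical = pd.DataFrame(
    {"DateTime": ["2025-01-01", "2030-01-01"], "Area1.capacity": [100.0, 120.0], "Area2.capacity": [80.0, 90.0]},
)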

Source code in framdata/loaders/time_vector_loaders.py
class NVEExcelTimeVectorLoader(NVETimeVectorLoader):
    """
    Class for loading time vector data from NVE excel file sources.

    Meant for short time vectors (e.g. yearly volumes or installed capacities) which are desirable to view and edit easily through Excel.
    Supports the following formats:
        - 'Horizontal': One column containing IDs, the other column names represent the index. Vector values as rows.
        - 'Vertical': One column as index (DateTime), the other column names are vector IDs. Vectors as column values.

    """

    _SUPPORTED_SUFFIXES: ClassVar[list] = [".xlsx"]
    _DATA_SHEET = "Data"
    _METADATA_SHEET = "Metadata"

    def __init__(self, source: Path | str, require_whole_years: bool, relative_loc: Path | str | None = None, validate: bool = True) -> None:
        """
        Initialize the loader instance and connect it to an Excel file containing time vector data.

        Args:
            source (Path | str): Absolute Path to database or excel file.
            require_whole_years (bool): Flag for validating that the time vectors in the source contain data for complete years.
            relative_loc (Path | str | None, optional): Path to excel file relative to source. Defaults to None.
            validate (bool, optional): Flag to turn on validation of timevectors. NB! Loads all data into memory at once. Defaults to True.

        """
        super().__init__(source, require_whole_years, relative_loc)
        self._index: TimeIndex = None

        if validate:
            self.validate_vectors()

    def get_unit(self, vector_id: str) -> str:
        """
        Get the unit of the given time vector.

        Args:
            vector_id (str): ID of a time vector. Not used since all time vectors in the NVE excel files have the same
                             unit.

        Returns:
            str: Unit of the time vector.

        """
        return self.get_metadata("")[TvMn.UNIT]

    def get_values(self, vector_id: str) -> NDArray:
        """
        Get numpy array with all the values of a given vector in the Loader's excel file.

        Args:
            vector_id (str): Unique id of the vector in the file.

        Returns:
            NDArray: Numpy array with values.

        """
        if self._data is None:
            self._data = pd.DataFrame()
        if vector_id not in self._data.columns:
            is_horizontal = self._is_horizontal_format()
            column_filter = [vector_id]
            usecols = None
            if not is_horizontal:
                usecols = column_filter

            values_df = pd.read_excel(self.get_source(), sheet_name=self._DATA_SHEET, usecols=usecols)

            if is_horizontal:  # Convert the table to large time series format
                values_df = self._process_horizontal_format(values_df)
                values_df = self._enforce_dtypes(values_df, is_horizontal)
                self._data = values_df
            else:
                values_df = self._enforce_dtypes(values_df, is_horizontal)
                self._data[vector_id] = values_df
        return self._data[vector_id].to_numpy()

    def get_index(self, vector_id: str) -> ListTimeIndex:
        """
        Get the TimeIndex describing the time dimension of the vectors in the file.

        Args:
            vector_id (str): Not used since all vectors in the NVE excel files have the same index.

        Returns:
            TimeIndex: TimeIndex object describing the excel file's index.

        """
        meta = self.get_metadata("")
        if self._index is None:
            self._index = self._create_index(
                self.get_values(TvMn.DATETIME_COL),
                is_52_week_years=meta[TvMn.IS_52_WEEK_YEARS],
                extrapolate_first_point=meta[TvMn.EXTRAPOLATE_FISRT_POINT],
                extrapolate_last_point=meta[TvMn.EXTRAPOLATE_LAST_POINT],
            )
        return self._index

    def get_metadata(self, vector_id: str) -> dict[str, bool | int | str | datetime | timedelta | tzinfo | None]:
        """
        Read Excel file metadata.

        Args:
            vector_id (str): Not used.

        Raises:
            KeyError: If an expected metadata key is missing.

        Returns:
            dict[str, bool|int|str|datetime|timedelta|tzinfo|None]: Metadata dictionary.

        """
        if self._meta is None:
            path = self.get_source()
            raw_meta = pd.read_excel(path, sheet_name=self._METADATA_SHEET, na_values=[""]).replace([np.nan], [None]).to_dict("records")[0]

            self._meta = self._process_meta(raw_meta)
        return self._meta

    def _enforce_dtypes(self, values_df: pd.DataFrame | pd.Series, issmallformat: bool) -> pd.DataFrame:
        set_dtypes = "float"
        if isinstance(values_df, pd.DataFrame):
            set_dtypes = {c: "float" for c in values_df.columns if c != TvMn.DATETIME_COL}

        # ensure correct dtypes
        try:
            return values_df.astype(set_dtypes)
        except ValueError as e:
            index_column = TvMn.ID_COLUMN_NAME if issmallformat else TvMn.DATETIME_COL
            message = f"Error in {self} while reading file. All columns except '{index_column}' must consist of only float or integer numbers."
            raise RuntimeError(message) from e

    def _process_horizontal_format(self, horizontal_format_df: pd.DataFrame) -> pd.DataFrame:
        # We have to read the whole file to find the correct series

        # Rename the id column name and then transpose to get the correct format
        # Since the columns are counted as indices when transposing, we need to reset the index (but keep the DateTime
        # column)
        reformat_df = horizontal_format_df.rename(columns={TvMn.ID_COLUMN_NAME: TvMn.DATETIME_COL}).T.reset_index(drop=False)

        # after transposing, column names are set as the first row, which is DateTime, IDs
        reformat_df.columns = reformat_df.iloc[0]
        # We reindex by dropping the first row, thus removing the row of DateTime, IDs
        reformat_df = reformat_df.reindex(reformat_df.index.drop(0)).reset_index(drop=True)

        # Since it is possible to write only year or year-month as a timestamp in the table,
        # we need to reformat to the correct datetime format
        reformat_df[TvMn.DATETIME_COL] = self._to_iso_datetimes(reformat_df[TvMn.DATETIME_COL])

        return reformat_df

    def _to_iso_datetimes(self, series: pd.Series) -> list[datetime]:
        """
        Convert a series of dates to ISO datetime format.

        Args:
            series (pd.Series): Series which values will be converted to ISO format.

        Raises:
            RuntimeError: When an input value which cannot be converted is encountered.

        Returns:
            list[datetime]: List of formatted datetimes.

        """
        reformatted = []
        three_segments = 3
        two_segments = 2
        one_segment = 1
        for i in series:
            new_i = str(i)
            date_split = len(new_i.split("-"))
            space_split = len(new_i.split(" "))
            time_split = len(new_i.split(":"))
            try:
                if date_split == one_segment:  # Only year is defined
                    # get datetime for first week first day
                    new_i = datetime.fromisocalendar(int(new_i), 1, 1)
                elif date_split == two_segments:
                    # Year and month is defined
                    new_i = datetime.strptime(new_i + "-01", "%Y-%m-%d")  # Add first day
                elif date_split == three_segments and space_split == one_segment and time_split == one_segment:
                    # days defined but not time
                    new_i = datetime.strptime(new_i, "%Y-%m-%d")
                elif date_split == three_segments and space_split == two_segments and time_split == one_segment:
                    new_i = datetime.strptime(new_i, "%Y-%m-%d %H")
                elif date_split == three_segments and space_split == two_segments and time_split == two_segments:
                    new_i = datetime.strptime(new_i, "%Y-%m-%d %H:%M")
                elif date_split == three_segments and space_split == two_segments and time_split == three_segments:
                    # Assume time is defined
                    new_i = datetime.strptime(new_i, "%Y-%m-%d %H:%M:%S")
                else:
                    msg = f"Could not convert value '{new_i}' to datetime format."
                    raise ValueError(msg)
            except Exception as e:
                msg = f"Loader {self} could not convert value '{new_i}' to datetime format. Check formatting, for example number of spaces."
                raise RuntimeError(msg) from e
            reformatted.append(new_i)
        return sorted(reformatted)

    def _is_horizontal_format(self) -> bool:
        """Determine if the file strucure is the NVE small format."""
        column_names = pd.read_excel(self.get_source(), nrows=0, sheet_name=self._DATA_SHEET).columns.tolist()
        return TvMn.ID_COLUMN_NAME in column_names

    def _get_ids(self) -> list[str]:
        if self._content_ids is not None:
            return self._content_ids
        try:
            if self._is_horizontal_format():
                self._content_ids = pd.read_excel(
                    self.get_source(),
                    usecols=[TvMn.ID_COLUMN_NAME],
                    sheet_name=self._DATA_SHEET,
                )[TvMn.ID_COLUMN_NAME].tolist()
            else:
                columns_list = pd.read_excel(self.get_source(), nrows=0, sheet_name=self._DATA_SHEET).columns.tolist()
                columns_list.remove(TvMn.DATETIME_COL)
                self._content_ids = columns_list
        except ValueError as e:
            message = f"{self}: found problem with TimeVector IDs."
            raise RuntimeError(message) from e

        return self._content_ids

    def clear_cache(self) -> None:
        """Clear cached data."""
        self._data = None
        self._meta = None
        self._index = None
__init__(source: Path | str, require_whole_years: bool, relative_loc: Path | str | None = None, validate: bool = True) -> None

Initialize the loader instance and connect it to an Excel file containing time vector data.

Parameters:

Name Type Description Default
source Path | str

Absolute Path to database or excel file.

required
require_whole_years bool

Flag for validating that the time vectors in the source contain data for complete years.

required
relative_loc Path | str | None

Path to excel file relative to source. Defaults to None.

None
validate bool

Flag to turn on validation of timevectors. NB! Loads all data into memory at once. Defaults to True.

True
Source code in framdata/loaders/time_vector_loaders.py
def __init__(self, source: Path | str, require_whole_years: bool, relative_loc: Path | str | None = None, validate: bool = True) -> None:
    """
    Initialize the loader instance and connect it to an Excel file containing time vector data.

    Args:
        source (Path | str): Absolute Path to database or excel file.
        require_whole_years (bool): Flag for validating that the time vectors in the source contain data for complete years.
        relative_loc (Path | str | None, optional): Path to excel file relative to source. Defaults to None.
        validate (bool, optional): Flag to turn on validation of timevectors. NB! Loads all data into memory at once. Defaults to True.

    """
    super().__init__(source, require_whole_years, relative_loc)
    self._index: TimeIndex = None

    if validate:
        self.validate_vectors()
clear_cache() -> None

Clear cached data.

Source code in framdata/loaders/time_vector_loaders.py
def clear_cache(self) -> None:
    """Clear cached data."""
    self._data = None
    self._meta = None
    self._index = None
get_index(vector_id: str) -> ListTimeIndex

Get the TimeIndex describing the time dimension of the vectors in the file.

Parameters:

Name Type Description Default
vector_id str

Not used since all vectors in the NVE excel files have the same index.

required

Returns:

Name Type Description
TimeIndex ListTimeIndex

TimeIndex object describing the excel file's index.

Source code in framdata/loaders/time_vector_loaders.py
def get_index(self, vector_id: str) -> ListTimeIndex:
    """
    Get the TimeIndex describing the time dimension of the vectors in the file.

    Args:
        vector_id (str): Not used since all vectors in the NVE excel files have the same index.

    Returns:
        TimeIndex: TimeIndex object describing the excel file's index.

    """
    meta = self.get_metadata("")
    if self._index is None:
        self._index = self._create_index(
            self.get_values(TvMn.DATETIME_COL),
            is_52_week_years=meta[TvMn.IS_52_WEEK_YEARS],
            extrapolate_first_point=meta[TvMn.EXTRAPOLATE_FISRT_POINT],
            extrapolate_last_point=meta[TvMn.EXTRAPOLATE_LAST_POINT],
        )
    return self._index
get_metadata(vector_id: str) -> dict[str, bool | int | str | datetime | timedelta | tzinfo | None]

Read Excel file metadata.

Parameters:

Name Type Description Default
vector_id str

Not used.

required

Raises:

Type Description
KeyError

If an expected metadata key is missing.

Returns:

Type Description
dict[str, bool | int | str | datetime | timedelta | tzinfo | None]

dict[str, bool|int|str|datetime|timedelta|tzinfo|None]: Metadata dictionary.

Source code in framdata/loaders/time_vector_loaders.py
def get_metadata(self, vector_id: str) -> dict[str, bool | int | str | datetime | timedelta | tzinfo | None]:
    """
    Read Excel file metadata.

    Args:
        vector_id (str): Not used.

    Raises:
        KeyError: If an expected metadata key is missing.

    Returns:
        dict[str, bool|int|str|datetime|timedelta|tzinfo|None]: Metadata dictionary.

    """
    if self._meta is None:
        path = self.get_source()
        raw_meta = pd.read_excel(path, sheet_name=self._METADATA_SHEET, na_values=[""]).replace([np.nan], [None]).to_dict("records")[0]

        self._meta = self._process_meta(raw_meta)
    return self._meta
get_unit(vector_id: str) -> str

Get the unit of the given time vector.

Parameters:

Name Type Description Default
vector_id str

ID of a time vector. Not used since all time vectors in the NVE excel files have the same unit.

required

Returns:

Name Type Description
str str

Unit of the time vector.

Source code in framdata/loaders/time_vector_loaders.py
def get_unit(self, vector_id: str) -> str:
    """
    Get the unit of the given time vector.

    Args:
        vector_id (str): ID of a time vector. Not used since all time vectors in the NVE excel files have the same
                         unit.

    Returns:
        str: Unit of the time vector.

    """
    return self.get_metadata("")[TvMn.UNIT]
get_values(vector_id: str) -> NDArray

Get numpy array with all the values of a given vector in the Loader's excel file.

Parameters:

Name Type Description Default
vector_id str

Unique id of the vector in the file.

required

Returns:

Name Type Description
NDArray NDArray

Numpy array with values.

Source code in framdata/loaders/time_vector_loaders.py
def get_values(self, vector_id: str) -> NDArray:
    """
    Get numpy array with all the values of a given vector in the Loader's excel file.

    Args:
        vector_id (str): Unique id of the vector in the file.

    Returns:
        NDArray: Numpy array with values.

    """
    if self._data is None:
        self._data = pd.DataFrame()
    if vector_id not in self._data.columns:
        is_horizontal = self._is_horizontal_format()
        column_filter = [vector_id]
        usecols = None
        if not is_horizontal:
            usecols = column_filter

        values_df = pd.read_excel(self.get_source(), sheet_name=self._DATA_SHEET, usecols=usecols)

        if is_horizontal:  # Convert the table to large time series format
            values_df = self._process_horizontal_format(values_df)
            values_df = self._enforce_dtypes(values_df, is_horizontal)
            self._data = values_df
        else:
            values_df = self._enforce_dtypes(values_df, is_horizontal)
            self._data[vector_id] = values_df
    return self._data[vector_id].to_numpy()
NVEH5TimeVectorLoader

Bases: NVETimeVectorLoader

Class for loading time vector data from NVE HDF5 file sources.

Meant for large time vectors (e.g. hourly data over multiple years). Supports differing lengths and metadata of vectors stored in the file.

Specialized to the following format (a minimal layout sketch follows this list)
  • index (h5py.Group, optional): Used to define per-vector indexes when an index should apply only to that vector.
  • common_index (h5py.Dataset): Contains one numpy array for all vectors. This is the fallback index for vectors that have not defined their own index in the index group. Also used deliberately when many or all vectors share the same index.
  • metadata (h5py.Group): Used to connect a specific set of metadata to a particular vector.
  • common_metadata (h5py.Group): Contains one set of metadata fields for all vectors. Used in a similar way as common_index.
  • vectors (h5py.Group): Contains numpy arrays with the vector values, keyed by a unique ID. The same ID is used to connect the vector to an index or metadata.
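
A minimal sketch of such a file written with h5py. The literal group and dataset names ('vectors', 'common_index', 'common_metadata', 'unit') and the example vector id are assumptions based on the description above; the actual names are defined by H5Names.

import h5py
import numpy as np

with h5py.File("some_vectors.h5", "w") as f:
    # one index shared by all vectors that do not define their own index
    f.create_dataset("common_index", data=np.array(["2025-01-01", "2025-01-02"], dtype="S"))
    # one set of metadata fields shared by all vectors
    meta = f.create_group("common_metadata")
    meta.create_dataset("unit", data="MW")
    # the vector values themselves, keyed by unique ID
    vectors = f.create_group("vectors")
    vectors.create_dataset("Area1.profile", data=np.array([0.5, 0.7]))
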
Source code in framdata/loaders/time_vector_loaders.py
class NVEH5TimeVectorLoader(NVETimeVectorLoader):
    """
    Class for loading time vector data from NVE HDF5 file sources.

    Meant for large time vectors (e.g. hourly data over multiple years). Supports differing lengths and metadata of vectors stored in the file.

    Specialized to the following format:
        - index (h5py.Group, optional): Used to define indexes for vectors if index is supposed to only apply to that vector.
        - common_index (h5py.Dataset): Contains one numpy array for all vectors. This is a fallback index for vectors which have not defined their own index in
                                       the index group. Also used on purpose if many or all vectors have the same index.
        - metadata (h5py.Group): Used to connect a specific set of metadata to a particular vector.
        - common_metadata (h5py.Group): Contains one set of metadata fields for all vectors. Used in a similar way as common_index.
        - vectors (h5py.Group): Contains numpy arrays containing the vector values connected to a unique ID. The same ID is used to connect the vector to an
                                index or metadata.

    """

    _SUPPORTED_SUFFIXES: ClassVar[list] = [".h5", ".hdf5"]

    def __init__(self, source: Path | str, require_whole_years: bool, relative_loc: Path | str | None = None, validate: bool = True) -> None:
        """
        Initialize the loader instance and connect it to an H5 file containing time vector data.

        Args:
            source (Path | str): Absolute Path to database or HDF5 file.
            require_whole_years (bool): Flag for validating that the time vectors in the source contain data for complete years.
            relative_loc (Path | str | None, optional): Path to HDF5 file relative to source. Defaults to None.
            validate (bool, optional): Whether to validate vectors after loading. NB! Loads all data into memory at once. Defaults to True.

        """
        super().__init__(source, require_whole_years, relative_loc)
        self._index: TimeIndex = None
        self._file_pointer = None

        if validate:
            self.validate_vectors()

    def get_values(self, vector_id: str) -> NDArray:
        """
        Get numpy array with all the values of a given vector in the Loader's HDF5 file.

        Args:
            vector_id (str): Unique id of the vector in the file.

        Returns:
            NDArray: Numpy array with values.

        """
        if self._data is None:
            self._data = dict()
        if vector_id not in self._data:
            with h5py.File(self.get_source(), mode="r") as h5f:
                self._data[vector_id] = self._read_vector_field(h5f, H5Names.VECTORS_GROUP, vector_id, field_type=h5py.Dataset, use_fallback=False)[()]
        return self._data[vector_id]

    def get_index(self, vector_id: str) -> TimeIndex:
        """
        Get the TimeIndex describing the time dimension of the vectors in the file.

        Args:
            vector_id (str): Not used since all vectors in the NVE HDF5 files have the same index.

        Returns:
            TimeIndex: TimeIndex object describing the HDF5 file's index.

        """
        if self._index is None:
            meta = self.get_metadata("")

            if TvMn.FREQUENCY not in meta or (TvMn.FREQUENCY in meta and meta[TvMn.FREQUENCY] is None):
                self._index = self._create_index(
                    datetimes=self._read_index(vector_id),
                    is_52_week_years=meta[TvMn.IS_52_WEEK_YEARS],
                    extrapolate_first_point=meta[TvMn.EXTRAPOLATE_FISRT_POINT],
                    extrapolate_last_point=meta[TvMn.EXTRAPOLATE_LAST_POINT],
                )
                return self._index
            index_array = self._read_index(vector_id) if meta[TvMn.START] is None or meta[TvMn.NUM_POINTS] is None else None
            start = meta[TvMn.START] if index_array is None else index_array[0].item()
            num_points = meta[TvMn.NUM_POINTS] if index_array is None else index_array.size

            self._index = FixedFrequencyTimeIndex(
                start,
                meta[TvMn.FREQUENCY],
                num_points,
                is_52_week_years=meta[TvMn.IS_52_WEEK_YEARS],
                extrapolate_first_point=meta[TvMn.EXTRAPOLATE_FISRT_POINT],
                extrapolate_last_point=meta[TvMn.EXTRAPOLATE_LAST_POINT],
            )

        return self._index

    def _read_index(self, vector_id: str) -> NDArray[np.datetime64]:
        with h5py.File(self.get_source(), mode="r") as h5f:
            decoded_index = np.char.decode(self._read_vector_field(h5f, H5Names.INDEX_GROUP, vector_id, h5py.Dataset)[()].astype(np.bytes_), encoding="utf-8")
            return decoded_index.astype(np.datetime64)

    def _read_vector_field(
        self,
        h5file: h5py.File,
        field_name: str,
        vector_id: str,
        field_type: type[h5py.Dataset | h5py.Group],
        use_fallback: bool = True,
    ) -> h5py.Dataset | h5py.Group:
        error = ""
        if field_name in h5file:  # check if group_name exists
            main_group = h5file[field_name]
            if not isinstance(main_group, h5py.Group):
                message = f"{self} expected '{field_name}' to be a {h5py.Group} in {h5file}. Got {type(main_group)}."
                raise TypeError(message)

            if vector_id in main_group:
                vector_field = main_group[vector_id]
                if not isinstance(vector_field, field_type):
                    message = f"{self} expected '{vector_id}' to be a {field_type} in {h5file}. Got {type(vector_field)}"
                    raise TypeError(message)
                return vector_field
            error = f"'{vector_id}' was not found in '{field_name}' group"
        else:
            error = f"'{field_name}' was not found in file"

        no_fallback_message = f"{self} expected '{vector_id}' in {h5py.Group} '{field_name}' "
        if not use_fallback:
            no_fallback_message += f"but {error}."
            raise KeyError(no_fallback_message)

        fallback_name = H5Names.COMMON_PREFIX + field_name
        if fallback_name in h5file:  # check if common_ + group_name exists
            fallback_field = h5file[fallback_name]
            if not isinstance(fallback_field, field_type):
                message = f"{self} expected '{fallback_field}' to be a {field_type} in {h5file}. Got {type(fallback_field)}."
                raise TypeError(message)
            return fallback_field

        message = (
            no_fallback_message
            + f"or a fallback {field_type} '{fallback_name}' in H5 file but "
            + f"{error},"
            + f" and fallback {field_type} '{fallback_name}' not found in file."
        )
        raise KeyError(message)

    def get_metadata(self, vector_id: str) -> dict[str, bool | int | str | datetime | timedelta | tzinfo | None]:
        """
        Retrieve and decode custom metadata from the HDF5 file.

        Args:
            vector_id (str): Not used

        Raises:
            KeyError: If any of the expected metadata keys is not found in file.

        Returns:
            dict: Dictionary with decoded metadata.

        """
        if self._meta is None:
            errors = set()
            meta = {}
            with h5py.File(self.get_source(), mode="r") as h5f:
                meta_group = self._read_vector_field(h5f, H5Names.METADATA_GROUP, vector_id, h5py.Group)
                for k, m in meta_group.items():
                    if isinstance(m, h5py.Dataset):
                        meta[k] = m[()]
                    else:
                        errors.add(f"Improper metadata format: Metadata key {k} exists but is a h5 group when it should be a h5 dataset.")
            self._report_errors(errors)
            self._meta = self._process_meta(meta)
        return self._meta

    def _get_ids(self) -> list[str]:
        with h5py.File(self.get_source(), mode="r") as h5f:
            if H5Names.VECTORS_GROUP in h5f:
                return list(h5f[H5Names.VECTORS_GROUP].keys())
            message = f"{self} required key '{H5Names.VECTORS_GROUP}' was not found in file."
            raise KeyError(message)

    def clear_cache(self) -> None:
        """Clear cached data."""
        self._data = None
        self._meta = None
        self._index = None
__init__(source: Path | str, require_whole_years: bool, relative_loc: Path | str | None = None, validate: bool = True) -> None

Initialize the loader instance and connect it to an H5 file containing time vector data.

Parameters:

Name Type Description Default
source Path | str

Absolute Path to database or HDF5 file.

required
require_whole_years bool

Flag for validating that the time vectors in the source contain data for complete years.

required
relative_loc Path | str | None

Path to HDF5 file relative to source. Defaults to None.

None
validate bool

Whether to validate vectors after loading. NB! Loads all data into memory at once. Defaults to True.

True
Source code in framdata/loaders/time_vector_loaders.py
def __init__(self, source: Path | str, require_whole_years: bool, relative_loc: Path | str | None = None, validate: bool = True) -> None:
    """
    Initialize the loader instance and connect it to an H5 file containing time vector data.

    Args:
        source (Path | str): Absolute Path to database or HDF5 file.
        require_whole_years (bool): Flag for validating that the time vectors in the source contain data for complete years.
        relative_loc (Path | str | None, optional): Path to HDF5 file relative to source. Defaults to None.
        validate (bool, optional): Whether to validate vectors after loading. NB! Loads all data into memory at once. Defaults to True.

    """
    super().__init__(source, require_whole_years, relative_loc)
    self._index: TimeIndex = None
    self._file_pointer = None

    if validate:
        self.validate_vectors()
clear_cache() -> None

Clear cached data.

Source code in framdata/loaders/time_vector_loaders.py
def clear_cache(self) -> None:
    """Clear cached data."""
    self._data = None
    self._meta = None
    self._index = None
get_index(vector_id: str) -> TimeIndex

Get the TimeIndex describing the time dimension of the vectors in the file.

Parameters:

Name Type Description Default
vector_id str

Not used since all vectors in the NVE HDF5 files have the same index.

required

Returns:

Name Type Description
TimeIndex TimeIndex

TimeIndex object describing the HDF5 file's index.

Source code in framdata/loaders/time_vector_loaders.py
def get_index(self, vector_id: str) -> TimeIndex:
    """
    Get the TimeIndex describing the time dimension of the vectors in the file.

    Args:
        vector_id (str): Not used since all vectors in the NVE HDF5 files have the same index.

    Returns:
        TimeIndex: TimeIndex object describing the HDF5 file's index.

    """
    if self._index is None:
        meta = self.get_metadata("")

        if TvMn.FREQUENCY not in meta or (TvMn.FREQUENCY in meta and meta[TvMn.FREQUENCY] is None):
            self._index = self._create_index(
                datetimes=self._read_index(vector_id),
                is_52_week_years=meta[TvMn.IS_52_WEEK_YEARS],
                extrapolate_first_point=meta[TvMn.EXTRAPOLATE_FISRT_POINT],
                extrapolate_last_point=meta[TvMn.EXTRAPOLATE_LAST_POINT],
            )
            return self._index
        index_array = self._read_index(vector_id) if meta[TvMn.START] is None or meta[TvMn.NUM_POINTS] is None else None
        start = meta[TvMn.START] if index_array is None else index_array[0].item()
        num_points = meta[TvMn.NUM_POINTS] if index_array is None else index_array.size

        self._index = FixedFrequencyTimeIndex(
            start,
            meta[TvMn.FREQUENCY],
            num_points,
            is_52_week_years=meta[TvMn.IS_52_WEEK_YEARS],
            extrapolate_first_point=meta[TvMn.EXTRAPOLATE_FISRT_POINT],
            extrapolate_last_point=meta[TvMn.EXTRAPOLATE_LAST_POINT],
        )

    return self._index
get_metadata(vector_id: str) -> dict[str, bool | int | str | datetime | timedelta | tzinfo | None]

Retrieve and decode custom metadata from the HDF5 file.

Parameters:

Name Type Description Default
vector_id str

Not used

required

Raises:

Type Description
KeyError

If any of the expected metadata keys is not found in file.

Returns:

Name Type Description
dict dict[str, bool | int | str | datetime | timedelta | tzinfo | None]

Dictionary with decoded metadata.

Source code in framdata/loaders/time_vector_loaders.py
def get_metadata(self, vector_id: str) -> dict[str, bool | int | str | datetime | timedelta | tzinfo | None]:
    """
    Retrieve and decode custom metadata from the HDF5 file.

    Args:
        vector_id (str): Not used

    Raises:
        KeyError: If any of the expected metadata keys is not found in file.

    Returns:
        dict: Dictionary with decoded metadata.

    """
    if self._meta is None:
        errors = set()
        meta = {}
        with h5py.File(self.get_source(), mode="r") as h5f:
            meta_group = self._read_vector_field(h5f, H5Names.METADATA_GROUP, vector_id, h5py.Group)
            for k, m in meta_group.items():
                if isinstance(m, h5py.Dataset):
                    meta[k] = m[()]
                else:
                    errors.add(f"Improper metadata format: Metadata key {k} exists but is a h5 group when it should be a h5 dataset.")
        self._report_errors(errors)
        self._meta = self._process_meta(meta)
    return self._meta
get_values(vector_id: str) -> NDArray

Get numpy array with all the values of a given vector in the Loader's HDF5 file.

Parameters:

Name Type Description Default
vector_id str

Unique id of the vector in the file.

required

Returns:

Name Type Description
NDArray NDArray

Numpy array with values.

Source code in framdata/loaders/time_vector_loaders.py
def get_values(self, vector_id: str) -> NDArray:
    """
    Get numpy array with all the values of a given vector in the Loader's HDF5 file.

    Args:
        vector_id (str): Unique id of the vector in the file.

    Returns:
        NDArray: Numpy array with values.

    """
    if self._data is None:
        self._data = dict()
    if vector_id not in self._data:
        with h5py.File(self.get_source(), mode="r") as h5f:
            self._data[vector_id] = self._read_vector_field(h5f, H5Names.VECTORS_GROUP, vector_id, field_type=h5py.Dataset, use_fallback=False)[()]
    return self._data[vector_id]
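Each vector is stored as its own dataset inside a vectors group, so a single column can be read without touching the rest of the file. A small h5py sketch with illustrative group and dataset names (the real group name comes from H5Names.VECTORS_GROUP):

import h5py
import numpy as np

# Write a single vector dataset under a "vectors" group (names illustrative).
with h5py.File("example_vectors.h5", "w") as h5f:
    h5f.create_group("vectors").create_dataset("Example.vector", data=np.arange(8760.0))

# Read the whole dataset for one vector id into a numpy array, as get_values does.
with h5py.File("example_vectors.h5", "r") as h5f:
    values = h5f["vectors"]["Example.vector"][()]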
NVEParquetTimeVectorLoader

Bases: NVETimeVectorLoader

Class for loading time vector data from NVE parquet file sources.

Meant for large time vectors. All vectors in the file must have the same lengths and metadata. Supported format: 'Vertical', with one index column (DateTime) and the other columns containing vector values.
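For illustration, a file in the 'Vertical' layout could be produced roughly as below; the column names are made up for the example, and the real files also carry the custom metadata described under get_metadata.

import pandas as pd

# One DateTime index column plus one column per time vector, all of equal length.
frame = pd.DataFrame(
    {
        "DateTime": pd.date_range("2030-01-01", periods=8760, freq="h"),
        "Example.vector_1": range(8760),
        "Example.vector_2": range(8760),
    }
)
frame.to_parquet("example_profiles.parquet", index=False)  # requires pyarrow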

Source code in framdata/loaders/time_vector_loaders.py
class NVEParquetTimeVectorLoader(NVETimeVectorLoader):
    """
    Class for loading time vector data from NVE parquet file sources.

    Meant for large time vectors. All vectors in the file must have the same lengths and metadata.
    Supported format:
        - 'Vertical': one index column (DateTime) and the other columns containing vector values.

    """

    _SUPPORTED_SUFFIXES: ClassVar[list] = [".parquet"]

    def __init__(self, source: Path | str, require_whole_years: bool, relative_loc: Path | str | None = None, validate: bool = True) -> None:
        """
        Initialize the loader instance and connect it to a Parquet file containing time vector data.

        Args:
            source (Path | str): Absolute path to the database or parquet file.
            require_whole_years (bool): Flag for validating that the time vectors in the source contain data for complete years.
            relative_loc (Path | str | None, optional): Path to the parquet file relative to source. Defaults to None.
            validate (bool, optional): Flag to turn on validation of time vectors. NB! Loads all data into memory at once. Defaults to True.

        """
        super().__init__(source, require_whole_years, relative_loc)
        self._index: TimeIndex = None
        if validate:
            self.validate_vectors()

    def get_values(self, vector_id: str) -> NDArray:
        """
        Get numpy array with all the values of a given vector in the Loader's parquet file.

        Args:
            vector_id (str): Unique id of the vector in the file.

        Returns:
            NDArray: Numpy array with values.

        """
        if self._data is None:
            self._data = dict()
        if vector_id not in self._data:
            table = pq.read_table(self.get_source(), columns=[vector_id])
            self._data[vector_id] = table[vector_id].to_numpy()
        # if self._data is None:
        #     self._data = pq.read_table(self.get_source())
        return self._data[vector_id]  # .to_numpy()

    def get_index(self, vector_id: str) -> TimeIndex:  # Could be more types of indexes?
        """
        Get the TimeIndex describing the time dimension of the vectors in the file.

        Args:
            vector_id (str): Not used since all vectors in the NVE parquet files have the same index.

        Returns:
            TimeIndex: TimeIndex object describing the parquet file's index.

        """
        if self._index is None:
            meta = self.get_metadata("")

            if TvMn.FREQUENCY not in meta or (TvMn.FREQUENCY in meta and meta[TvMn.FREQUENCY] is None):
                datetime_index = pd.DatetimeIndex(
                    pd.read_parquet(self.get_source(), columns=[TvMn.DATETIME_COL])[TvMn.DATETIME_COL],
                    tz=meta[TvMn.TIMEZONE],
                ).tolist()
                self._index = self._create_index(
                    datetimes=datetime_index,
                    is_52_week_years=meta[TvMn.IS_52_WEEK_YEARS],
                    extrapolate_first_point=meta[TvMn.EXTRAPOLATE_FISRT_POINT],
                    extrapolate_last_point=meta[TvMn.EXTRAPOLATE_LAST_POINT],
                )
                return self._index

            parquet_file = None
            if TvMn.START not in meta or (TvMn.START in meta and meta[TvMn.START] is None):
                parquet_file = pq.ParquetFile(self.get_source())
                first_batch = next(parquet_file.iter_batches(batch_size=1, columns=[TvMn.DATETIME_COL]))
                # Use the first value of the datetime column as the start timestamp.
                start = pd.to_datetime(first_batch.column(0)[0].as_py())
            else:
                start = meta[TvMn.START]

            if TvMn.NUM_POINTS not in meta or (TvMn.NUM_POINTS in meta and meta[TvMn.NUM_POINTS] is None):
                if parquet_file is None:
                    parquet_file = pq.ParquetFile(self.get_source())
                num_points = parquet_file.metadata.num_rows
            else:
                num_points = meta[TvMn.NUM_POINTS]
            self._index = FixedFrequencyTimeIndex(
                start,
                meta[TvMn.FREQUENCY],
                num_points,
                is_52_week_years=meta[TvMn.IS_52_WEEK_YEARS],
                extrapolate_first_point=meta[TvMn.EXTRAPOLATE_FISRT_POINT],
                extrapolate_last_point=meta[TvMn.EXTRAPOLATE_LAST_POINT],
            )

        return self._index

    def get_metadata(self, vector_id: str) -> dict[str, bool | int | str | datetime | timedelta | tzinfo | None]:
        """
        Retrieve and decode custom metadata from the parquet file.

        Args:
            vector_id (str): Not used

        Raises:
            KeyError: If any of the expected metadata keys is not found in file.

        Returns:
            dict: Dictionary with decoded metadata.

        """
        if self._meta is None:
            path = self.get_source()
            raw_meta = pq.ParquetFile(path).schema_arrow.metadata

            self._meta = self._process_meta(raw_meta)
        return self._meta

    def _get_ids(self) -> list[str]:
        parquet_file = pq.ParquetFile(self.get_source())
        time_vector_ids: list[str] = parquet_file.schema_arrow.names
        time_vector_ids.remove(TvMn.DATETIME_COL)
        return time_vector_ids

    def clear_cache(self) -> None:
        """Clear cached data."""
        self._data = None
        self._meta = None
        self._index = None
__init__(source: Path | str, require_whole_years: bool, relative_loc: Path | str | None = None, validate: bool = True) -> None

Initialize the loader instance and connect it to a Parquet file containing time vector data.

Parameters:

    source (Path | str): Absolute path to the database or parquet file. [required]
    require_whole_years (bool): Flag for validating that the time vectors in the source contain data for complete years. [required]
    relative_loc (Path | str | None, optional): Path to the parquet file relative to source. Defaults to None.
    validate (bool, optional): Flag to turn on validation of time vectors. NB! Loads all data into memory at once. Defaults to True.
Source code in framdata/loaders/time_vector_loaders.py
def __init__(self, source: Path | str, require_whole_years: bool, relative_loc: Path | str | None = None, validate: bool = True) -> None:
    """
    Initialize the loader instance and connect it to a Parquet file containing time vector data.

    Args:
        source (Path | str): Absolute path to the database or parquet file.
        require_whole_years (bool): Flag for validating that the time vectors in the source contain data for complete years.
        relative_loc (Path | str | None, optional): Path to the parquet file relative to source. Defaults to None.
        validate (bool, optional): Flag to turn on validation of time vectors. NB! Loads all data into memory at once. Defaults to True.

    """
    super().__init__(source, require_whole_years, relative_loc)
    self._index: TimeIndex = None
    if validate:
        self.validate_vectors()
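A hedged usage sketch; the paths and vector id below are illustrative:

from pathlib import Path

from framdata.loaders.time_vector_loaders import NVEParquetTimeVectorLoader

# validate=False skips the upfront validation, which would otherwise load all vectors into memory.
loader = NVEParquetTimeVectorLoader(
    source=Path("path/to/database"),
    require_whole_years=True,
    relative_loc="some_folder/Example.profiles.parquet",
    validate=False,
)
values = loader.get_values("Example.vector_1")   # numpy array for one column
index = loader.get_index("")                     # shared index for all vectors in the file
loader.clear_cache()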
clear_cache() -> None

Clear cached data.

Source code in framdata/loaders/time_vector_loaders.py
def clear_cache(self) -> None:
    """Clear cached data."""
    self._data = None
    self._meta = None
    self._index = None
get_index(vector_id: str) -> TimeIndex

Get the TimeIndex describing the time dimension of the vectors in the file.

Parameters:

    vector_id (str): Not used, since all vectors in the NVE parquet files have the same index. [required]

Returns:

    TimeIndex: TimeIndex object describing the parquet file's index.

Source code in framdata/loaders/time_vector_loaders.py
def get_index(self, vector_id: str) -> TimeIndex:  # Could be more types of indexes?
    """
    Get the TimeIndex describing the time dimension of the vectors in the file.

    Args:
        vector_id (str): Not used since all vectors in the NVE parquet files have the same index.

    Returns:
        TimeIndex: TimeIndex object describing the parquet file's index.

    """
    if self._index is None:
        meta = self.get_metadata("")

        if TvMn.FREQUENCY not in meta or (TvMn.FREQUENCY in meta and meta[TvMn.FREQUENCY] is None):
            datetime_index = pd.DatetimeIndex(
                pd.read_parquet(self.get_source(), columns=[TvMn.DATETIME_COL])[TvMn.DATETIME_COL],
                tz=meta[TvMn.TIMEZONE],
            ).tolist()
            self._index = self._create_index(
                datetimes=datetime_index,
                is_52_week_years=meta[TvMn.IS_52_WEEK_YEARS],
                extrapolate_first_point=meta[TvMn.EXTRAPOLATE_FISRT_POINT],
                extrapolate_last_point=meta[TvMn.EXTRAPOLATE_LAST_POINT],
            )
            return self._index

        parquet_file = None
        if TvMn.START not in meta or (TvMn.START in meta and meta[TvMn.START] is None):
            parquet_file = pq.ParquetFile(self.get_source())
            first_batch = next(parquet_file.iter_batches(batch_size=1, columns=[TvMn.DATETIME_COL]))
            # Use the first value of the datetime column as the start timestamp.
            start = pd.to_datetime(first_batch.column(0)[0].as_py())
        else:
            start = meta[TvMn.START]

        if TvMn.NUM_POINTS not in meta or (TvMn.NUM_POINTS in meta and meta[TvMn.NUM_POINTS] is None):
            if parquet_file is None:
                parquet_file = pq.ParquetFile(self.get_source())
            num_points = parquet_file.metadata.num_rows
        else:
            num_points = meta[TvMn.NUM_POINTS]
        self._index = FixedFrequencyTimeIndex(
            start,
            meta[TvMn.FREQUENCY],
            num_points,
            is_52_week_years=meta[TvMn.IS_52_WEEK_YEARS],
            extrapolate_first_point=meta[TvMn.EXTRAPOLATE_FISRT_POINT],
            extrapolate_last_point=meta[TvMn.EXTRAPOLATE_LAST_POINT],
        )

    return self._index
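When the frequency is present but the start or the number of points is missing from the custom metadata, only a minimal amount of the file needs to be read. A pyarrow sketch of those two fallbacks (file and column names illustrative):

import pyarrow.parquet as pq

parquet_file = pq.ParquetFile("example_profiles.parquet")

# Row count comes from the parquet footer, so no vector values are loaded.
num_points = parquet_file.metadata.num_rows

# The start timestamp only needs the first batch of the datetime column.
first_batch = next(parquet_file.iter_batches(batch_size=1, columns=["DateTime"]))
start = first_batch.column(0)[0].as_py()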
get_metadata(vector_id: str) -> dict[str, bool | int | str | datetime | timedelta | tzinfo | None]

Retrieve and decode custom metadata from the parquet file.

Parameters:

    vector_id (str): Not used. [required]

Raises:

    KeyError: If any of the expected metadata keys is not found in the file.

Returns:

    dict[str, bool | int | str | datetime | timedelta | tzinfo | None]: Dictionary with decoded metadata.

Source code in framdata/loaders/time_vector_loaders.py
def get_metadata(self, vector_id: str) -> dict[str, bool | int | str | datetime | timedelta | tzinfo | None]:
    """
    Retrieve and decode custom metadata from the parquet file.

    Args:
        vector_id (str): Not used

    Raises:
        KeyError: If any of the expected metadata keys is not found in file.

    Returns:
        dict: Dictionary with decoded metadata.

    """
    if self._meta is None:
        path = self.get_source()
        raw_meta = pq.ParquetFile(path).schema_arrow.metadata

        self._meta = self._process_meta(raw_meta)
    return self._meta
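Arrow exposes schema-level parquet metadata as a mapping of bytes to bytes, which is why a decoding step such as _process_meta is needed. A small pyarrow sketch with illustrative keys:

import pyarrow as pa
import pyarrow.parquet as pq

# Attach custom key/value metadata to the schema and write the file.
table = pa.table({"DateTime": [1, 2, 3], "Example.vector": [0.1, 0.2, 0.3]})
table = table.replace_schema_metadata({"frequency": "PT1H", "num_points": "3"})
pq.write_table(table, "example_meta.parquet")

# Read it back: keys and values are bytes and still need decoding.
raw_meta = pq.ParquetFile("example_meta.parquet").schema_arrow.metadata
decoded = {key.decode(): value.decode() for key, value in raw_meta.items()}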
get_values(vector_id: str) -> NDArray

Get numpy array with all the values of a given vector in the Loader's parquet file.

Parameters:

    vector_id (str): Unique id of the vector in the file. [required]

Returns:

    NDArray: Numpy array with values.

Source code in framdata/loaders/time_vector_loaders.py
def get_values(self, vector_id: str) -> NDArray:
    """
    Get numpy array with all the values of a given vector in the Loader's parquet file.

    Args:
        vector_id (str): Unique id of the vector in the file.

    Returns:
        NDArray: Numpy array with values.

    """
    if self._data is None:
        self._data = dict()
    if vector_id not in self._data:
        table = pq.read_table(self.get_source(), columns=[vector_id])
        self._data[vector_id] = table[vector_id].to_numpy()
    # if self._data is None:
    #     self._data = pq.read_table(self.get_source())
    return self._data[vector_id]  # .to_numpy()
NVEYamlTimeVectoroader

Bases: NVETimeVectorLoader

Class for loading time vector data from NVE YAML file sources.

Meant for very sparse time vector data, where the vectors have varying lengths and indexes. Currently, all vectors within a file must share the same metadata. Supported format: a Metadata field containing a dictionary with metadata for all vectors; the other fields are vector IDs with lists for the x and y axes.
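For illustration, a file in this layout could be generated roughly as below. The field names are illustrative stand-ins for the YamlNames constants; per _parse_file, the x entry of the metadata names the values field and the y entry names the index field.

from datetime import date

import yaml

# Illustrative document: one shared metadata field plus one field per vector id.
document = {
    "Metadata": {
        "x": {"attribute": "values"},
        "y": {"attribute": "index"},
    },
    "Example.vector": {
        "index": [date(2030, 1, 1), date(2035, 1, 1)],
        "values": [100.0, 120.0],
    },
}
with open("example_vectors.yaml", "w", encoding="utf-8") as f:
    yaml.safe_dump(document, f, sort_keys=False)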

Source code in framdata/loaders/time_vector_loaders.py
class NVEYamlTimeVectoroader(NVETimeVectorLoader):
    """
    Class for loading time vector data from NVE YAML file sources.

    Meant for very sparse time vector data, where the vectors have varying lengths and indexes. Currently all vectors must have the same metadata within each
    file.
    Supported format:
        - Metadata: field containing dictionary with metadata for all vectors.
        - Other fields are vector IDs with lists for x and y axes.

    """

    _SUPPORTED_SUFFIXES: ClassVar[list] = [".yaml", ".yml"]

    def __init__(self, source: Path | str, require_whole_years: bool, relative_loc: Path | str | None = None, validate: bool = True) -> None:
        """
        Initialize the loader instance and connect it to a YAML file containing time vector data.

        Args:
            source (Path | str): Absolute path to the database or YAML file.
            require_whole_years (bool): Flag for validating that the time vectors in the source contain data for complete years.
            relative_loc (Path | str | None, optional): Path to the YAML file relative to source. Defaults to None.
            validate (bool, optional): Flag to turn on validation of time vectors. NB! Loads all data into memory at once. Defaults to True.

        """
        super().__init__(source, require_whole_years, relative_loc)
        self._content_ids: list[str] = None

        self._values_label: str = None
        self._index_label: str = None

        if validate:
            self.validate_vectors()

    def get_values(self, vector_id: str) -> NDArray:
        """
        Get values of vector.

        Args:
            vector_id (str): Unique id of the vector in the Loader source.

        Returns:
            NDArray: Numpy array with values of vector.

        """
        if self._data is None:
            self._parse_file()
        values_list = self._data[vector_id][self._values_label]
        if len(values_list) == 0:
            message = f"Time vector {vector_id} in {self} contains no points."
            raise ValueError(message)
        return np.asarray(values_list)

    def get_index(self, vector_id: str) -> TimeIndex:
        """
        Get the time index of a vector.

        Args:
            vector_id (str): Unique id of the vector in the Loader source.

        Returns:
            TimeIndex: TimeIndex describing the vector's time dimension.

        """
        meta = self.get_metadata(vector_id)  # also parses data
        try:
            datetime_list = [self._date_to_datetime(index_val) for index_val in self._data[vector_id][self._index_label]]
        except ValueError as e:
            message = f"{self} got non date or none datetime values in index field of vector {vector_id}."
            raise ValueError(message) from e

        if len(datetime_list) == 0:
            message = f"Index of {vector_id} in {self} contains no points."
            raise ValueError(message)

        if (len(datetime_list) == 1 or self.get_values(vector_id).size == 1) and meta[TvMn.EXTRAPOLATE_FISRT_POINT] and meta[TvMn.EXTRAPOLATE_LAST_POINT]:
            # Even though _create_index can now handle ConstantTimeIndexes,
            # we need to consider that YAML time vectors can store the extra end date of their final period in the index.
            # That would lead to _create_index not creating a constant index when it should.
            # We may remove this feature in the future.
            return ConstantTimeIndex()

        args = (
            datetime_list,
            meta[TvMn.IS_52_WEEK_YEARS],
            meta[TvMn.EXTRAPOLATE_FISRT_POINT],
            meta[TvMn.EXTRAPOLATE_LAST_POINT],
        )

        if len(datetime_list) == len(self.get_values(vector_id)) + 1:
            return ListTimeIndex(*args)
        # create index with added end datetime
        return self._create_index(*args)

    def get_metadata(self, vector_id: str) -> dict[str, bool | int | str | datetime | timedelta | tzinfo | None]:
        """
        Read YAML file metadata.

        Args:
            vector_id (str): Not used.

        Raises:
            KeyError: If an expected metadata key is missing.

        Returns:
            dict[str, bool|int|str|datetime|timedelta|tzinfo|None]: Metadata dictionary.

        """
        if self._meta is None:
            if self._data is None:
                # Metadata lives in the parsed YAML content, so make sure the file is parsed first.
                self._parse_file()
            raw_meta = self._data[YamlNames.metadata_field][YamlNames.x_field]
            self._meta = self._process_meta(raw_meta)
        return self._meta

    def _get_ids(self) -> list[str]:
        if self._content_ids is None:
            if self._data is None:
                self._parse_file()
            ids_list = list(self._data.keys())
            ids_list.remove(YamlNames.metadata_field)
            self._content_ids = ids_list
        return self._content_ids

    def _parse_file(self) -> None:
        with self.get_source().open(encoding=YamlNames.encoding) as f:
            d = yaml.safe_load(f)
            self._x_meta = d[YamlNames.metadata_field][YamlNames.x_field]
            self._y_meta = d[YamlNames.metadata_field][YamlNames.y_field]

            self._values_label = self._x_meta[YamlNames.attribute]
            self._index_label = self._y_meta[YamlNames.attribute]

            self._data = d

    def _date_to_datetime(self, value: date | datetime) -> datetime:
        if isinstance(value, date):
            value = datetime(value.year, value.month, value.day)
        elif not isinstance(value, datetime):
            message = "Value must be date or datetime."
            raise ValueError(message)
        return value

    def clear_cache(self) -> None:
        """Clear cached data."""
        self._data = None
        self._meta = None

        self._content_ids = None

        self._values_label = None
        self._index_label = None
__init__(source: Path | str, require_whole_years: bool, relative_loc: Path | str | None = None, validate: bool = True) -> None

Initialize the loader instance and connect it to a YAML file containing time vector data.

Parameters:

    source (Path | str): Absolute path to the database or YAML file. [required]
    require_whole_years (bool): Flag for validating that the time vectors in the source contain data for complete years. [required]
    relative_loc (Path | str | None, optional): Path to the YAML file relative to source. Defaults to None.
    validate (bool, optional): Flag to turn on validation of time vectors. NB! Loads all data into memory at once. Defaults to True.
Source code in framdata/loaders/time_vector_loaders.py
def __init__(self, source: Path | str, require_whole_years: bool, relative_loc: Path | str | None = None, validate: bool = True) -> None:
    """
    Initialize the loader instance and connect it to a YAML file containing time vector data.

    Args:
        source (Path | str): Absolute path to the database or YAML file.
        require_whole_years (bool): Flag for validating that the time vectors in the source contain data for complete years.
        relative_loc (Path | str | None, optional): Path to the YAML file relative to source. Defaults to None.
        validate (bool, optional): Flag to turn on validation of time vectors. NB! Loads all data into memory at once. Defaults to True.

    """
    super().__init__(source, require_whole_years, relative_loc)
    self._content_ids: list[str] = None

    self._values_label: str = None
    self._index_label: str = None

    if validate:
        self.validate_vectors()
clear_cache() -> None

Clear cached data.

Source code in framdata/loaders/time_vector_loaders.py
def clear_cache(self) -> None:
    """Clear cached data."""
    self._data = None
    self._meta = None

    self._content_ids = None

    self._values_label = None
    self._index_label = None
get_index(vector_id: str) -> TimeIndex

Get the time index of a vector.

Parameters:

    vector_id (str): Unique id of the vector in the Loader source. [required]

Returns:

    TimeIndex: TimeIndex describing the vector's time dimension.

Source code in framdata/loaders/time_vector_loaders.py
def get_index(self, vector_id: str) -> TimeIndex:
    """
    Get the time index of a vector.

    Args:
        vector_id (str): Unique id of the vector in the Loader source.

    Returns:
        TimeIndex: TimeIndex describing the vector's time dimension.

    """
    meta = self.get_metadata(vector_id)  # also parses data
    try:
        datetime_list = [self._date_to_datetime(index_val) for index_val in self._data[vector_id][self._index_label]]
    except ValueError as e:
        message = f"{self} got non date or none datetime values in index field of vector {vector_id}."
        raise ValueError(message) from e

    if len(datetime_list) == 0:
        message = f"Index of {vector_id} in {self} contains no points."
        raise ValueError(message)

    if (len(datetime_list) == 1 or self.get_values(vector_id).size == 1) and meta[TvMn.EXTRAPOLATE_FISRT_POINT] and meta[TvMn.EXTRAPOLATE_LAST_POINT]:
        # Even though _create_index can now handle ConstantTimeIndexes,
        # we need to consider that YAML time vectors can store the extra end date of their final period in the index.
        # That would lead to _create_index not creating a constant index when it should.
        # We may remove this feature in the future.
        return ConstantTimeIndex()

    args = (
        datetime_list,
        meta[TvMn.IS_52_WEEK_YEARS],
        meta[TvMn.EXTRAPOLATE_FISRT_POINT],
        meta[TvMn.EXTRAPOLATE_LAST_POINT],
    )

    if len(datetime_list) == len(self.get_values(vector_id)) + 1:
        return ListTimeIndex(*args)
    # create index with added end datetime
    return self._create_index(*args)
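The branching above distinguishes three index shapes. A small illustration with plain datetimes (the TimeIndex classes themselves come from the framework and are not constructed here):

from datetime import datetime

values = [100.0, 120.0]

# Case 1: index carries an extra closing date, len(index) == len(values) + 1 -> ListTimeIndex.
index_with_end = [datetime(2030, 1, 1), datetime(2035, 1, 1), datetime(2040, 1, 1)]
assert len(index_with_end) == len(values) + 1

# Case 2: index and values have equal length -> _create_index adds the missing end datetime.
index_without_end = [datetime(2030, 1, 1), datetime(2035, 1, 1)]
assert len(index_without_end) == len(values)

# Case 3: a single point with extrapolation on both ends -> ConstantTimeIndex.
constant_values = [100.0]
constant_index = [datetime(2030, 1, 1)]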
get_metadata(vector_id: str) -> dict[str, bool | int | str | datetime | timedelta | tzinfo | None]

Read YAML file metadata.

Parameters:

    vector_id (str): Not used. [required]

Raises:

    KeyError: If an expected metadata key is missing.

Returns:

    dict[str, bool | int | str | datetime | timedelta | tzinfo | None]: Metadata dictionary.

Source code in framdata/loaders/time_vector_loaders.py
def get_metadata(self, vector_id: str) -> dict[str, bool | int | str | datetime | timedelta | tzinfo | None]:
    """
    Read YAML file metadata.

    Args:
        vector_id (str): Not used.

    Raises:
        KeyError: If an expected metadata key is missing.

    Returns:
        dict[str, bool|int|str|datetime|timedelta|tzinfo|None]: Metadata dictionary.

    """
    if self._meta is None:
        if self._data is None:
            # Metadata lives in the parsed YAML content, so make sure the file is parsed first.
            self._parse_file()
        raw_meta = self._data[YamlNames.metadata_field][YamlNames.x_field]
        self._meta = self._process_meta(raw_meta)
    return self._meta
get_values(vector_id: str) -> NDArray

Get the values of a vector.

Parameters:

    vector_id (str): Unique id of the vector in the Loader source. [required]

Returns:

    NDArray: Numpy array with the values of the vector.

Source code in framdata/loaders/time_vector_loaders.py
def get_values(self, vector_id: str) -> NDArray:
    """
    Get values of vector.

    Args:
        vector_id (str): Unique id of the vector in the Loader source.

    Returns:
        NDArray: Numpy array with values of vector.

    """
    if self._data is None:
        self._parse_file()
    values_list = self._data[vector_id][self._values_label]
    if len(values_list) == 0:
        message = f"Time vector {vector_id} in {self} contains no points."
        raise ValueError(message)
    return np.asarray(values_list)