Code Reference

framdata

database_names

DatabaseNames

Container for names and locations of files and folders in the NVE database.

DatabaseNames

Bases: Base

Define names of files and folders in the NVE database and map files to folders.

Source code in framdata/database_names/DatabaseNames.py
class DatabaseNames(Base):
    """Define names of files and folders in the NVE database and map files to folders."""

    # ---------- FILE EXTENSIONS ---------- #
    ext_excel = ".xlsx"
    ext_h5 = ".h5"
    ext_parquet = ".parquet"
    ext_yaml = ".yaml"

    # ---------- SHEETS ---------- #
    data_sheet = "Data"
    metadata_sheet = "Metadata"

    # ---------- SUFFIXES ---------- #
    capacity = ".capacity"
    prices = ".prices"
    profiles = ".profiles"
    curves = ".curves"

    # ---------- DATABASE FOLDERS MAP ---------- #
    db00 = "db00_nodes"
    db01 = "db01_nodes_profiles"
    db10 = "db10_wind_solar"
    db11 = "db11_wind_solar_profiles"
    db20 = "db20_hydropower"
    db21 = "db21_hydropower_profiles"
    db22 = "db22_hydropower_curves"
    db30 = "db30_thermal"
    db31 = "db31_thermal_profiles"
    db40 = "db40_demand"
    db41 = "db41_demand_profiles"
    db50 = "db50_transmission"
    db51 = "db51_transmission_profiles"

    db_folder_list: ClassVar[list] = [db00, db01, db10, db11, db20, db21, db22, db30, db31, db40, db41, db50, db51]

    # ---------- FILENAMES ---------- #
    # ==== NODES ====
    power_nodes = "Power.Nodes"
    power_nodes_prices = "Power.Nodes.prices"
    power_nodes_profiles = "Power.Nodes.profiles"

    fuel_nodes = "Fuel.Nodes"
    fuel_nodes_prices = "Fuel.Nodes.prices"
    fuel_nodes_profiles = "Fuel.Nodes.profiles"

    emission_nodes = "Emission.Nodes"
    emission_nodes_prices = "Emission.Nodes.prices"
    emission_nodes_profiles = "Emission.Nodes.profiles"

    # ==== THERMAL ====
    thermal_generators = "Thermal.Generators"
    thermal_generators_capacity = "Thermal.Generators.capacity"
    thermal_generators_profiles = "Thermal.Generators.profiles"

    # ==== HYDROPOWER ====
    # hydro attribute tables
    hydro_modules = "Hydropower.Modules"
    hydro_modules_volumecapacity = "Hydropower.Modules.VolumeCapacity"
    hydro_modules_enekv_global_derived = "Hydropower.Modules.enekv_global_derived"
    hydro_modules_reggrad_glob_derived = "Hydropower.Modules.reggrad_glob_derived"
    hydro_modules_reggrad_lok_derived = "Hydropower.Modules.reggrad_lok_derived"
    hydro_bypass = "Hydropower.Bypass"
    hydro_generators = "Hydropower.Generators"
    hydro_inflow = "Hydropower.Inflow"
    hydro_inflow_yearvolume = "Hydropower.Inflow.YearVolume"
    hydro_inflow_upstream_inflow_derived = "Hydropower.Inflow.upstream_inflow_derived"
    hydro_pumps = "Hydropower.Pumps"
    hydro_reservoirs = "Hydropower.Reservoirs"

    # hydro time series
    hydro_inflow_profiles = "Hydropower.Inflow.profiles"
    hydro_bypass_operationalbounds_restrictions = "Hydropower.Bypass.OperationalBounds.Restrictions"
    hydro_modules_operationalbounds_restrictions = "Hydropower.Modules.OperationalBounds.Restrictions"
    hydro_reservoirs_operationalbounds_restrictions = "Hydropower.Reservoirs.OperationalBounds.Restrictions"
    hydro_generators_energyeq_mid = "Hydropower.Generators.EnergyEq_mid"

    # hydro curves
    hydro_curves = "Hydropower.curves"
    hydro_pqcurves = "Hydropower.pqcurves"

    # ==== DEMAND ====
    demand_consumers = "Demand.Consumers"
    demand_consumers_capacity = "Demand.Consumers.capacity"
    demand_consumers_normalprices = "Demand.Consumers.normalprices"
    demand_consumers_profiles_weatheryears = "Demand.Consumers.profiles.weatheryears"
    demand_consumers_profiles_oneyear = "Demand.Consumers.profiles"

    # ==== WIND AND SOLAR ====
    wind_generators = "Wind.Generators"
    wind_generators_capacity = "Wind.Generators.capacity"
    wind_generators_profiles = "Wind.Generators.profiles"
    solar_generators = "Solar.Generators"
    solar_generators_capacity = "Solar.Generators.capacity"
    solar_generators_profiles = "Solar.Generators.profiles"

    # ==== Transmission ====
    transmission_grid = "Transmission.Grid"
    transmission_capacity = transmission_grid + ".capacity"
    transmission_loss = transmission_grid + ".loss"
    transmission_profiles = transmission_grid + ".profiles"

    # ---------- DATABASE FOLDER MAP ---------- #
    db_folder_map: ClassVar[dict[str, list[str]]] = {
        # ==== NODES ====
        power_nodes: db00,
        fuel_nodes: db00,
        emission_nodes: db00,
        power_nodes_prices: db01,
        fuel_nodes_prices: db01,
        emission_nodes_prices: db01,
        power_nodes_profiles: db01,
        fuel_nodes_profiles: db01,
        emission_nodes_profiles: db01,
        # ==== HYDROPOWER ====
        # hydro attribute tables
        hydro_modules: db20,
        hydro_modules_volumecapacity: db20,
        hydro_modules_enekv_global_derived: db20,
        hydro_modules_reggrad_glob_derived: db20,
        hydro_modules_reggrad_lok_derived: db20,
        hydro_bypass: db20,
        hydro_generators: db20,
        hydro_inflow: db20,
        hydro_inflow_yearvolume: db20,
        hydro_inflow_upstream_inflow_derived: db20,
        hydro_pumps: db20,
        hydro_reservoirs: db20,
        # hydro time series
        hydro_inflow_profiles: db21,
        hydro_bypass_operationalbounds_restrictions: db21,
        hydro_modules_operationalbounds_restrictions: db21,
        hydro_reservoirs_operationalbounds_restrictions: db21,
        hydro_generators_energyeq_mid: db21,
        # hydro curves
        hydro_curves: db22,
        hydro_pqcurves: db22,
        # ==== THERMAL ====
        thermal_generators: db30,
        thermal_generators_capacity: db30,
        thermal_generators_profiles: db31,
        # ==== DEMAND ====
        demand_consumers: db40,
        demand_consumers_capacity: db40,
        demand_consumers_normalprices: db40,
        demand_consumers_profiles_weatheryears: db41,
        demand_consumers_profiles_oneyear: db41,
        # ==== WIND AND SOLAR ====
        wind_generators: db10,
        wind_generators_capacity: db11,
        wind_generators_profiles: db11,
        solar_generators: db10,
        solar_generators_capacity: db11,
        solar_generators_profiles: db11,
        # ==== Transmission ====
        transmission_grid: db50,
        transmission_capacity: db51,
        transmission_loss: db51,
        transmission_profiles: db51,
    }

    @classmethod
    def get_relative_folder_path(cls, file_id: str) -> Path:
        """
        Get the relative database folder path for a given file_id.

        The relative path consists of database folder and file name.

        Args:
            file_id (str): Identifier for the file to retrieve.

        Returns:
            Path: The database folder name.

        """
        try:
            return Path(cls.db_folder_map[file_id])
        except KeyError as e:
            message = f"File id '{file_id}' not found in database folder map."

            raise KeyError(message) from e

    @classmethod
    def get_file_name(cls, source: Path, db_folder: str, file_id: str) -> str | None:
        """
        Get the name of a file, with extension, from a file ID and a path.

        Args:
            source (Path): Root path of the database.
            db_folder (str): Database folder to look for the file in.
            file_id (str): ID of file, i.e. the name of the file without extension.

        Raises:
            RuntimeError: If multiple files with the same ID but different extensions are found.

        Returns:
            str | None: File ID and extension combined. If file is not found, return None.

        """
        db_path = source / db_folder
        if not db_path.exists():
            message = f"The database folder {db_path} does not exist."
            raise FileNotFoundError(message)
        candidate_extentions = set()
        for file_path in db_path.iterdir():
            if file_path.is_file() and file_path.stem == file_id:
                candidate_extentions.add(file_path.suffix)
        if len(candidate_extentions) > 1:  # Multiple files of same ID. Ambiguous
            message = (
                f"Found multiple files with ID {file_id} (with different extensions: {candidate_extentions}) in database folder {db_path}."
                " File names must be unique."
            )
            raise RuntimeError(message)
        if len(candidate_extentions) == 0:  # No matching files.
            return None
            # message = f"Found no file with ID {file_id} in database folder {db_path}."
            # raise FileNotFoundError(message)

        (extension,) = candidate_extentions  # We have only one candidate, so we extract it.
        return file_id + extension
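
The db_folder_map above is the single lookup used to place every file ID in its database folder. The snippet below is a small sketch of how the mapping can be inspected directly, for example to list the file IDs stored in each folder; it only uses the class attributes documented above.

from collections import defaultdict

from framdata.database_names.DatabaseNames import DatabaseNames

files_per_folder: dict[str, list[str]] = defaultdict(list)
for file_id, folder in DatabaseNames.db_folder_map.items():
    files_per_folder[folder].append(file_id)

for folder in DatabaseNames.db_folder_list:
    print(folder, sorted(files_per_folder[folder]))
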
get_file_name(source: Path, db_folder: str, file_id: str) -> str | None classmethod

Get the name of a file, with extension, from a file ID and a path.

Parameters:

- source (Path): Root path of the database. Required.
- db_folder (str): Database folder to look for the file in. Required.
- file_id (str): ID of file, i.e. the name of the file without extension. Required.

Raises:

- RuntimeError: If multiple files with the same ID but different extensions are found.

Returns:

- str | None: File ID and extension combined. If the file is not found, returns None.

Source code in framdata/database_names/DatabaseNames.py
@classmethod
def get_file_name(cls, source: Path, db_folder: str, file_id: str) -> str | None:
    """
    Get the name of a file, with extension, from a file ID and a path.

    Args:
        source (Path): Root path of the database.
        db_folder (str): Database folder to look for the file in.
        file_id (str): ID of file, i.e. the name of the file without extension.

    Raises:
        RuntimeError: If multiple files with the same ID but different extensions are found.

    Returns:
        str | None: File ID and extension combined. If file is not found, return None.

    """
    db_path = source / db_folder
    if not db_path.exists():
        message = f"The database folder {db_path} does not exist."
        raise FileNotFoundError(message)
    candidate_extentions = set()
    for file_path in db_path.iterdir():
        if file_path.is_file() and file_path.stem == file_id:
            candidate_extentions.add(file_path.suffix)
    if len(candidate_extentions) > 1:  # Multiple files of same ID. Ambiguous
        message = (
            f"Found multiple files with ID {file_id} (with different extensions: {candidate_extentions}) in database folder {db_path}."
            " File names must be unique."
        )
        raise RuntimeError(message)
    if len(candidate_extentions) == 0:  # No matching files.
        return None
        # message = f"Found no file with ID {file_id} in database folder {db_path}."
        # raise FileNotFoundError(message)

    (extension,) = candidate_extentions  # We have only one candidate, so we extract it.
    return file_id + extension
get_relative_folder_path(file_id: str) -> Path classmethod

Get the relative database folder path for a given file_id.

The relative path consists of database folder and file name.

Parameters:

- file_id (str): Identifier for the file to retrieve. Required.

Returns:

- Path: The database folder name.

Source code in framdata/database_names/DatabaseNames.py
@classmethod
def get_relative_folder_path(cls, file_id: str) -> Path:
    """
    Get the relative database folder path for a given file_id.

    The relative path consists of database folder and file name.

    Args:
        file_id (str): Identifier for the file to retrieve.

    Returns:
        Path: The database folder name.

    """
    try:
        return Path(cls.db_folder_map[file_id])
    except KeyError as e:
        message = f"File id '{file_id}' not found in database folder map."

        raise KeyError(message) from e
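
Together, get_relative_folder_path and get_file_name resolve the full path of a data file from its file ID. The following is a minimal usage sketch; the database root is a hypothetical example, and only the DatabaseNames attributes and methods come from the class above.

from pathlib import Path

from framdata.database_names.DatabaseNames import DatabaseNames

source = Path("/data/nve_database")  # hypothetical database root

file_id = DatabaseNames.hydro_modules  # "Hydropower.Modules"
db_folder = DatabaseNames.get_relative_folder_path(file_id)  # Path("db20_hydropower")

# Returns e.g. "Hydropower.Modules.parquet", or None if no matching file exists.
file_name = DatabaseNames.get_file_name(source, str(db_folder), file_id)
if file_name is not None:
    print(source / db_folder / file_name)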

DemandNames

Contains classes defining the demand table and validations.

DemandMetadataSchema

Bases: _AttributeMetadataSchema

Pandera DataFrameModel schema for metadata in the Demand.Consumers file.

Source code in framdata/database_names/DemandNames.py
class DemandMetadataSchema(_AttributeMetadataSchema):
    """Pandera DataFrameModel schema for metadata in the Demand.Consumers file."""

    @pa.dataframe_check
    @classmethod
    def check_unit_is_str_for_attributes(cls, df: pd.DataFrame) -> Series[bool]:
        """
        Check that the 'unit' value is a string for the row where 'attribute' is 'Capacity'.

        Args:
            df (pd.DataFrame): DataFrame used to check the value of "unit".

        Returns:
            Series[bool]: Series of boolean values denoting whether each element has passed the check.

        """
        return check_unit_is_str_for_attributes(df, [DemandNames.capacity_col])
check_unit_is_str_for_attributes(df: pd.DataFrame) -> Series[bool] classmethod

Check that the 'unit' value is a string for the row where 'attribute' is 'Capacity'.

Parameters:

- df (pd.DataFrame): DataFrame used to check the value of "unit". Required.

Returns:

- Series[bool]: Series of boolean values denoting whether each element has passed the check.

Source code in framdata/database_names/DemandNames.py
305
306
307
308
309
310
311
312
313
314
315
316
317
318
@pa.dataframe_check
@classmethod
def check_unit_is_str_for_attributes(cls, df: pd.DataFrame) -> Series[bool]:
    """
    Check that the 'unit' value is a string for the row where 'attribute' is 'Capacity'.

    Args:
        df (pd.DataFrame): DataFrame used to check the value of "unit".

    Returns:
        Series[bool]: Series of boolean values denoting whether each element has passed the check.

    """
    return check_unit_is_str_for_attributes(df, [DemandNames.capacity_col])
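
The rule enforced by this check can be illustrated with plain pandas: the metadata table must provide a string unit on the row whose attribute is "Capacity". The sketch below is not the library helper itself; the "attribute" and "unit" column names and the sample rows are assumptions for illustration only.

import pandas as pd

# Hypothetical metadata rows with assumed "attribute" and "unit" columns.
meta = pd.DataFrame(
    {
        "attribute": ["Capacity", "CapacityProfile"],
        "unit": ["MW", None],
    }
)

# A row passes when its attribute is not "Capacity", or when its unit is a string.
passed = (meta["attribute"] != "Capacity") | meta["unit"].apply(lambda u: isinstance(u, str))
print(passed.tolist())  # [True, True]
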
DemandNames

Bases: _BaseComponentsNames

Container class for describing the demand attribute table's names, structure, and conversion to Demand components.

Source code in framdata/database_names/DemandNames.py
class DemandNames(_BaseComponentsNames):
    """Container class for describing the demand attribute table's names, structure, and convertion to Demand Component."""

    id_col = "ConsumerID"
    node_col = "PowerNode"
    reserve_price_col = "ReservePrice"
    price_elasticity_col = "PriceElasticity"
    min_price_col = "MinPriceLimit"
    max_price_col = "MaxPriceLimit"
    normal_price_col = "NormalPrice"
    capacity_profile_col = "CapacityProfile"
    temperature_profile_col = "TemperatureProfile"
    capacity_col = "Capacity"

    columns: ClassVar[list[str]] = [
        id_col,
        node_col,
        reserve_price_col,
        price_elasticity_col,
        min_price_col,
        max_price_col,
        normal_price_col,
        capacity_profile_col,
        temperature_profile_col,
        capacity_col,
    ]

    ref_columns: ClassVar[list[str]] = [
        node_col,
        reserve_price_col,
        price_elasticity_col,
        min_price_col,
        max_price_col,
        normal_price_col,
        capacity_profile_col,
        temperature_profile_col,
        capacity_col,
    ]

    @staticmethod
    def create_component(
        row: NDArray,
        indices: dict[str, int],
        meta_columns: set[str],
        meta_data: pd.DataFrame,
        attribute_objects: dict[str, tuple[object, dict[str, Meta]]] | None = None,
    ) -> dict[str, Demand]:
        """
        Create a Demand component from a table row in the Demand.Consumers file.

        Args:
            row (NDArray): Array containing the values of one table row, representing one Demand object.
            indices (dict[str, int]): Mapping of the table's column names to the array's indices.
            meta_columns (set[str]): Set of columns which define memberships in meta groups for aggregation.
            meta_data (pd.DataFrame): DataFrame containing at least the unit of every column.
            attribute_objects (dict[str, tuple[object, dict[str, Meta]]], optional): NOT USED

        Returns:
            dict[str, Demand]: A dictionary with the consumer_id as key and the demand component as value.

        """
        elastic_demand_cols = [
            DemandNames.price_elasticity_col,
            DemandNames.min_price_col,
            DemandNames.max_price_col,
            DemandNames.normal_price_col,
        ]
        columns_to_parse = [
            DemandNames.reserve_price_col,
            DemandNames.capacity_profile_col,
            DemandNames.temperature_profile_col,
            DemandNames.capacity_col,
        ]
        columns_to_parse.extend(elastic_demand_cols)

        arg_user_code = DemandNames._parse_args(row, indices, columns_to_parse, meta_data)

        elastic_demand_values = [value for key, value in arg_user_code.items() if key in elastic_demand_cols]
        if all(value is not None for value in elastic_demand_values):
            elastic_demand = ElasticDemand(
                price_elasticity=Elasticity(level=arg_user_code[DemandNames.price_elasticity_col]),
                min_price=Price(level=arg_user_code[DemandNames.min_price_col]),
                normal_price=Price(level=arg_user_code[DemandNames.normal_price_col]),
                max_price=Price(level=arg_user_code[DemandNames.max_price_col]),
            )
            reserve_price = None
        elif arg_user_code[DemandNames.reserve_price_col] is not None:
            elastic_demand = None
            reserve_price = ReservePrice(level=arg_user_code[DemandNames.reserve_price_col])
        else:
            elastic_demand = None
            reserve_price = None
        demand = Demand(
            node=row[indices[DemandNames.node_col]],
            capacity=MaxFlowVolume(
                level=arg_user_code[DemandNames.capacity_col],
                profile=arg_user_code[DemandNames.capacity_profile_col],
            ),
            reserve_price=reserve_price,
            elastic_demand=elastic_demand,
            temperature_profile=arg_user_code[DemandNames.temperature_profile_col],
        )
        DemandNames._add_meta(demand, row, indices, meta_columns)

        return {row[indices[DemandNames.id_col]]: demand}

    @staticmethod
    def get_attribute_data_schema() -> pa.DataFrameModel:
        """
        Get the Pandera DataFrameModel schema for attribute data in the Demand.Consumers file.

        Returns:
            DemandSchema (pa.DataFrameModel): Pandera DataFrameModel schema for Demand attribute data.

        """
        return DemandSchema

    @staticmethod
    def get_metadata_schema() -> pa.DataFrameModel:
        """
        Get the Pandera DataFrameModel schema for metadata in the Demand.Consumers file.

        Returns:
            DemandMetadataSchema (pa.DataFrameModel): Pandera DataFrameModel schema for Demand metadata.

        """
        return DemandMetadataSchema

    @staticmethod
    def _get_unique_check_descriptions() -> dict[str, tuple[str, bool]]:
        """
        Retrieve a dictionary with descriptions of validation checks that are specific to the Demand schemas.

        Returns:
            dict[str, tuple[str, bool]]: A dictionary where:
                - Keys (str): The name of the validation check method.
                - Values (tuple[str, bool]):
                    - The first element (str) provides a concise and user-friendly description of the check. E.g. what
                      caused the validation error or what is required for the check to pass.
                    - The second element (bool) indicates whether the check is a warning (True) or an error (False).

        """
        return {
            "check_elastic_demand": ("Missing elastic demand value.", True),
        }

    @staticmethod
    def _format_unique_checks(errors: pd.DataFrame) -> pd.DataFrame:
        """
        Format the error DataFrame according to the validation checks that are specific to the Demand schemas.

        This method processes validation errors that come from a dataframe-level check on elastic demand columns in the
        attribute data schema. The default reporting on failed dataframe-level checks in Pandera's standard error
        reports DataFrame (errors) is not very user-friendly. It can contain unnecessary rows about columns that are not
        relevant to the check and will not include rows about the columns relevant to the check if those columns have
        missing values. This method removes unnecessary rows from the error DataFrame and ensures that rows with
        information about the elastic demand columns that fail the check are included.

        Args:
            errors (pd.DataFrame): DataFrame containing validation errors. Pandera's standard error reports DataFrame.

        Returns:
            pd.DataFrame: The updated error DataFrame with formatted rows for unique validation checks.

        """
        if "check_elastic_demand" in errors[DemandNames.COL_CHECK].to_numpy():
            check_rows = errors.loc[errors[DemandNames.COL_CHECK] == "check_elastic_demand"]
            errors = errors[~(errors[DemandNames.COL_CHECK] == "check_elastic_demand")]
            elastic_demand_columns = [
                DemandNames.price_elasticity_col,
                DemandNames.min_price_col,
                DemandNames.max_price_col,
                DemandNames.normal_price_col,
            ]
            check_description_str = check_rows[DemandNames.COL_CHECK_DESC].unique()[0]
            elastic_demand_rows = []
            for idx in check_rows[DemandNames.COL_IDX].unique():
                check_case = check_rows[check_rows[DemandNames.COL_IDX] == idx]
                for col in elastic_demand_columns:
                    if col not in list(check_case[DemandNames.COL_COLUMN].unique()):
                        elastic_demand_rows.append(
                            [
                                col,
                                "check_elastic_demand",
                                None,
                                idx,
                                check_description_str,
                                True,
                            ],
                        )
            errors = pd.concat([errors, pd.DataFrame(elastic_demand_rows, columns=errors.columns)], ignore_index=True)
        return errors
create_component(row: NDArray, indices: dict[str, int], meta_columns: set[str], meta_data: pd.DataFrame, attribute_objects: dict[str, tuple[object, dict[str, Meta]]] | None = None) -> dict[str, Demand] staticmethod

Create a Demand component from a table row in the Demand.Consumers file.

Parameters:

- row (NDArray): Array containing the values of one table row, representing one Demand object. Required.
- indices (dict[str, int]): Mapping of the table's column names to the array's indices. Required.
- meta_columns (set[str]): Set of columns which define memberships in meta groups for aggregation. Required.
- meta_data (pd.DataFrame): DataFrame containing at least the unit of every column. Required.
- attribute_objects (dict[str, tuple[object, dict[str, Meta]]], optional): NOT USED. Defaults to None.

Returns:

- dict[str, Demand]: A dictionary with the consumer_id as key and the demand component as value.

Source code in framdata/database_names/DemandNames.py
@staticmethod
def create_component(
    row: NDArray,
    indices: dict[str, int],
    meta_columns: set[str],
    meta_data: pd.DataFrame,
    attribute_objects: dict[str, tuple[object, dict[str, Meta]]] | None = None,
) -> dict[str, Demand]:
    """
    Create a Demand component from a table row in the Demand.Consumers file.

    Args:
        row (NDArray): Array containing the values of one table row, representing one Demand object.
        indices (dict[str, int]): Mapping of the table's column names to the array's indices.
        meta_columns (set[str]): Set of columns which define memberships in meta groups for aggregation.
        meta_data (pd.DataFrame): DataFrame containing at least the unit of every column.
        attribute_objects (dict[str, tuple[object, dict[str, Meta]]], optional): NOT USED

    Returns:
        dict[str, Demand]: A dictionary with the consumer_id as key and the demand component as value.

    """
    elastic_demand_cols = [
        DemandNames.price_elasticity_col,
        DemandNames.min_price_col,
        DemandNames.max_price_col,
        DemandNames.normal_price_col,
    ]
    columns_to_parse = [
        DemandNames.reserve_price_col,
        DemandNames.capacity_profile_col,
        DemandNames.temperature_profile_col,
        DemandNames.capacity_col,
    ]
    columns_to_parse.extend(elastic_demand_cols)

    arg_user_code = DemandNames._parse_args(row, indices, columns_to_parse, meta_data)

    elastic_demand_values = [value for key, value in arg_user_code.items() if key in elastic_demand_cols]
    if all(value is not None for value in elastic_demand_values):
        elastic_demand = ElasticDemand(
            price_elasticity=Elasticity(level=arg_user_code[DemandNames.price_elasticity_col]),
            min_price=Price(level=arg_user_code[DemandNames.min_price_col]),
            normal_price=Price(level=arg_user_code[DemandNames.normal_price_col]),
            max_price=Price(level=arg_user_code[DemandNames.max_price_col]),
        )
        reserve_price = None
    elif arg_user_code[DemandNames.reserve_price_col] is not None:
        elastic_demand = None
        reserve_price = ReservePrice(level=arg_user_code[DemandNames.reserve_price_col])
    else:
        elastic_demand = None
        reserve_price = None
    demand = Demand(
        node=row[indices[DemandNames.node_col]],
        capacity=MaxFlowVolume(
            level=arg_user_code[DemandNames.capacity_col],
            profile=arg_user_code[DemandNames.capacity_profile_col],
        ),
        reserve_price=reserve_price,
        elastic_demand=elastic_demand,
        temperature_profile=arg_user_code[DemandNames.temperature_profile_col],
    )
    DemandNames._add_meta(demand, row, indices, meta_columns)

    return {row[indices[DemandNames.id_col]]: demand}
get_attribute_data_schema() -> pa.DataFrameModel staticmethod

Get the Pandera DataFrameModel schema for attribute data in the Demand.Consumers file.

Returns:

- DemandSchema (pa.DataFrameModel): Pandera DataFrameModel schema for Demand attribute data.

Source code in framdata/database_names/DemandNames.py
@staticmethod
def get_attribute_data_schema() -> pa.DataFrameModel:
    """
    Get the Pandera DataFrameModel schema for attribute data in the Demand.Consumers file.

    Returns:
        DemandSchema (pa.DataFrameModel): Pandera DataFrameModel schema for Demand attribute data.

    """
    return DemandSchema
get_metadata_schema() -> pa.DataFrameModel staticmethod

Get the Pandera DataFrameModel schema for metadata in the Demand.Consumers file.

Returns:

- DemandMetadataSchema (pa.DataFrameModel): Pandera DataFrameModel schema for Demand metadata.

Source code in framdata/database_names/DemandNames.py
@staticmethod
def get_metadata_schema() -> pa.DataFrameModel:
    """
    Get the Pandera DataFrameModel schema for metadata in the Demand.Consumers file.

    Returns:
        DemandMetadataSchema (pa.DataFrameModel): Pandera DataFrameModel schema for Demand metadata.

    """
    return DemandMetadataSchema
DemandSchema

Bases: DataFrameModel

Pandera DataFrameModel schema for attribute data in the Demand.Consumers file.

Source code in framdata/database_names/DemandNames.py
class DemandSchema(pa.DataFrameModel):
    """Pandera DataFrameModel schema for attribute data in the Demand.Consumers file."""

    ConsumerID: Series[str] = pa.Field(unique=True, nullable=False)
    PowerNode: Series[str] = pa.Field(nullable=False)
    ReservePrice: Series[Any] = pa.Field(nullable=True)
    PriceElasticity: Series[Any] = pa.Field(nullable=True)
    MinPriceLimit: Series[Any] = pa.Field(nullable=True)
    MaxPriceLimit: Series[Any] = pa.Field(nullable=True)
    NormalPrice: Series[Any] = pa.Field(nullable=True)
    CapacityProfile: Series[Any] = pa.Field(nullable=True)
    TemperatureProfile: Series[Any] = pa.Field(nullable=True)
    Capacity: Series[Any] = pa.Field(nullable=False)

    @pa.check(DemandNames.capacity_col)
    @classmethod
    def dtype_str_int_float(cls, series: Series[Any]) -> Series[bool]:
        """Check if values in the series are of datatype: str, int or float."""
        return dtype_str_int_float(series)

    @pa.check(
        DemandNames.reserve_price_col,
        DemandNames.price_elasticity_col,
        DemandNames.min_price_col,
        DemandNames.max_price_col,
        DemandNames.normal_price_col,
        DemandNames.capacity_profile_col,
        DemandNames.temperature_profile_col,
    )
    @classmethod
    def dtype_str_int_float_none(cls, series: Series[Any]) -> Series[bool]:
        """Check if values in the series are of datatype: str, int, float or None."""
        return dtype_str_int_float_none(series)

    @pa.check(DemandNames.price_elasticity_col)
    @classmethod
    def numeric_values_less_than_or_equal_to_0(cls, series: Series[Any]) -> Series[bool]:
        """Check if numeric values in the series are less than or equal to zero."""
        return numeric_values_less_than_or_equal_to(series, 0)

    @pa.check(
        DemandNames.reserve_price_col,
        DemandNames.min_price_col,
        DemandNames.max_price_col,
        DemandNames.normal_price_col,
        DemandNames.capacity_col,
    )
    @classmethod
    def numeric_values_greater_than_or_equal_to_0(cls, series: Series[Any]) -> Series[bool]:
        """Check if numeric values in the series are greater than or equal to zero."""
        return numeric_values_greater_than_or_equal_to(series, 0)

    @pa.check(DemandNames.capacity_profile_col)
    @classmethod
    def numeric_values_are_between_or_equal_to_0_and_1(cls, series: Series[Any]) -> Series[bool]:
        """Check if numeric values in the series are between zero and one or equal to zero and one."""
        return numeric_values_are_between_or_equal_to(series, 0, 1)

    @pa.dataframe_check
    @classmethod
    def check_elastic_demand(cls, df: DataFrame) -> Series[bool]:
        """Check that all elastic demand values are present if one or more is."""
        elastic_demand = df[
            [
                DemandNames.price_elasticity_col,
                DemandNames.min_price_col,
                DemandNames.max_price_col,
                DemandNames.normal_price_col,
            ]
        ]

        check = elastic_demand.apply(
            lambda row: all(value is not None for value in row) if any(value is not None for value in row) else True,
            axis=1,
        ).tolist()
        return pd.Series(check)

    class Config:
        """Schema-wide configuration for the DemandSchema class."""

        unique_column_names = True
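
Because DemandSchema is a Pandera DataFrameModel, an attribute table read from the Demand.Consumers file can be validated with its validate method. The sketch below uses hypothetical consumer data; with lazy=True, Pandera collects all failed checks into a single SchemaErrors exception instead of stopping at the first one.

import pandas as pd
import pandera as pa

from framdata.database_names.DemandNames import DemandSchema

# Hypothetical Demand.Consumers rows; column names follow DemandNames.columns.
df = pd.DataFrame(
    {
        "ConsumerID": ["NO1_Households"],
        "PowerNode": ["NO1"],
        "ReservePrice": [3000.0],
        "PriceElasticity": [None],
        "MinPriceLimit": [None],
        "MaxPriceLimit": [None],
        "NormalPrice": [None],
        "CapacityProfile": ["NO1_demand_profile"],
        "TemperatureProfile": [None],
        "Capacity": [1500.0],
    }
)

try:
    DemandSchema.validate(df, lazy=True)
except pa.errors.SchemaErrors as err:
    print(err.failure_cases)  # one row per failed check
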
Config

Schema-wide configuration for the DemandSchema class.

Source code in framdata/database_names/DemandNames.py
class Config:
    """Schema-wide configuration for the DemandSchema class."""

    unique_column_names = True
check_elastic_demand(df: DataFrame) -> Series[bool] classmethod

Check that all elastic demand values are present if one or more is.

Source code in framdata/database_names/DemandNames.py
@pa.dataframe_check
@classmethod
def check_elastic_demand(cls, df: DataFrame) -> Series[bool]:
    """Check that all elastic demand values are present if one or more is."""
    elastic_demand = df[
        [
            DemandNames.price_elasticity_col,
            DemandNames.min_price_col,
            DemandNames.max_price_col,
            DemandNames.normal_price_col,
        ]
    ]

    check = elastic_demand.apply(
        lambda row: all(value is not None for value in row) if any(value is not None for value in row) else True,
        axis=1,
    ).tolist()
    return pd.Series(check)
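
The same row-wise rule can be reproduced with plain pandas outside the schema: a row is valid when either all four elastic demand columns are filled or none of them are. A minimal illustration with hypothetical values follows (dtype=object keeps missing entries as None, mirroring how the attribute table columns are typed in the schema above).

import pandas as pd

elastic = pd.DataFrame(
    {
        "PriceElasticity": [-0.3, None, -0.1],
        "MinPriceLimit": [0.0, None, None],  # the last row is incomplete
        "MaxPriceLimit": [200.0, None, 180.0],
        "NormalPrice": [50.0, None, 45.0],
    },
    dtype=object,
)

ok = elastic.apply(
    lambda row: all(v is not None for v in row) if any(v is not None for v in row) else True,
    axis=1,
)
print(ok.tolist())  # [True, True, False]
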
dtype_str_int_float(series: Series[Any]) -> Series[bool] classmethod

Check if values in the series are of datatype: str, int or float.

Source code in framdata/database_names/DemandNames.py
@pa.check(DemandNames.capacity_col)
@classmethod
def dtype_str_int_float(cls, series: Series[Any]) -> Series[bool]:
    """Check if values in the series are of datatype: str, int or float."""
    return dtype_str_int_float(series)
dtype_str_int_float_none(series: Series[Any]) -> Series[bool] classmethod

Check if values in the series are of datatype: str, int, float or None.

Source code in framdata/database_names/DemandNames.py
@pa.check(
    DemandNames.reserve_price_col,
    DemandNames.price_elasticity_col,
    DemandNames.min_price_col,
    DemandNames.max_price_col,
    DemandNames.normal_price_col,
    DemandNames.capacity_profile_col,
    DemandNames.temperature_profile_col,
)
@classmethod
def dtype_str_int_float_none(cls, series: Series[Any]) -> Series[bool]:
    """Check if values in the series are of datatype: str, int, float or None."""
    return dtype_str_int_float_none(series)
numeric_values_are_between_or_equal_to_0_and_1(series: Series[Any]) -> Series[bool] classmethod

Check if numeric values in the series are between zero and one or equal to zero and one.

Source code in framdata/database_names/DemandNames.py
@pa.check(DemandNames.capacity_profile_col)
@classmethod
def numeric_values_are_between_or_equal_to_0_and_1(cls, series: Series[Any]) -> Series[bool]:
    """Check if numeric values in the series are between zero and one or equal to zero and one."""
    return numeric_values_are_between_or_equal_to(series, 0, 1)
numeric_values_greater_than_or_equal_to_0(series: Series[Any]) -> Series[bool] classmethod

Check if numeric values in the series are greater than or equal to zero.

Source code in framdata/database_names/DemandNames.py
@pa.check(
    DemandNames.reserve_price_col,
    DemandNames.min_price_col,
    DemandNames.max_price_col,
    DemandNames.normal_price_col,
    DemandNames.capacity_col,
)
@classmethod
def numeric_values_greater_than_or_equal_to_0(cls, series: Series[Any]) -> Series[bool]:
    """Check if numeric values in the series are greater than or equal to zero."""
    return numeric_values_greater_than_or_equal_to(series, 0)
numeric_values_less_than_or_equal_to_0(series: Series[Any]) -> Series[bool] classmethod

Check if numeric values in the series are less than or equal to zero.

Source code in framdata/database_names/DemandNames.py
@pa.check(DemandNames.price_elasticity_col)
@classmethod
def numeric_values_less_than_or_equal_to_0(cls, series: Series[Any]) -> Series[bool]:
    """Check if numeric values in the series are less than or equal to zero."""
    return numeric_values_less_than_or_equal_to(series, 0)

H5Names

Define names and fields used in H5 files.

H5Names

Container class for names used in H5 files.

Source code in framdata/database_names/H5Names.py
class H5Names:
    """Container class for names used in H5 files."""

    INDEX_GROUP = "index"
    METADATA_GROUP = "metadata"
    VECTORS_GROUP = "vectors"
    COMMON_PREFIX = "common_"
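
As a rough sketch, a file following this layout could be written with h5py as below. The file name, dataset names, and array contents are illustrative assumptions; only the group names and prefix come from H5Names.

import h5py
import numpy as np

from framdata.database_names.H5Names import H5Names

with h5py.File("example_profiles.h5", "w") as f:
    f.create_group(H5Names.METADATA_GROUP)
    index = f.create_group(H5Names.INDEX_GROUP)
    vectors = f.create_group(H5Names.VECTORS_GROUP)

    # Assumption: COMMON_PREFIX ("common_") marks datasets shared by all vectors.
    index.create_dataset(H5Names.COMMON_PREFIX + "index", data=np.arange(8760))
    vectors.create_dataset("NO1_demand", data=np.random.default_rng(0).random(8760))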

HydroBypassNames

Contains the HydroBypassNames class and related Pandera schemas for handling hydropower bypass data.

Includes attribute and metadata schemas.

HydroBypassMetadataSchema

Bases: _AttributeMetadataSchema

Pandera DataFrameModel schema for metadata in the Hydropower.Bypass file.

Source code in framdata/database_names/HydroBypassNames.py
class HydroBypassMetadataSchema(_AttributeMetadataSchema):
    """Pandera DataFrameModel schema for metadata in the Hydropower.Bypass file."""

    pass
HydroBypassNames

Bases: _BaseComponentsNames

Define naming conventions and attribute object creation for the HydroBypass object, which is an attribute of the HydroModule.

Provides methods for creating bypass attribute objects, retrieving Pandera schemas for attribute and metadata tables, and formatting validation errors specific to the bypass schemas.

Source code in framdata/database_names/HydroBypassNames.py
class HydroBypassNames(_BaseComponentsNames):
    """
    Define naming conventions and attribute object creation for the HydroBypass object, which is an attribute of the HydroModule.

    Provides methods for creating bypass attribute objects, retrieving Pandera schemas for attribute and metadata tables,
    and formatting validation errors specific to the bypass schemas.

    """

    id_col = "BypassID"
    to_col = "BypassTo"
    cap_col = "Capacity"
    min_bnd_col = "MinOperationalBypass"
    min_penalty_col = "MinViolationPenalty"

    columns: ClassVar[list[str]] = [id_col, to_col, cap_col, min_bnd_col, min_penalty_col]

    ref_columns: ClassVar[list[str]] = [to_col, cap_col, min_bnd_col, min_penalty_col]

    @staticmethod
    def create_component(
        row: NDArray,
        indices: dict[str, int],
        meta_columns: set[str],
        meta_data: pd.DataFrame,
        attribute_objects: dict[str, tuple[object, dict[str, Meta]]] | None = None,
    ) -> dict[str, HydroBypass]:
        """
        Create a HydroBypass object.

        Args:
            row (NDArray): Array containing the values of one table row, representing one HydroBypass object.
            indices (dict[str, int]): Mapping of the table's column names to the array's indices.
            meta_columns (set[str]): Set of columns used to tag the object with memberships.
            meta_data (pd.DataFrame): DataFrame containing at least the unit of every column.
            attribute_objects (dict[str, tuple[object, dict[str, Meta]]], optional): NOT USED, currently only used in HydroModulesNames.

        Returns:
            dict[str, HydroBypass]: A dictionary with the bypass ID as key and the bypass object as value.

        """
        columns_to_parse = [
            HydroBypassNames.id_col,
            HydroBypassNames.to_col,
            HydroBypassNames.cap_col,
            HydroBypassNames.min_bnd_col,
            HydroBypassNames.min_penalty_col,
        ]

        arg_user_code = HydroBypassNames._parse_args(row, indices, columns_to_parse, meta_data)

        bypass = HydroBypass(
            to_module=row[indices[HydroBypassNames.to_col]],
            # capacity=SoftFlowCapacity(
            #     level_input=arg_user_code[BypassNames.cap_col],
            #     min_profile_input=arg_user_code[BypassNames.min_bnd_col],
            #     min_penalty=arg_user_code[BypassNames.min_penalty_col],
            # ),
            capacity=MaxFlowVolume(level=arg_user_code[HydroBypassNames.cap_col]),
        )

        meta = {}
        HydroBypassNames._add_meta(meta, row, indices, meta_columns)

        return {row[indices[HydroBypassNames.id_col]]: (bypass, meta)}

    @staticmethod
    def get_attribute_data_schema() -> pa.DataFrameModel:
        """
        Get the Pandera DataFrameModel schema for attribute data in the Hydropower.Bypass file.

        Returns:
            pa.DataFrameModel: Pandera DataFrameModel schema for the Bypass attribute data.

        """
        return HydroBypassSchema

    @staticmethod
    def get_metadata_schema() -> pa.DataFrameModel:
        """
        Get the Pandera DataFrameModel schema for the metadata table in the Hydropower.Bypass file.

        Returns:
            pa.DataFrameModel: Pandera DataFrameModel schema for the Bypass metadata.

        """
        return HydroBypassMetadataSchema

    @staticmethod
    def _get_unique_check_descriptions() -> dict[str, tuple[str, bool]]:
        """
        Retrieve a dictionary with descriptions of validation checks that are specific to the Bypass schemas.

        Returns:
            dict[str, tuple[str, bool]]: A dictionary where:
                - Keys (str): The name of the validation check method.
                - Values (tuple[str, bool]):
                    - The first element (str) provides a concise and user-friendly description of the check. E.g. what
                      caused the validation error or what is required for the check to pass.
                    - The second element (bool) indicates whether the check is a warning (True) or an error (False).


        """
        return None

    @staticmethod
    def _format_unique_checks(errors: pd.DataFrame) -> pd.DataFrame:
        """
        Format the error DataFrame according to the validation checks that are specific to the Bypass schemas.

        Args:
            errors (pd.DataFrame): The error DataFrame containing validation errors.

        Returns:
            pd.DataFrame: The updated error DataFrame with formatted rows for unique validation checks.

        """
        return None
create_component(row: NDArray, indices: dict[str, int], meta_columns: set[str], meta_data: pd.DataFrame, attribute_objects: dict[str, tuple[object, dict[str, Meta]]] | None = None) -> dict[str, HydroBypass] staticmethod

Create a HydroBypass object.

Parameters:

- row (NDArray): Array containing the values of one table row, representing one HydroBypass object. Required.
- indices (dict[str, int]): Mapping of the table's column names to the array's indices. Required.
- meta_columns (set[str]): Set of columns used to tag the object with memberships. Required.
- meta_data (pd.DataFrame): DataFrame containing at least the unit of every column. Required.
- attribute_objects (dict[str, tuple[object, dict[str, Meta]]], optional): NOT USED, currently only used in HydroModulesNames. Defaults to None.

Returns:

- dict[str, HydroBypass]: A dictionary with the bypass ID as key and the bypass object as value.

Source code in framdata/database_names/HydroBypassNames.py
@staticmethod
def create_component(
    row: NDArray,
    indices: dict[str, int],
    meta_columns: set[str],
    meta_data: pd.DataFrame,
    attribute_objects: dict[str, tuple[object, dict[str, Meta]]] | None = None,
) -> dict[str, HydroBypass]:
    """
    Create a HydroBypass object.

    Args:
        row (NDArray): Array containing the values of one table row, representing one HydroBypass object.
        indices (dict[str, int]): Mapping of the table's column names to the array's indices.
        meta_columns (set[str]): Set of columns used to tag the object with memberships.
        meta_data (pd.DataFrame): DataFrame containing at least the unit of every column.
        attribute_objects (dict[str, tuple[object, dict[str, Meta]]], optional): NOT USED, currently only used in HydroModulesNames.

    Returns:
        dict[str, HydroBypass]: A dictionary with the bypass ID as key and the bypass object as value.

    """
    columns_to_parse = [
        HydroBypassNames.id_col,
        HydroBypassNames.to_col,
        HydroBypassNames.cap_col,
        HydroBypassNames.min_bnd_col,
        HydroBypassNames.min_penalty_col,
    ]

    arg_user_code = HydroBypassNames._parse_args(row, indices, columns_to_parse, meta_data)

    bypass = HydroBypass(
        to_module=row[indices[HydroBypassNames.to_col]],
        # capacity=SoftFlowCapacity(
        #     level_input=arg_user_code[BypassNames.cap_col],
        #     min_profile_input=arg_user_code[BypassNames.min_bnd_col],
        #     min_penalty=arg_user_code[BypassNames.min_penalty_col],
        # ),
        capacity=MaxFlowVolume(level=arg_user_code[HydroBypassNames.cap_col]),
    )

    meta = {}
    HydroBypassNames._add_meta(meta, row, indices, meta_columns)

    return {row[indices[HydroBypassNames.id_col]]: (bypass, meta)}
get_attribute_data_schema() -> pa.DataFrameModel staticmethod

Get the Pandera DataFrameModel schema for attribute data in the Hydropower.Bypass file.

Returns:

- pa.DataFrameModel: Pandera DataFrameModel schema for the Bypass attribute data.

Source code in framdata/database_names/HydroBypassNames.py
@staticmethod
def get_attribute_data_schema() -> pa.DataFrameModel:
    """
    Get the Pandera DataFrameModel schema for attribute data in the Hydropower.Bypass file.

    Returns:
        pa.DataFrameModel: Pandera DataFrameModel schema for the Bypass attribute data.

    """
    return HydroBypassSchema
get_metadata_schema() -> pa.DataFrameModel staticmethod

Get the Pandera DataFrameModel schema for the metadata table in the Hydropower.Bypass file.

Returns:

- pa.DataFrameModel: Pandera DataFrameModel schema for the Bypass metadata.

Source code in framdata/database_names/HydroBypassNames.py
@staticmethod
def get_metadata_schema() -> pa.DataFrameModel:
    """
    Get the Pandera DataFrameModel schema for the metadata table in the Hydropower.Bypass file.

    Returns:
        pa.DataFrameModel: Pandera DataFrameModel schema for the Bypass metadata.

    """
    return HydroBypassMetadataSchema
HydroBypassSchema

Bases: DataFrameModel

Pandera DataFrameModel schema for attribute data in the Hydropower.Bypass file.

Source code in framdata/database_names/HydroBypassNames.py
class HydroBypassSchema(pa.DataFrameModel):
    """Pandera DataFrameModel schema for attribute data in the Hydropower.Bypass file."""

    pass

HydroGeneratorNames

Define the HydroGeneratorNames class and related Pandera schemas for hydropower generator data.

Provides:

- HydroGeneratorNames: class for handling generator component names and schema validation.
- GeneratorSchema: Pandera schema for generator attribute data.
- GeneratorMetadataSchema: Pandera schema for generator metadata.

GeneratorMetadataSchema

Bases: _AttributeMetadataSchema

Pandera DataFrameModel schema for metadata in the Hydropower.Generators file.

Source code in framdata/database_names/HydroGeneratorNames.py
class GeneratorMetadataSchema(_AttributeMetadataSchema):
    """Pandera DataFrameModel schema for metadata in the Hydropower.Generators file."""

    pass
GeneratorSchema

Bases: DataFrameModel

Pandera DataFrameModel schema for attribute data in the Hydropower.Generators file.

Source code in framdata/database_names/HydroGeneratorNames.py
class GeneratorSchema(pa.DataFrameModel):
    """Pandera DataFrameModel schema for attribute data in the Hydropower.Generators file."""

    pass
HydroGeneratorNames

Bases: _BaseComponentsNames

Handles generator component names and schema validation for hydropower generator data.

Provides methods for creating generator components, retrieving Pandera schemas for attribute and metadata tables, and formatting validation errors specific to generator schemas.

Source code in framdata/database_names/HydroGeneratorNames.py
class HydroGeneratorNames(_BaseComponentsNames):
    """
    Handles generator component names and schema validation for hydropower generator data.

    Provides methods for creating generator components, retrieving Pandera schemas for attribute and metadata tables,
    and formatting validation errors specific to generator schemas.
    """

    id_col = "GeneratorID"
    node_col = "PowerNode"
    pq_curve_col = "PQCurve"
    tailw_elev_col = "TailwaterElevation"
    head_nom_col = "NominalHead"
    en_eq_col = "EnergyEq"

    columns: ClassVar[list[str]] = [
        id_col,
        node_col,
        pq_curve_col,
        tailw_elev_col,
        head_nom_col,
        en_eq_col,
    ]

    ref_columns: ClassVar[list[str]] = [
        node_col,
        pq_curve_col,
        tailw_elev_col,
        head_nom_col,
        en_eq_col,
    ]

    @staticmethod
    def create_component(
        row: NDArray,
        indices: dict[str, int],
        meta_columns: set[str],
        meta_data: pd.DataFrame,
        attribute_objects: dict[str, tuple[object, dict[str, Meta]]] | None = None,
    ) -> dict[str, tuple[HydroGenerator, dict[str, Meta]]]:
        """
        Create a hydro generator attribute object.

        Args:
            row (NDArray): Array containing the values of one table row, representing one HydroGenerator object.
            indices (dict[str, int]): Mapping of the table's column names to the array's indices.
            meta_columns (set[str]): Set of columns used to tag the object with memberships.
            meta_data (pd.DataFrame): DataFrame containing at least the unit of every column.
            attribute_objects (dict[str, tuple[object, dict[str, Meta]]], optional): NOT USED, currently only used in HydroModulesNames.

        Returns:
            dict[str, tuple[HydroGenerator, dict[str, Meta]]]: A dictionary with the generator ID as key and the attribute object and metadata as value.

        """
        columns_to_parse = [
            HydroGeneratorNames.pq_curve_col,
            HydroGeneratorNames.tailw_elev_col,
            HydroGeneratorNames.head_nom_col,
            HydroGeneratorNames.en_eq_col,
        ]

        arg_user_code = HydroGeneratorNames._parse_args(row, indices, columns_to_parse, meta_data)

        generator = HydroGenerator(
            power_node=row[indices[HydroGeneratorNames.node_col]],
            energy_eq=Conversion(level=arg_user_code[HydroGeneratorNames.en_eq_col]),
            pq_curve=arg_user_code[HydroGeneratorNames.pq_curve_col],
            tailwater_elevation=arg_user_code[HydroGeneratorNames.tailw_elev_col],
            nominal_head=arg_user_code[HydroGeneratorNames.head_nom_col],
        )

        meta = {}
        HydroGeneratorNames._add_meta(meta, row, indices, meta_columns)

        return {row[indices[HydroGeneratorNames.id_col]]: (generator, meta)}

    @staticmethod
    def get_attribute_data_schema() -> pa.DataFrameModel:
        """
        Get the Pandera DataFrameModel schema for attribute data in the Hydropower.Generators file.

        Returns:
            pa.DataFrameModel: Pandera DataFrameModel schema for the Generator attribute data.

        """
        return GeneratorSchema

    @staticmethod
    def get_metadata_schema() -> pa.DataFrameModel:
        """
        Get the Pandera DataFrameModel schema for the metadata table in the Hydropower.Generators file.

        Returns:
            pa.DataFrameModel: Pandera DataFrameModel schema for the Generator metadata.

        """
        return GeneratorMetadataSchema

    @staticmethod
    def _get_unique_check_descriptions() -> dict[str, tuple[str, bool]]:
        """
        Retrieve a dictionary with descriptions of validation checks that are specific to the Generator schemas.

        Returns:
            dict[str, tuple[str, bool]]: A dictionary where:
                - Keys (str): The name of the validation check method.
                - Values (tuple[str, bool]):
                    - The first element (str) provides a concise and user-friendly description of the check. E.g. what
                      caused the validation error or what is required for the check to pass.
                    - The second element (bool) indicates whether the check is a warning (True) or an error (False).


        """
        return None

    @staticmethod
    def _format_unique_checks(errors: pd.DataFrame) -> pd.DataFrame:
        """
        Format the error DataFrame according to the validation checks that are specific to the Generator schemas.

        Args:
            errors (pd.DataFrame): The error DataFrame containing validation errors.

        Returns:
            pd.DataFrame: The updated error DataFrame with formatted rows for unique validation checks.

        """
        return None
create_component(row: NDArray, indices: dict[str, int], meta_columns: set[str], meta_data: pd.DataFrame, attribute_objects: dict[str, tuple[object, dict[str, Meta]]] | None = None) -> dict[str, tuple[HydroGenerator, dict[str, Meta]]] staticmethod

Create a hydro generator attribute object.

Parameters:

- row (NDArray): Array containing the values of one table row, representing one HydroGenerator object. Required.
- indices (dict[str, int]): Mapping of the table's column names to the array's indices. Required.
- meta_columns (set[str]): Set of columns used to tag the object with memberships. Required.
- meta_data (pd.DataFrame): DataFrame containing at least the unit of every column. Required.
- attribute_objects (dict[str, tuple[object, dict[str, Meta]]], optional): NOT USED, currently only used in HydroModulesNames. Defaults to None.

Returns:

- dict[str, tuple[HydroGenerator, dict[str, Meta]]]: A dictionary with the generator ID as key and the attribute object and metadata as value.

Source code in framdata/database_names/HydroGeneratorNames.py
@staticmethod
def create_component(
    row: NDArray,
    indices: dict[str, int],
    meta_columns: set[str],
    meta_data: pd.DataFrame,
    attribute_objects: dict[str, tuple[object, dict[str, Meta]]] | None = None,
) -> dict[str, tuple[HydroGenerator, dict[str, Meta]]]:
    """
    Create a hydro generator attribute object.

    Args:
        row (NDArray): Array containing the values of one table row, representing one HydroGenerator object.
        indices (dict[str, int]): Mapping of the table's column names to the array's indices.
        meta_columns (set[str]): Set of columns used to tag object with memberships.
        meta_data (pd.DataFrame): DataFrame containing at least the unit of every column.
        attribute_objects (dict[str, tuple[object, dict[str, Meta]]], optional): NOT USED, currently only used in HydroModulesNames.

    Returns:
        dict[str, tuple[HydroGenerator, dict[str, Meta]]]: A dictionary with the generator ID as key and a tuple of the attribute object and its metadata as value.

    """
    columns_to_parse = [
        HydroGeneratorNames.pq_curve_col,
        HydroGeneratorNames.tailw_elev_col,
        HydroGeneratorNames.head_nom_col,
        HydroGeneratorNames.en_eq_col,
    ]

    arg_user_code = HydroGeneratorNames._parse_args(row, indices, columns_to_parse, meta_data)

    generator = HydroGenerator(
        power_node=row[indices[HydroGeneratorNames.node_col]],
        energy_eq=Conversion(level=arg_user_code[HydroGeneratorNames.en_eq_col]),
        pq_curve=arg_user_code[HydroGeneratorNames.pq_curve_col],
        tailwater_elevation=arg_user_code[HydroGeneratorNames.tailw_elev_col],
        nominal_head=arg_user_code[HydroGeneratorNames.head_nom_col],
    )

    meta = {}
    HydroGeneratorNames._add_meta(meta, row, indices, meta_columns)

    return {row[indices[HydroGeneratorNames.id_col]]: (generator, meta)}
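
create_component takes a bare row array plus an indices mapping rather than a DataFrame row. As a rough illustration of that calling convention only, here is a minimal sketch (not framdata code): the column names and values are made up, and the unit handling done by _parse_args is omitted.

# Minimal sketch of the row/indices calling convention used by create_component.
# Column names and values below are hypothetical, not the real table layout.
import pandas as pd

table = pd.DataFrame(
    {
        "GeneratorID": ["gen_1"],  # hypothetical id column
        "PowerNode": ["NO1"],      # hypothetical reference column
        "EnergyEq": [1.2],         # hypothetical numeric column
    }
)

# Mapping of column names to array positions, like the `indices` argument.
indices = {col: i for i, col in enumerate(table.columns)}

# One table row as a plain array, like the `row` argument.
row = table.to_numpy()[0]

# Values are looked up by column name through the index mapping.
generator_id = row[indices["GeneratorID"]]
power_node = row[indices["PowerNode"]]
print(generator_id, power_node)
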
get_attribute_data_schema() -> pa.DataFrameModel staticmethod

Get the Pandera DataFrameModel schema for attribute data in the Hydropower.Generators file.

Returns:

Type Description
DataFrameModel

pa.DataFrameModel: Pandera DataFrameModel schema for the Generator attribute data.

Source code in framdata/database_names/HydroGeneratorNames.py
@staticmethod
def get_attribute_data_schema() -> pa.DataFrameModel:
    """
    Get the Pandera DataFrameModel schema for attribute data in the Hydropower.Generators file.

    Returns:
        pa.DataFrameModel: Pandera DataFrameModel schema for the Generator attribute data.

    """
    return GeneratorSchema
get_metadata_schema() -> pa.DataFrameModel staticmethod

Get the Pandera DataFrameModel schema for the metadata table in the Hydropower.Generators file.

Returns:

Type Description
DataFrameModel

pa.DataFrameModel: Pandera DataFrameModel schema for the Generator metadata.

Source code in framdata/database_names/HydroGeneratorNames.py
@staticmethod
def get_metadata_schema() -> pa.DataFrameModel:
    """
    Get the Pandera DataFrameModel schema for the metadata table in the Hydropower.Generators file.

    Returns:
        pa.DataFrameModel: Pandera DataFrameModel schema for the Generator metadata.

    """
    return GeneratorMetadataSchema
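
Both accessors return Pandera DataFrameModel classes rather than instances. The fields of the real GeneratorSchema are not shown in this file, so the following is only a generic sketch of how such a returned schema is typically applied, using a toy schema and toy data; the lazy-validation failure table is conceptually the kind of errors DataFrame that _format_unique_checks receives.

# Generic Pandera usage sketch (toy schema, not the real GeneratorSchema):
# validate a table lazily and collect all failures in one DataFrame.
import pandas as pd
import pandera as pa
from pandera.typing import Series


class ToyGeneratorSchema(pa.DataFrameModel):
    GeneratorID: Series[str] = pa.Field(unique=True)  # hypothetical columns
    EnergyEq: Series[float] = pa.Field(gt=0)


df = pd.DataFrame({"GeneratorID": ["g1", "g1"], "EnergyEq": [1.1, -2.0]})

try:
    ToyGeneratorSchema.validate(df, lazy=True)
except pa.errors.SchemaErrors as err:
    # err.failure_cases is a DataFrame listing all violations, similar in
    # spirit to the errors DataFrame handled by _format_unique_checks.
    print(err.failure_cases)
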

HydroInflowNames

Define the HydroInflowNames class and related Pandera schemas for handling hydropower inflow data.

Includes attribute and metadata schemas.

HydroInflowNames

Bases: _BaseComponentsNames

Convert hydropower inflow data to attribute objects for HydroModules. Handle attribute and metadata schema validation.

Source code in framdata/database_names/HydroInflowNames.py
class HydroInflowNames(_BaseComponentsNames):
    """Convert hydropower inflow data to attribute objects for HydroModules. Handle attribute and metadata schema validation."""

    id_col = "InflowID"
    yr_vol_col = "YearlyVolume"
    profile_col = "InflowProfileID"

    columns: ClassVar[list[str]] = [
        id_col,
        yr_vol_col,
        profile_col,
    ]

    ref_columns: ClassVar[list[str]] = [
        yr_vol_col,
        profile_col,
    ]

    @staticmethod
    def create_component(
        row: NDArray,
        indices: dict[str, int],
        meta_columns: set[str],
        meta_data: pd.DataFrame,
        attribute_objects: dict[str, tuple[object, dict[str, Meta]]] | None = None,
    ) -> dict[str, AvgFlowVolume]:
        """
        Create a hydro inflow component.

        Args:
            row (NDArray): Array containing the values of one table row, representing one inflow entry.
            indices (dict[str, int]): Mapping of the table's column names to the array's indices.
            meta_columns (set[str]): Set of columns used to tag object with memberships.
            meta_data (pd.DataFrame): DataFrame containing at least the unit of every column.
            attribute_objects (dict[str, tuple[object, dict[str, Meta]]], optional): NOT USED, currently only used in HydroModulesNames.

        Returns:
            dict[str, tuple[AvgFlowVolume, dict[str, Meta]] | None]: A dictionary with the inflow ID as key and a tuple of the inflow object and its metadata as value, or None if the reference period lacks profiles.

        """
        if HydroInflowNames._ref_period_lacks_profiles(row, indices, [HydroInflowNames.profile_col], meta_data):
            return {row[indices[HydroInflowNames.id_col]]: None}
        columns_to_parse = [
            HydroInflowNames.yr_vol_col,
            HydroInflowNames.profile_col,
        ]

        arg_user_code = HydroInflowNames._parse_args(row, indices, columns_to_parse, meta_data)

        inflow = AvgFlowVolume(
            level=arg_user_code[HydroInflowNames.yr_vol_col],
            profile=arg_user_code[HydroInflowNames.profile_col],
        )

        meta = {}
        HydroInflowNames._add_meta(meta, row, indices, meta_columns)

        return {row[indices[HydroInflowNames.id_col]]: (inflow, meta)}

    @staticmethod
    def get_attribute_data_schema() -> pa.DataFrameModel:
        """
        Get the Pandera DataFrameModel schema for attribute data in the Hydropower.Inflow file.

        Returns:
            pa.DataFrameModel: Pandera DataFrameModel schema for the Inflow attribute data.

        """
        return InflowSchema

    @staticmethod
    def get_metadata_schema() -> pa.DataFrameModel:
        """
        Get the Pandera DataFrameModel schema for the metadata table in the Hydropower.Inflow file.

        Returns:
            pa.DataFrameModel: Pandera DataFrameModel schema for the Inflow metadata.

        """
        return InflowMetadataSchema

    @staticmethod
    def _get_unique_check_descriptions() -> dict[str, tuple[str, bool]]:
        """
        Retrieve a dictionary with descriptions of validation checks that are specific to the Inflow schemas.

        Returns:
            dict[str, tuple[str, bool]]: A dictionary where:
                - Keys (str): The name of the validation check method.
                - Values (tuple[str, bool]):
                    - The first element (str) provides a concise and user-friendly description of the check. E.g. what
                      caused the validation error or what is required for the check to pass.
                    - The second element (bool) indicates whether the check is a warning (True) or an error (False).


        """
        return None

    @staticmethod
    def _format_unique_checks(errors: pd.DataFrame) -> pd.DataFrame:
        """
        Format the error DataFrame according to the validation checks that are specific to the Inflow schemas.

        Args:
            errors (pd.DataFrame): The error DataFrame containing validation errors.

        Returns:
            pd.DataFrame: The updated error DataFrame with formatted rows for unique validation checks.

        """
        return None
create_component(row: NDArray, indices: dict[str, int], meta_columns: set[str], meta_data: pd.DataFrame, attribute_objects: dict[str, tuple[object, dict[str, Meta]]] | None = None) -> dict[str, AvgFlowVolume] staticmethod

Create a hydro inflow component.

Parameters:

Name Type Description Default
row NDArray

Array containing the values of one table row, representing one inflow entry.

required
indices dict[str, int]

Mapping of the table's column names to the array's indices.

required
meta_columns set[str]

Set of columns used to tag object with memberships.

required
meta_data DataFrame

DataFrame containing at least the unit of every column.

required
attribute_objects dict[str, tuple[object, dict[str, Meta]]]

NOT USED, currently only used in HydroModulesNames.

None

Returns:

Type Description
dict[str, AvgFlowVolume]

dict[str, tuple[AvgFlowVolume, dict[str, Meta]] | None]: A dictionary with the inflow ID as key and a tuple of the inflow object and its metadata as value, or None if the reference period lacks profiles.

Source code in framdata/database_names/HydroInflowNames.py
@staticmethod
def create_component(
    row: NDArray,
    indices: dict[str, int],
    meta_columns: set[str],
    meta_data: pd.DataFrame,
    attribute_objects: dict[str, tuple[object, dict[str, Meta]]] | None = None,
) -> dict[str, AvgFlowVolume]:
    """
    Create a hydro inflow component.

    Args:
        row (NDArray): Array containing the values of one table row, representing one inflow entry.
        indices (dict[str, int]): Mapping of the table's column names to the array's indices.
        meta_columns (set[str]): Set of columns used to tag object with memberships.
        meta_data (pd.DataFrame): DataFrame containing at least the unit of every column.
        attribute_objects (dict[str, tuple[object, dict[str, Meta]]], optional): NOT USED, currently only used in HydroModulesNames.

    Returns:
        dict[str, tuple[AvgFlowVolume, dict[str, Meta]] | None]: A dictionary with the inflow ID as key and a tuple of the inflow object and its metadata as value, or None if the reference period lacks profiles.

    """
    if HydroInflowNames._ref_period_lacks_profiles(row, indices, [HydroInflowNames.profile_col], meta_data):
        return {row[indices[HydroInflowNames.id_col]]: None}
    columns_to_parse = [
        HydroInflowNames.yr_vol_col,
        HydroInflowNames.profile_col,
    ]

    arg_user_code = HydroInflowNames._parse_args(row, indices, columns_to_parse, meta_data)

    inflow = AvgFlowVolume(
        level=arg_user_code[HydroInflowNames.yr_vol_col],
        profile=arg_user_code[HydroInflowNames.profile_col],
    )

    meta = {}
    HydroInflowNames._add_meta(meta, row, indices, meta_columns)

    return {row[indices[HydroInflowNames.id_col]]: (inflow, meta)}
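
The early return above means callers can receive {InflowID: None} for rows whose reference period lacks profiles, and have to filter those entries out. Below is a minimal sketch of that skip-and-collect pattern using the Inflow column names defined earlier; the "missing profile" check is a stand-in for the real _ref_period_lacks_profiles logic, which is not shown in this file.

# Sketch of collecting inflow attributes row by row while skipping entries
# that resolve to None (e.g. when the reference period lacks profiles).
import pandas as pd

inflow_table = pd.DataFrame(
    {
        "InflowID": ["inflow_a", "inflow_b"],
        "YearlyVolume": [120.0, 80.0],
        "InflowProfileID": ["profile_a", None],
    }
)

attribute_objects: dict[str, tuple[object, dict] | None] = {}
for _, r in inflow_table.iterrows():
    if pd.isna(r["InflowProfileID"]):  # stand-in skip condition
        attribute_objects[r["InflowID"]] = None
        continue
    # In framdata this would be an AvgFlowVolume plus a metadata dict.
    attribute_objects[r["InflowID"]] = ({"level": r["YearlyVolume"]}, {})

usable = {k: v for k, v in attribute_objects.items() if v is not None}
print(sorted(usable))  # ['inflow_a']
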
get_attribute_data_schema() -> pa.DataFrameModel staticmethod

Get the Pandera DataFrameModel schema for attribute data in the Hydropower.Inflow file.

Returns:

Type Description
DataFrameModel

pa.DataFrameModel: Pandera DataFrameModel schema for the Inflow attribute data.

Source code in framdata/database_names/HydroInflowNames.py
@staticmethod
def get_attribute_data_schema() -> pa.DataFrameModel:
    """
    Get the Pandera DataFrameModel schema for attribute data in the Hydropower.Inflow file.

    Returns:
        pa.DataFrameModel: Pandera DataFrameModel schema for the Inflow attribute data.

    """
    return InflowSchema
get_metadata_schema() -> pa.DataFrameModel staticmethod

Get the Pandera DataFrameModel schema for the metadata table in the Hydropower.Inflow file.

Returns:

Type Description
DataFrameModel

pa.DataFrameModel: Pandera DataFrameModel schema for the Inflow metadata.

Source code in framdata/database_names/HydroInflowNames.py
@staticmethod
def get_metadata_schema() -> pa.DataFrameModel:
    """
    Get the Pandera DataFrameModel schema for the metadata table in the Hydropower.Inflow file.

    Returns:
        pa.DataFrameModel: Pandera DataFrameModel schema for the Inflow metadata.

    """
    return InflowMetadataSchema
InflowMetadataSchema

Bases: _AttributeMetadataSchema

Pandera DataFrameModel schema for metadata in the Hydropower.Inflow file.

Source code in framdata/database_names/HydroInflowNames.py
class InflowMetadataSchema(_AttributeMetadataSchema):
    """Pandera DataFrameModel schema for metadata in the Hydropower.Inflow file."""

    pass
InflowSchema

Bases: DataFrameModel

Pandera DataFrameModel schema for attribute data in the Hydropower.Inflow file.

Source code in framdata/database_names/HydroInflowNames.py
class InflowSchema(pa.DataFrameModel):
    """Pandera DataFrameModel schema for attribute data in the Hydropower.Inflow file."""

    pass

HydroModulesNames

Defines schema, names, and component creation logic for hydropower modules.

This module provides:

- HydroModulesNames: class for column names and component creation for hydropower modules.
- HydroModuleSchema: Pandera schema for attribute data.
- HydroModuleMetadataSchema: Pandera schema for metadata.

HydroModuleMetadataSchema

Bases: _AttributeMetadataSchema

Pandera DataFrameModel schema for metadata in the Hydropower.Modules file.

Source code in framdata/database_names/HydroModulesNames.py
class HydroModuleMetadataSchema(_AttributeMetadataSchema):
    """Pandera DataFrameModel schema for metadata in the Hydropower.Modules file."""

    pass
HydroModuleSchema

Bases: DataFrameModel

Pandera DataFrameModel schema for attribute data in the Hydropower.Modules file.

Source code in framdata/database_names/HydroModulesNames.py
class HydroModuleSchema(pa.DataFrameModel):
    """Pandera DataFrameModel schema for attribute data in the Hydropower.Modules file."""

    pass
HydroModulesNames

Bases: _BaseComponentsNames

Provides column names, schema accessors, and component creation logic for hydropower modules.

This class defines constants for column names, methods for creating HydroModule components from data rows, and accessors for Pandera schemas used for validation of attribute and metadata tables.

Source code in framdata/database_names/HydroModulesNames.py
class HydroModulesNames(_BaseComponentsNames):
    """
    Provides column names, schema accessors, and component creation logic for hydropower modules.

    This class defines constants for column names, methods for creating HydroModule components from data rows,
    and accessors for Pandera schemas used for validation of attribute and metadata tables.
    """

    filename = "Hydropower.Modules"

    id_col = "ModuleID"
    pump_col = "PumpID"
    gen_col = "GeneratorID"
    res_col = "ReservoirID"
    byp_col = "BypassID"
    hyd_code_col = "HydraulicCoupling"
    inflow_col = "InflowID"
    rel_to_col = "ReleaseTo"
    spill_to_col = "SpillTo"
    rel_cap_col = "CapacityRelease"
    min_bnd_col = "MinOperationalRelease"
    max_bnd_col = "MaxOperationalRelease"
    min_penalty_col = "MinViolationPenalty"
    max_penalty_col = "MaxViolationPenalty"

    columns: ClassVar[list[str]] = [
        id_col,
        pump_col,
        gen_col,
        res_col,
        byp_col,
        hyd_code_col,
        inflow_col,
        rel_to_col,
        spill_to_col,
        rel_cap_col,
        min_bnd_col,
        max_bnd_col,
        min_penalty_col,
        max_penalty_col,
    ]

    ref_columns: ClassVar[list[str]] = [
        rel_to_col,
        spill_to_col,
        rel_cap_col,
        min_bnd_col,
        max_bnd_col,
        min_penalty_col,
        max_penalty_col,
    ]

    @staticmethod
    def create_component(
        row: NDArray,
        indices: dict[str, int],
        meta_columns: set[str],
        meta_data: pd.DataFrame,
        attribute_objects: dict[str, tuple[object, dict[str, Meta]]] | None = None,
    ) -> dict[str, Component]:
        """
        Create a hydro module component.

        Args:
            row (NDArray): Array containing the values of one table row, representing one HydroModule object.
            indices (dict[str, int]): Mapping of the table's column names to the array's indices.
            meta_columns (set[str]): Set of columns used to tag object with memberships.
            meta_data (pd.DataFrame): DataFrame containing at least the unit of every column.
            attribute_objects (dict[str, tuple[object, dict[str, Meta]]], optional): Dictionary of attributes to link to the HydroModule.

        Returns:
            dict[str, Component]: A dictionary with the module_id as key and the HydroModule as value.

        """
        columns_to_parse = [
            HydroModulesNames.rel_cap_col,
            HydroModulesNames.min_bnd_col,
            HydroModulesNames.max_bnd_col,
            HydroModulesNames.min_penalty_col,
            HydroModulesNames.max_penalty_col,
        ]
        name = row[indices[HydroModulesNames.id_col]]
        inflow_name = indices[HydroModulesNames.inflow_col]
        pump_name = indices[HydroModulesNames.pump_col]
        gen_name = indices[HydroModulesNames.gen_col]
        res_name = indices[HydroModulesNames.res_col]
        byp_name = indices[HydroModulesNames.byp_col]
        arg_user_code = HydroModulesNames._parse_args(row, indices, columns_to_parse, meta_data)
        inflow, inflow_meta = HydroModulesNames._get_attribute_object(
            attribute_objects,
            row[inflow_name],
            name,
            HydroModule,
            AvgFlowVolume,
        )
        pump, pump_meta = HydroModulesNames._get_attribute_object(
            attribute_objects,
            row[pump_name],
            name,
            HydroModule,
            HydroPump,
        )
        generator, generator_meta = HydroModulesNames._get_attribute_object(
            attribute_objects,
            row[gen_name],
            name,
            HydroModule,
            HydroGenerator,
        )
        reservoir, reservoir_meta = HydroModulesNames._get_attribute_object(
            attribute_objects,
            row[res_name],
            name,
            HydroModule,
            HydroReservoir,
        )
        bypass, bypass_meta = HydroModulesNames._get_attribute_object(
            attribute_objects,
            row[byp_name],
            name,
            HydroModule,
            HydroBypass,
        )
        module = HydroModule(
            release_capacity=MaxFlowVolume(level=arg_user_code[HydroModulesNames.rel_cap_col]),
            hydraulic_coupling=row[indices[HydroModulesNames.hyd_code_col]],
            inflow=inflow,
            pump=pump,
            generator=generator,
            reservoir=reservoir,
            bypass=bypass,
            release_to=row[indices[HydroModulesNames.rel_to_col]],
            spill_to=row[indices[HydroModulesNames.spill_to_col]],
        )

        if "EnergyEqDownstream" in meta_columns:
            HydroModulesNames._add_meta(module, row, indices, ["EnergyEqDownstream"], unit="kWh/m3")

        meta_columns = [c for c in meta_columns if c != "EnergyEqDownstream"]
        HydroModulesNames._add_meta(module, row, indices, meta_columns)  # fails because Modules want floats in Meta.

        attr_meta = {
            inflow_name: inflow_meta,
            pump_name: pump_meta,
            gen_name: generator_meta,
            res_name: reservoir_meta,
            byp_name: bypass_meta,
        }
        HydroModulesNames._merge_attribute_meta(
            name,
            module,
            {k: v for k, v in attr_meta.items() if k and v},
        )

        return {name: module}

    @staticmethod
    def get_attribute_data_schema() -> pa.DataFrameModel:
        """
        Get the Pandera DataFrameModel schema for attribute data in the Hydropower.Modules file.

        Returns:
            pa.DataFrameModel: Pandera DataFrameModel schema for the HydroModule attribute data.

        """
        return HydroModuleSchema

    @staticmethod
    def get_metadata_schema() -> pa.DataFrameModel:
        """
        Get the Pandera DataFrameModel schema for the metadata table in the Hydropower.Modules file.

        Returns:
            pa.DataFrameModel: Pandera DataFrameModel schema for the HydroModule metadata.

        """
        return HydroModuleMetadataSchema

    @staticmethod
    def _get_unique_check_descriptions() -> dict[str, tuple[str, bool]]:
        """
        Retrieve a dictionary with descriptions of validation checks that are specific to the HydroModule schemas.

        Returns:
            dict[str, tuple[str, bool]]: A dictionary where:
                - Keys (str): The name of the validation check method.
                - Values (tuple[str, bool]):
                    - The first element (str) provides a concise and user-friendly description of the check. E.g. what
                      caused the validation error or what is required for the check to pass.
                    - The second element (bool) indicates whether the check is a warning (True) or an error (False).


        """
        return None

    @staticmethod
    def _format_unique_checks(errors: pd.DataFrame) -> pd.DataFrame:
        """
        Format the error DataFrame according to the validation checks that are specific to the HydroModule schemas.

        Args:
            errors (pd.DataFrame): The error DataFrame containing validation errors.

        Returns:
            pd.DataFrame: The updated error DataFrame with formatted rows for unique validation checks.

        """
        return None
create_component(row: NDArray, indices: dict[str, int], meta_columns: set[str], meta_data: pd.DataFrame, attribute_objects: dict[str, tuple[object, dict[str, Meta]]] | None = None) -> dict[str, Component] staticmethod

Create a hydro module component.

Parameters:

Name Type Description Default
row NDArray

Array containing the values of one table row, representing one HydroModule object.

required
indices dict[str, int]

Mapping of the table's column names to the array's indices.

required
meta_columns set[str]

Set of columns used to tag object with memberships.

required
meta_data DataFrame

DataFrame containing at least the unit of every column.

required
attribute_objects dict[str, tuple[object, dict[str, Meta]]]

Dictionary of attributes to link to the HydroModule.

None

Returns:

Type Description
dict[str, Component]

dict[str, Component]: A dictionary with the module_id as key and the HydroModule as value.

Source code in framdata/database_names/HydroModulesNames.py
@staticmethod
def create_component(
    row: NDArray,
    indices: dict[str, int],
    meta_columns: set[str],
    meta_data: pd.DataFrame,
    attribute_objects: dict[str, tuple[object, dict[str, Meta]]] | None = None,
) -> dict[str, Component]:
    """
    Create a hydro module component.

    Args:
        row (NDArray): Array containing the values of one table row, representing one HydroModule object.
        indices (dict[str, int]): Mapping of the table's column names to the array's indices.
        meta_columns (set[str]): Set of columns used to tag object with memberships.
        meta_data (pd.DataFrame): DataFrame containing at least the unit of every column.
        attribute_objects (dict[str, tuple[object, dict[str, Meta]]], optional): Dictionary of attributes to link to the HydroModule.

    Returns:
        dict[str, Component]: A dictionary with the module_id as key and the HydroModule as value.

    """
    columns_to_parse = [
        HydroModulesNames.rel_cap_col,
        HydroModulesNames.min_bnd_col,
        HydroModulesNames.max_bnd_col,
        HydroModulesNames.min_penalty_col,
        HydroModulesNames.max_penalty_col,
    ]
    name = row[indices[HydroModulesNames.id_col]]
    inflow_name = indices[HydroModulesNames.inflow_col]
    pump_name = indices[HydroModulesNames.pump_col]
    gen_name = indices[HydroModulesNames.gen_col]
    res_name = indices[HydroModulesNames.res_col]
    byp_name = indices[HydroModulesNames.byp_col]
    arg_user_code = HydroModulesNames._parse_args(row, indices, columns_to_parse, meta_data)
    inflow, inflow_meta = HydroModulesNames._get_attribute_object(
        attribute_objects,
        row[inflow_name],
        name,
        HydroModule,
        AvgFlowVolume,
    )
    pump, pump_meta = HydroModulesNames._get_attribute_object(
        attribute_objects,
        row[pump_name],
        name,
        HydroModule,
        HydroPump,
    )
    generator, generator_meta = HydroModulesNames._get_attribute_object(
        attribute_objects,
        row[gen_name],
        name,
        HydroModule,
        HydroGenerator,
    )
    reservoir, reservoir_meta = HydroModulesNames._get_attribute_object(
        attribute_objects,
        row[res_name],
        name,
        HydroModule,
        HydroReservoir,
    )
    bypass, bypass_meta = HydroModulesNames._get_attribute_object(
        attribute_objects,
        row[byp_name],
        name,
        HydroModule,
        HydroBypass,
    )
    module = HydroModule(
        release_capacity=MaxFlowVolume(level=arg_user_code[HydroModulesNames.rel_cap_col]),
        hydraulic_coupling=row[indices[HydroModulesNames.hyd_code_col]],
        inflow=inflow,
        pump=pump,
        generator=generator,
        reservoir=reservoir,
        bypass=bypass,
        release_to=row[indices[HydroModulesNames.rel_to_col]],
        spill_to=row[indices[HydroModulesNames.spill_to_col]],
    )

    if "EnergyEqDownstream" in meta_columns:
        HydroModulesNames._add_meta(module, row, indices, ["EnergyEqDownstream"], unit="kWh/m3")

    meta_columns = [c for c in meta_columns if c != "EnergyEqDownstream"]
    HydroModulesNames._add_meta(module, row, indices, meta_columns)  # fails because Modules want floats in Meta.

    attr_meta = {
        inflow_name: inflow_meta,
        pump_name: pump_meta,
        gen_name: generator_meta,
        res_name: reservoir_meta,
        byp_name: bypass_meta,
    }
    HydroModulesNames._merge_attribute_meta(
        name,
        module,
        {k: v for k, v in attr_meta.items() if k and v},
    )

    return {name: module}
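
The module builder above resolves its sub-attributes (inflow, pump, generator, reservoir, bypass) by looking up the IDs stored in the row against the prebuilt attribute_objects mapping. Below is a generic sketch of that ID-lookup step with plain dicts; the resolve helper and the placeholder objects are illustrative only, not the real _get_attribute_object implementation.

# Generic sketch of the attribute-linking step: module rows reference
# sub-attributes by ID, and those IDs are resolved against a prebuilt
# attribute_objects mapping. Missing or blank IDs resolve to None.
from typing import Any

attribute_objects: dict[str, tuple[Any, dict]] = {
    "res_1": ("HydroReservoir(...)", {"area": "NO1"}),
    "gen_1": ("HydroGenerator(...)", {"area": "NO1"}),
}


def resolve(attr_id: str | None) -> tuple[Any, dict | None]:
    """Return (object, meta) for a referenced ID, or (None, None) if absent."""
    if not attr_id or attr_id not in attribute_objects:
        return None, None
    return attribute_objects[attr_id]


module_row = {"ModuleID": "mod_1", "ReservoirID": "res_1", "GeneratorID": "gen_1", "PumpID": None}

reservoir, reservoir_meta = resolve(module_row["ReservoirID"])
generator, generator_meta = resolve(module_row["GeneratorID"])
pump, pump_meta = resolve(module_row["PumpID"])  # -> (None, None), module has no pump
print(reservoir, pump)
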
get_attribute_data_schema() -> pa.DataFrameModel staticmethod

Get the Pandera DataFrameModel schema for attribute data in the Hydropower.Modules file.

Returns:

Type Description
DataFrameModel

pa.DataFrameModel: Pandera DataFrameModel schema for the HydroModule attribute data.

Source code in framdata/database_names/HydroModulesNames.py
@staticmethod
def get_attribute_data_schema() -> pa.DataFrameModel:
    """
    Get the Pandera DataFrameModel schema for attribute data in the Hydropower.Modules file.

    Returns:
        pa.DataFrameModel: Pandera DataFrameModel schema for the HydroModule attribute data.

    """
    return HydroModuleSchema
get_metadata_schema() -> pa.DataFrameModel staticmethod

Get the Pandera DataFrameModel schema for the metadata table in the Hydropower.Modules file.

Returns:

Type Description
DataFrameModel

pa.DataFrameModel: Pandera DataFrameModel schema for the HydroModule metadata.

Source code in framdata/database_names/HydroModulesNames.py
@staticmethod
def get_metadata_schema() -> pa.DataFrameModel:
    """
    Get the Pandera DataFrameModel schema for the metadata table in the Hydropower.Modules file.

    Returns:
        pa.DataFrameModel: Pandera DataFrameModel schema for the HydroModule metadata.

    """
    return HydroModuleMetadataSchema

HydroPumpNames

Define the HydroPumpNames class and related Pandera schemas for handling hydropower pump data.

Includes attribute and metadata validation for the Hydropower.Pumps file.

HydroPumpNames

Bases: _BaseComponentsNames

Handle naming conventions, schema definitions, and component creation for hydropower pump data.

Source code in framdata/database_names/HydroPumpNames.py
class HydroPumpNames(_BaseComponentsNames):
    """Handle naming conventions, schema definitions, and component creation for hydropower pump data."""

    id_col = "PumpID"
    node_col = "PowerNode"
    pump_from_col = "PumpFrom"
    pump_to_col = "PumpTo"
    power_capacity_col = "PowerCapacity"
    vol_capacity_col = "Capacity"
    energy_equiv_col = "EnergyEq"
    h_min_col = "HeadMin"
    h_max_col = "HeadMax"
    q_min_col = "QMin"
    q_max_col = "QMax"

    columns: ClassVar[list[str]] = [
        id_col,
        node_col,
        pump_from_col,
        pump_to_col,
        power_capacity_col,
        vol_capacity_col,
        energy_equiv_col,
        h_min_col,
        h_max_col,
        q_min_col,
        q_max_col,
    ]

    ref_columns: ClassVar[list[str]] = [
        node_col,
        pump_from_col,
        pump_to_col,
        power_capacity_col,
        vol_capacity_col,
        energy_equiv_col,
        h_min_col,
        h_max_col,
        q_min_col,
        q_max_col,
    ]

    @staticmethod
    def create_component(
        row: NDArray,
        indices: dict[str, int],
        meta_columns: set[str],
        meta_data: pd.DataFrame,
        attribute_objects: dict[str, tuple[object, dict[str, Meta]]] | None = None,
    ) -> dict[str, HydroPump]:
        """
        Create a HydroPump object from a row in the Hydropower.Pumps table.

        Args:
            row (NDArray): Array containing the values of one table row, representing one HydroPump object.
            indices (dict[str, int]): Mapping of the table's column names to the array's indices.
            meta_columns (set[str]): Set of columns used to tag object with memberships.
            meta_data (pd.DataFrame): DataFrame containing at least the unit of every column.
            attribute_objects (dict[str, tuple[object, dict[str, Meta]]], optional): NOT USED, currently only used in HydroModulesNames.

        Returns:
            dict[str, tuple[HydroPump, dict[str, Meta]]]: A dictionary with the pump ID as key and a tuple of the pump object and its metadata as value.

        """
        columns_to_parse = [
            HydroPumpNames.power_capacity_col,
            HydroPumpNames.vol_capacity_col,
            HydroPumpNames.energy_equiv_col,
            HydroPumpNames.h_min_col,
            HydroPumpNames.h_max_col,
            HydroPumpNames.q_min_col,
            HydroPumpNames.q_max_col,
        ]

        arg_user_code = HydroPumpNames._parse_args(row, indices, columns_to_parse, meta_data)

        pump = HydroPump(
            power_node=row[indices[HydroPumpNames.node_col]],
            from_module=row[indices[HydroPumpNames.pump_from_col]],
            to_module=row[indices[HydroPumpNames.pump_to_col]],
            water_capacity=MaxFlowVolume(level=arg_user_code[HydroPumpNames.vol_capacity_col]),
            energy_eq=Conversion(level=arg_user_code[HydroPumpNames.energy_equiv_col]),
            power_capacity=MaxFlowVolume(level=arg_user_code[HydroPumpNames.power_capacity_col]),
            head_max=arg_user_code[HydroPumpNames.h_max_col],
            head_min=arg_user_code[HydroPumpNames.h_min_col],
            q_max=arg_user_code[HydroPumpNames.q_max_col],
            q_min=arg_user_code[HydroPumpNames.q_min_col],
        )

        meta = {}
        HydroPumpNames._add_meta(meta, row, indices, meta_columns)

        return {row[indices[HydroPumpNames.id_col]]: (pump, meta)}

    @staticmethod
    def get_attribute_data_schema() -> pa.DataFrameModel:
        """
        Get the Pandera DataFrameModel schema for attribute data in the Hydropower.Pumps file.

        Returns:
            pa.DataFrameModel: Pandera DataFrameModel schema for the Pump attribute data.

        """
        return PumpSchema

    @staticmethod
    def get_metadata_schema() -> pa.DataFrameModel:
        """
        Get the Pandera DataFrameModel schema for the metadata table in the Hydropower.Pumps file.

        Returns:
            pa.DataFrameModel: Pandera DataFrameModel schema for the Pump metadata.

        """
        return PumpMetadataSchema

    @staticmethod
    def _get_unique_check_descriptions() -> dict[str, tuple[str, bool]]:
        """
        Retrieve a dictionary with descriptions of validation checks that are specific to the Pump schemas.

        Returns:
            dict[str, tuple[str, bool]]: A dictionary where:
                - Keys (str): The name of the validation check method.
                - Values (tuple[str, bool]):
                    - The first element (str) provides a concise and user-friendly description of the check. E.g. what
                      caused the validation error or what is required for the check to pass.
                    - The second element (bool) indicates whether the check is a warning (True) or an error (False).


        """
        return None

    @staticmethod
    def _format_unique_checks(errors: pd.DataFrame) -> pd.DataFrame:
        """
        Format the error DataFrame according to the validation checks that are specific to the Pump schemas.

        Args:
            errors (pd.DataFrame): The error DataFrame containing validation errors.

        Returns:
            pd.DataFrame: The updated error DataFrame with formatted rows for unique validation checks.

        """
        return None
create_component(row: NDArray, indices: dict[str, int], meta_columns: set[str], meta_data: pd.DataFrame, attribute_objects: dict[str, tuple[object, dict[str, Meta]]] | None = None) -> dict[str, HydroPump] staticmethod

Create a HydroPump object from a row in the Hydropower.Pumps table.

Parameters:

Name Type Description Default
row NDArray

Array containing the values of one table row, representing one HydroPump object.

required
indices dict[str, int]

Mapping of the table's column names to the array's indices.

required
meta_columns set[str]

Set of columns used to tag object with memberships.

required
meta_data DataFrame

DataFrame containing at least the unit of every column.

required
attribute_objects dict[str, tuple[object, dict[str, Meta]]]

NOT USED, currently only used in HydroModulesNames.

None

Returns:

Type Description
dict[str, HydroPump]

dict[str, tuple[HydroPump, dict[str, Meta]]]: A dictionary with the pump ID as key and a tuple of the pump object and its metadata as value.

Source code in framdata/database_names/HydroPumpNames.py
@staticmethod
def create_component(
    row: NDArray,
    indices: dict[str, int],
    meta_columns: set[str],
    meta_data: pd.DataFrame,
    attribute_objects: dict[str, tuple[object, dict[str, Meta]]] | None = None,
) -> dict[str, HydroPump]:
    """
    Create a HydroPump object from a row in the Hydropower.Pumps table.

    Args:
        row (NDArray): Array containing the values of one table row, representing one HydroPump object.
        indices (dict[str, int]): Mapping of the table's column names to the array's indices.
        meta_columns (set[str]): Set of columns used to tag object with memberships.
        meta_data (pd.DataFrame): DataFrame containing at least the unit of every column.
        attribute_objects (dict[str, tuple[object, dict[str, Meta]]], optional): NOT USED, currently only used in HydroModulesNames.

    Returns:
        dict[str, tuple[HydroPump, dict[str, Meta]]]: A dictionary with the pump ID as key and a tuple of the pump object and its metadata as value.

    """
    columns_to_parse = [
        HydroPumpNames.power_capacity_col,
        HydroPumpNames.vol_capacity_col,
        HydroPumpNames.energy_equiv_col,
        HydroPumpNames.h_min_col,
        HydroPumpNames.h_max_col,
        HydroPumpNames.q_min_col,
        HydroPumpNames.q_max_col,
    ]

    arg_user_code = HydroPumpNames._parse_args(row, indices, columns_to_parse, meta_data)

    pump = HydroPump(
        power_node=row[indices[HydroPumpNames.node_col]],
        from_module=row[indices[HydroPumpNames.pump_from_col]],
        to_module=row[indices[HydroPumpNames.pump_to_col]],
        water_capacity=MaxFlowVolume(level=arg_user_code[HydroPumpNames.vol_capacity_col]),
        energy_eq=Conversion(level=arg_user_code[HydroPumpNames.energy_equiv_col]),
        power_capacity=MaxFlowVolume(level=arg_user_code[HydroPumpNames.power_capacity_col]),
        head_max=arg_user_code[HydroPumpNames.h_max_col],
        head_min=arg_user_code[HydroPumpNames.h_min_col],
        q_max=arg_user_code[HydroPumpNames.q_max_col],
        q_min=arg_user_code[HydroPumpNames.q_min_col],
    )

    meta = {}
    HydroPumpNames._add_meta(meta, row, indices, meta_columns)

    return {row[indices[HydroPumpNames.id_col]]: (pump, meta)}
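
The columns_to_parse list above drives which cells receive unit-aware parsing before the HydroPump constructor is called. Here is a simplified stand-in for that select-and-parse step; parse_value is illustrative only, since the real _parse_args also applies unit and user-code handling that is not shown in this file.

# Simplified stand-in for the select-and-parse step: pick out the columns in
# columns_to_parse and convert them before building constructor arguments.
import pandas as pd

pump_table = pd.DataFrame(
    {"PumpID": ["pump_1"], "PowerNode": ["NO2"], "Capacity": ["45.0"], "EnergyEq": ["1.3"]}
)
indices = {col: i for i, col in enumerate(pump_table.columns)}
row = pump_table.to_numpy()[0]

columns_to_parse = ["Capacity", "EnergyEq"]


def parse_value(value: object) -> float | None:
    # Illustrative conversion only; not the real _parse_args behaviour.
    return None if value is None else float(value)


# Equivalent in spirit to arg_user_code = HydroPumpNames._parse_args(...).
arg_user_code = {col: parse_value(row[indices[col]]) for col in columns_to_parse}
print(arg_user_code)  # {'Capacity': 45.0, 'EnergyEq': 1.3}
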
get_attribute_data_schema() -> pa.DataFrameModel staticmethod

Get the Pandera DataFrameModel schema for attribute data in the Hydropower.Pumps file.

Returns:

Type Description
DataFrameModel

pa.DataFrameModel: Pandera DataFrameModel schema for the Pump attribute data.

Source code in framdata/database_names/HydroPumpNames.py
@staticmethod
def get_attribute_data_schema() -> pa.DataFrameModel:
    """
    Get the Pandera DataFrameModel schema for attribute data in the Hydropower.Pumps file.

    Returns:
        pa.DataFrameModel: Pandera DataFrameModel schema for the Pump attribute data.

    """
    return PumpSchema
get_metadata_schema() -> pa.DataFrameModel staticmethod

Get the Pandera DataFrameModel schema for the metadata table in the Hydropower.Pumps file.

Returns:

Type Description
DataFrameModel

pa.DataFrameModel: Pandera DataFrameModel schema for the Pump metadata.

Source code in framdata/database_names/HydroPumpNames.py
@staticmethod
def get_metadata_schema() -> pa.DataFrameModel:
    """
    Get the Pandera DataFrameModel schema for the metadata table in the Hydropower.Pumps file.

    Returns:
        pa.DataFrameModel: Pandera DataFrameModel schema for the Pump metadata.

    """
    return PumpMetadataSchema
PumpMetadataSchema

Bases: _AttributeMetadataSchema

Pandera DataFrameModel schema for metadata in the Hydropower.Pumps file.

Source code in framdata/database_names/HydroPumpNames.py
class PumpMetadataSchema(_AttributeMetadataSchema):
    """Pandera DataFrameModel schema for metadata in the Hydropower.Pumps file."""

    pass
PumpSchema

Bases: DataFrameModel

Pandera DataFrameModel schema for attribute data in the Hydropower.Pumps file.

Source code in framdata/database_names/HydroPumpNames.py
class PumpSchema(pa.DataFrameModel):
    """Pandera DataFrameModel schema for attribute data in the Hydropower.Pumps file."""

    pass

HydroReservoirNames

Module for handling reservoir names and schemas in hydropower data.

This module defines the HydroReservoirNames class for managing reservoir attributes and provides Pandera schemas for validating reservoir attribute and metadata tables.

HydroReservoirMetadataSchema

Bases: _AttributeMetadataSchema

Pandera DataFrameModel schema for metadata in the Hydropower.Reservoirs file.

Source code in framdata/database_names/HydroReservoirNames.py
class HydroReservoirMetadataSchema(_AttributeMetadataSchema):
    """Pandera DataFrameModel schema for metadata in the Hydropower.Reservoirs file."""

    pass
HydroReservoirNames

Bases: _BaseComponentsNames

Class for managing reservoir attribute names and providing methods for schema validation and component creation.

This class defines column names for reservoir attributes, methods for creating HydroReservoir components, and functions to retrieve Pandera schemas for validating reservoir attribute and metadata tables.

Source code in framdata/database_names/HydroReservoirNames.py
class HydroReservoirNames(_BaseComponentsNames):
    """
    Class for managing reservoir attribute names and providing methods for schema validation and component creation.

    This class defines column names for reservoir attributes, methods for creating HydroReservoir components,
    and functions to retrieve Pandera schemas for validating reservoir attribute and metadata tables.
    """

    id_col = "ReservoirID"
    capacity_col = "Capacity"
    res_curve_col = "ReservoirCurve"
    min_res_col = "MinOperationalFilling"
    min_penalty_col = "MinViolationPenalty"
    max_res_col = "MaxOperationalFilling"
    max_penalty_col = "MaxViolationPenalty"
    res_buf_col = "TargetFilling"
    buf_penalty_col = "TargetViolationPenalty"

    columns: ClassVar[list[str]] = [
        id_col,
        capacity_col,
        res_curve_col,
        min_res_col,
        max_res_col,
        res_buf_col,
        min_penalty_col,
        max_penalty_col,
        buf_penalty_col,
    ]

    ref_columns: ClassVar[list[str]] = [
        capacity_col,
        res_curve_col,
        min_res_col,
        max_res_col,
        res_buf_col,
        min_penalty_col,
        max_penalty_col,
        buf_penalty_col,
    ]

    @staticmethod
    def create_component(
        row: NDArray,
        indices: dict[str, int],
        meta_columns: set[str],
        meta_data: pd.DataFrame,
        attribute_objects: dict[str, tuple[object, dict[str, Meta]]] | None = None,
    ) -> dict[str, HydroReservoir]:
        """
        Create a HydroReservoir object.

        Args:
            row (NDArray): Array containing the values of one table row, representing one HydroReservoir object.
            indices (dict[str, int]): Mapping of the table's column names to the array's indices.
            meta_columns (set[str]): Set of columns used to tag object with memberships.
            meta_data (pd.DataFrame): DataFrame containing at least the unit of every column.
            attribute_objects (dict[str, tuple[object, dict[str, Meta]]], optional): NOT USED, currently only used in HydroModulesNames.

        Returns:
            dict[str, tuple[HydroReservoir, dict[str, Meta]]]: A dictionary with the reservoir ID as key and a tuple of the reservoir object and its metadata as value.

        """
        columns_to_parse = [
            HydroReservoirNames.capacity_col,
            HydroReservoirNames.res_curve_col,
            HydroReservoirNames.min_res_col,
            HydroReservoirNames.max_res_col,
            HydroReservoirNames.res_buf_col,
            HydroReservoirNames.min_penalty_col,
            HydroReservoirNames.max_penalty_col,
            HydroReservoirNames.buf_penalty_col,
        ]

        arg_user_code = HydroReservoirNames._parse_args(row, indices, columns_to_parse, meta_data)

        reservoir_curve = ReservoirCurve(arg_user_code[HydroReservoirNames.res_curve_col])

        reservoir = HydroReservoir(
            capacity=StockVolume(level=arg_user_code[HydroReservoirNames.capacity_col]),
            reservoir_curve=reservoir_curve,
        )

        meta = {}
        HydroReservoirNames._add_meta(meta, row, indices, meta_columns)

        return {row[indices[HydroReservoirNames.id_col]]: (reservoir, meta)}

    @staticmethod
    def get_attribute_data_schema() -> pa.DataFrameModel:
        """
        Get the Pandera DataFrameModel schema for attribute data in the Hydropower.Reservoirs file.

        Returns:
            pa.DataFrameModel: Pandera DataFrameModel schema for the Reservoir attribute data.

        """
        return HydroReservoirSchema

    @staticmethod
    def get_metadata_schema() -> pa.DataFrameModel:
        """
        Get the Pandera DataFrameModel schema for the metadata table in the Hydropower.Reservoirs file.

        Returns:
            pa.DataFrameModel: Pandera DataFrameModel schema for the Reservoir metadata.

        """
        return HydroReservoirMetadataSchema

    @staticmethod
    def _get_unique_check_descriptions() -> dict[str, tuple[str, bool]]:
        """
        Retrieve a dictionary with descriptions of validation checks that are specific to the Reservoir schemas.

        Returns:
            dict[str, tuple[str, bool]]: A dictionary where:
                - Keys (str): The name of the validation check method.
                - Values (tuple[str, bool]):
                    - The first element (str) provides a concise and user-friendly description of the check. E.g. what
                      caused the validation error or what is required for the check to pass.
                    - The second element (bool) indicates whether the check is a warning (True) or an error (False).


        """
        return None

    @staticmethod
    def _format_unique_checks(errors: pd.DataFrame) -> pd.DataFrame:
        """
        Format the error DataFrame according to the validation checks that are specific to the Reservoir schemas.

        Args:
            errors (pd.DataFrame): The error DataFrame containing validation errors.

        Returns:
            pd.DataFrame: The updated error DataFrame with formatted rows for unique validation checks.

        """
        return None
create_component(row: NDArray, indices: dict[str, int], meta_columns: set[str], meta_data: pd.DataFrame, attribute_objects: dict[str, tuple[object, dict[str, Meta]]] | None = None) -> dict[str, HydroReservoir] staticmethod

Create a HydroReservoir object.

Parameters:

Name Type Description Default
row NDArray

Array containing the values of one table row, representing one HydroReservoir object.

required
indices dict[str, int]

Mapping of the table's column names to the array's indices.

required
meta_columns set[str]

Set of columns used to tag object with memberships.

required
meta_data DataFrame

DataFrame containing at least the unit of every column.

required
attribute_objects dict[str, tuple[object, dict[str, Meta]]]

NOT USED, currently only used in HydroModulesNames.

None

Returns:

Type Description
dict[str, HydroReservoir]

dict[str, tuple[HydroReservoir, dict[str, Meta]]]: A dictionary with the reservoir ID as key and a tuple of the reservoir object and its metadata as value.

Source code in framdata/database_names/HydroReservoirNames.py
@staticmethod
def create_component(
    row: NDArray,
    indices: dict[str, int],
    meta_columns: set[str],
    meta_data: pd.DataFrame,
    attribute_objects: dict[str, tuple[object, dict[str, Meta]]] | None = None,
) -> dict[str, HydroReservoir]:
    """
    Create a HydroReservoir object.

    Args:
        row (NDArray): Array containing the values of one table row, representing one HydroReservoir object.
        indices (dict[str, int]): Mapping of the table's column names to the array's indices.
        meta_columns (set[str]): Set of columns used to tag object with memberships.
        meta_data (pd.DataFrame): DataFrame containing at least the unit of every column.
        attribute_objects (dict[str, tuple[object, dict[str, Meta]]], optional): NOT USED, currently only used in HydroModulesNames.

    Returns:
        dict[str, tuple[HydroReservoir, dict[str, Meta]]]: A dictionary with the reservoir ID as key and a tuple of the reservoir object and its metadata as value.

    """
    columns_to_parse = [
        HydroReservoirNames.capacity_col,
        HydroReservoirNames.res_curve_col,
        HydroReservoirNames.min_res_col,
        HydroReservoirNames.max_res_col,
        HydroReservoirNames.res_buf_col,
        HydroReservoirNames.min_penalty_col,
        HydroReservoirNames.max_penalty_col,
        HydroReservoirNames.buf_penalty_col,
    ]

    arg_user_code = HydroReservoirNames._parse_args(row, indices, columns_to_parse, meta_data)

    reservoir_curve = ReservoirCurve(arg_user_code[HydroReservoirNames.res_curve_col])

    reservoir = HydroReservoir(
        capacity=StockVolume(level=arg_user_code[HydroReservoirNames.capacity_col]),
        reservoir_curve=reservoir_curve,
    )

    meta = {}
    HydroReservoirNames._add_meta(meta, row, indices, meta_columns)

    return {row[indices[HydroReservoirNames.id_col]]: (reservoir, meta)}
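
Each create_component ends by building a metadata dict from meta_columns via _add_meta, tagging the object with its memberships. A minimal sketch of that tagging step with plain dicts follows; the real _add_meta wraps values in framcore Meta objects (not shown here), and the Area/Owner columns are hypothetical.

# Sketch of the membership-tagging step: copy the values of meta_columns
# from the row into a metadata dict keyed by column name.
import pandas as pd

reservoir_table = pd.DataFrame(
    {"ReservoirID": ["res_1"], "Capacity": [250.0], "Area": ["NO3"], "Owner": ["ExampleCo"]}
)
indices = {col: i for i, col in enumerate(reservoir_table.columns)}
row = reservoir_table.to_numpy()[0]

meta_columns = {"Area", "Owner"}  # hypothetical membership columns

meta: dict[str, object] = {}
for column in meta_columns:
    meta[column] = row[indices[column]]

print({row[indices["ReservoirID"]]: ("HydroReservoir(...)", meta)})
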
get_attribute_data_schema() -> pa.DataFrameModel staticmethod

Get the Pandera DataFrameModel schema for attribute data in the Hydropower.Reservoirs file.

Returns:

Type Description
DataFrameModel

pa.DataFrameModel: Pandera DataFrameModel schema for the Reservoir attribute data.

Source code in framdata/database_names/HydroReservoirNames.py
@staticmethod
def get_attribute_data_schema() -> pa.DataFrameModel:
    """
    Get the Pandera DataFrameModel schema for attribute data in the Hydropower.Reservoirs file.

    Returns:
        pa.DataFrameModel: Pandera DataFrameModel schema for the Reservoir attribute data.

    """
    return HydroReservoirSchema
get_metadata_schema() -> pa.DataFrameModel staticmethod

Get the Pandera DataFrameModel schema for the metadata table in the Hydropower.Reservoirs file.

Returns:

Type Description
DataFrameModel

pa.DataFrameModel: Pandera DataFrameModel schema for the Reservoir metadata.

Source code in framdata/database_names/HydroReservoirNames.py
@staticmethod
def get_metadata_schema() -> pa.DataFrameModel:
    """
    Get the Pandera DataFrameModel schema for the metadata table in the Hydropower.Reservoirs file.

    Returns:
        pa.DataFrameModel: Pandera DataFrameModel schema for the Reservoir metadata.

    """
    return HydroReservoirMetadataSchema
HydroReservoirSchema

Bases: DataFrameModel

Pandera DataFrameModel schema for attribute data in the Hydropower.Reservoirs file.

Source code in framdata/database_names/HydroReservoirNames.py
class HydroReservoirSchema(pa.DataFrameModel):
    """Pandera DataFrameModel schema for attribute data in the Hydropower.Reservoirs file."""

    pass

ThermalNames

Classes defining Thermal tables.

ThermalMetadataSchema

Bases: _AttributeMetadataSchema

Pandera DataFrameModel schema for metadata in the Thermal.Generators file.

Source code in framdata/database_names/ThermalNames.py
class ThermalMetadataSchema(_AttributeMetadataSchema):
    """Pandera DataFrameModel schema for metadata in the Thermal.Generators file."""

    pass
ThermalNames

Bases: _BaseComponentsNames

Container class for describing the Thermal attribute table's names and structure.

Source code in framdata/database_names/ThermalNames.py
class ThermalNames(_BaseComponentsNames):
    """Container class for describing the Thermal attribute table's names and structure."""

    id_col = "ThermalID"
    main_unit_col = "MainUnit"
    nice_name_col = "NiceName"
    power_node_col = "PowerNode"
    fuel_node_col = "FuelNode"
    emission_node_col = "EmissionNode"
    emission_coeff_col = "EmissionCoefficient"
    type_col = "Type"
    capacity_col = "Capacity"
    full_load_col = "FullLoadEfficiency"
    part_load_col = "PartLoadEfficiency"
    voc_col = "VOC"
    start_costs_col = "StartCosts"
    start_hours_col = "StartHours"
    min_stable_load_col = "MinStableLoad"
    min_op_bound_col = "MinOperationalBound"
    max_op_bound_col = "MaxOperationalBound"
    ramp_up_col = "RampUp"
    ramp_down_col = "RampDown"

    # Should include rampup/down data in Thermal, when we get data for this
    columns: ClassVar[list[str]] = [
        id_col,
        nice_name_col,
        type_col,
        main_unit_col,
        power_node_col,
        fuel_node_col,
        emission_node_col,
        capacity_col,
        full_load_col,
        part_load_col,
        voc_col,
        start_costs_col,
        start_hours_col,
        min_stable_load_col,
        min_op_bound_col,
        max_op_bound_col,
        emission_coeff_col,
    ]

    ref_columns: ClassVar[list[str]] = [
        power_node_col,
        fuel_node_col,
        emission_node_col,
        capacity_col,
        full_load_col,
        part_load_col,
        voc_col,
        start_costs_col,
        start_hours_col,
        min_stable_load_col,
        min_op_bound_col,
        max_op_bound_col,
        emission_coeff_col,
    ]

    @staticmethod
    def create_component(
        row: NDArray,
        indices: dict[str, int],
        meta_columns: set[str],
        meta_data: pd.DataFrame,
        attribute_objects: dict[str, tuple[object, dict[str, Meta]]] | None = None,
    ) -> dict[str, Thermal]:
        """
        Create a thermal unit component.

        Args:
            row (NDArray): Array containing the values of one table row, representing one Thermal object.
            indices (dict[str, int]): Mapping of the table's column names to the array's indices.
            meta_columns (set[str]): Set of columns used to tag object with memberships.
            meta_data (pd.DataFrame): DataFrame containing at least the unit of every column.
            attribute_objects (dict[str, tuple[object, dict[str, Meta]]], optional): NOT USED

        Returns:
            dict[str, Thermal]: A dictionary with the thermal_id as key and the thermal unit as value.

        """
        columns_to_parse = [
            ThermalNames.emission_node_col,
            ThermalNames.capacity_col,
            ThermalNames.full_load_col,
            ThermalNames.part_load_col,
            ThermalNames.voc_col,
            ThermalNames.start_costs_col,
            ThermalNames.start_hours_col,
            ThermalNames.min_stable_load_col,
            ThermalNames.min_op_bound_col,
            ThermalNames.max_op_bound_col,
            ThermalNames.emission_coeff_col,
        ]

        arg_user_code = ThermalNames._parse_args(row, indices, columns_to_parse, meta_data)

        no_start_up_costs_condition = (
            (arg_user_code[ThermalNames.start_costs_col] is None)
            or (arg_user_code[ThermalNames.min_stable_load_col] is None)
            or (arg_user_code[ThermalNames.start_hours_col] is None)
            or (arg_user_code[ThermalNames.part_load_col] is None)
        )
        start_up_cost = (
            None
            if no_start_up_costs_condition
            else StartUpCost(
                startup_cost=Cost(level=arg_user_code[ThermalNames.start_costs_col]),
                min_stable_load=Proportion(level=arg_user_code[ThermalNames.min_stable_load_col]),
                start_hours=Hours(level=arg_user_code[ThermalNames.start_hours_col]),
                part_load_efficiency=Efficiency(level=arg_user_code[ThermalNames.part_load_col]),
            )
        )

        voc = (
            None
            if arg_user_code[ThermalNames.voc_col] is None
            else Cost(
                level=arg_user_code[ThermalNames.voc_col],
                profile=None,
            )
        )

        min_capacity = (
            None
            if arg_user_code[ThermalNames.min_op_bound_col] is None
            else MaxFlowVolume(
                level=arg_user_code[ThermalNames.capacity_col],
                profile=arg_user_code[ThermalNames.min_op_bound_col],
            )
        )

        thermal = Thermal(
            power_node=row[indices[ThermalNames.power_node_col]],
            fuel_node=row[indices[ThermalNames.fuel_node_col]],
            efficiency=Efficiency(level=arg_user_code[ThermalNames.full_load_col]),
            emission_node=row[indices[ThermalNames.emission_node_col]],
            emission_coefficient=Conversion(level=arg_user_code[FuelNodesNames.emission_coefficient_col]),
            max_capacity=MaxFlowVolume(
                level=arg_user_code[ThermalNames.capacity_col],
                profile=arg_user_code[ThermalNames.max_op_bound_col],
            ),
            min_capacity=min_capacity,
            voc=voc,
            startupcost=start_up_cost,
        )
        ThermalNames._add_meta(thermal, row, indices, meta_columns)

        return {row[indices[ThermalNames.id_col]]: thermal}

    @staticmethod
    def get_attribute_data_schema() -> pa.DataFrameModel:
        """
        Get the Pandera DataFrameModel schema for attribute data in the Thermal.Generators file.

        Returns:
            pa.DataFrameModel: Pandera DataFrameModel schema for Thermal attribute data.

        """
        return ThermalSchema

    @staticmethod
    def get_metadata_schema() -> pa.DataFrameModel:
        """
        Get the Pandera DataFrameModel schema for the metadata table in the Thermal.Generators file.

        Returns:
            pa.DataFrameModel: Pandera DataFrameModel schema for the Thermal metadata.

        """
        return ThermalMetadataSchema

    @staticmethod
    def _get_unique_check_descriptions() -> dict[str, tuple[str, bool]]:
        """
        Retrieve a dictionary with descriptions of validation checks that are specific to the Thermal schemas.

        Returns:
            dict[str, tuple[str, bool]]: A dictionary where:
                - Keys (str): The name of the validation check method.
                - Values (tuple[str, bool]):
                    - The first element (str) provides a concise and user-friendly description of the check. E.g. what
                      caused the validation error or what is required for the check to pass.
                    - The second element (bool) indicates whether the check is a warning (True) or an error (False).


        """
        return None

    @staticmethod
    def _format_unique_checks(errors: pd.DataFrame) -> pd.DataFrame:
        """
        Format the error DataFrame according to the validation checks that are specific to the Thermal schemas.

        Args:
            errors (pd.DataFrame): The error DataFrame containing validation errors.

        Returns:
            pd.DataFrame: The updated error DataFrame with formatted rows for unique validation checks.

        """
        return None
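
For orientation, the sketch below illustrates the row/indices convention that create_component expects: indices maps column names to positions in the row array, and reference-valued cells are resolved later by _parse_args. The column values, and the idea of building the mapping from a pandas DataFrame, are illustrative assumptions rather than part of the documented API; the ID and metadata columns are omitted.

import pandas as pd

# Hypothetical attribute rows; real tables come from the Thermal.Generators file.
df = pd.DataFrame(
    {
        "PowerNode": ["DE"],
        "FuelNode": ["DE.Gas"],
        "EmissionNode": ["DE.CO2"],
        "Capacity": [400.0],           # a plain number or a reference string
        "FullLoadEfficiency": [0.58],
        "StartCosts": [None],          # any missing start-up field => start_up_cost is None
    }
)

indices = {name: i for i, name in enumerate(df.columns)}  # column name -> array position
row = df.to_numpy()[0]
row[indices["PowerNode"]]  # -> "DE"
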
create_component(row: NDArray, indices: dict[str, int], meta_columns: set[str], meta_data: pd.DataFrame, attribute_objects: dict[str, tuple[object, dict[str, Meta]]] | None = None) -> dict[str, Thermal] staticmethod

Create a thermal unit component.

Parameters:

    row (NDArray, required): Array containing the values of one table row, representing one Thermal object.
    indices (dict[str, int], required): Mapping of the table's column names to the array's indices.
    meta_columns (set[str], required): Set of columns used to tag the object with memberships.
    meta_data (pd.DataFrame, required): DataFrame containing at least the unit of every column.
    attribute_objects (dict[str, tuple[object, dict[str, Meta]]], optional): NOT USED. Defaults to None.

Returns:

    dict[str, Thermal]: A dictionary with the thermal_id as key and the thermal unit as value.

Source code in framdata/database_names/ThermalNames.py
@staticmethod
def create_component(
    row: NDArray,
    indices: dict[str, int],
    meta_columns: set[str],
    meta_data: pd.DataFrame,
    attribute_objects: dict[str, tuple[object, dict[str, Meta]]] | None = None,
) -> dict[str, Thermal]:
    """
    Create a thermal unit component.

    Args:
        row (NDArray): Array containing the values of one table row, representing one Thermal object.
        indices (dict[str, int]): Mapping of the table's column names to the array's indices.
        meta_columns (set[str]): Set of columns used to tag the object with memberships.
        meta_data (pd.DataFrame): DataFrame containing at least the unit of every column.
        attribute_objects (dict[str, tuple[object, dict[str, Meta]]], optional): NOT USED

    Returns:
        dict[str, Thermal]: A dictionary with the thermal_id as key and the thermal unit as value.

    """
    columns_to_parse = [
        ThermalNames.emission_node_col,
        ThermalNames.capacity_col,
        ThermalNames.full_load_col,
        ThermalNames.part_load_col,
        ThermalNames.voc_col,
        ThermalNames.start_costs_col,
        ThermalNames.start_hours_col,
        ThermalNames.min_stable_load_col,
        ThermalNames.min_op_bound_col,
        ThermalNames.max_op_bound_col,
        ThermalNames.emission_coeff_col,
    ]

    arg_user_code = ThermalNames._parse_args(row, indices, columns_to_parse, meta_data)

    no_start_up_costs_condition = (
        (arg_user_code[ThermalNames.start_costs_col] is None)
        or (arg_user_code[ThermalNames.min_stable_load_col] is None)
        or (arg_user_code[ThermalNames.start_hours_col] is None)
        or (arg_user_code[ThermalNames.part_load_col] is None)
    )
    start_up_cost = (
        None
        if no_start_up_costs_condition
        else StartUpCost(
            startup_cost=Cost(level=arg_user_code[ThermalNames.start_costs_col]),
            min_stable_load=Proportion(level=arg_user_code[ThermalNames.min_stable_load_col]),
            start_hours=Hours(level=arg_user_code[ThermalNames.start_hours_col]),
            part_load_efficiency=Efficiency(level=arg_user_code[ThermalNames.part_load_col]),
        )
    )

    voc = (
        None
        if arg_user_code[ThermalNames.voc_col] is None
        else Cost(
            level=arg_user_code[ThermalNames.voc_col],
            profile=None,
        )
    )

    min_capacity = (
        None
        if arg_user_code[ThermalNames.min_op_bound_col] is None
        else MaxFlowVolume(
            level=arg_user_code[ThermalNames.capacity_col],
            profile=arg_user_code[ThermalNames.min_op_bound_col],
        )
    )

    thermal = Thermal(
        power_node=row[indices[ThermalNames.power_node_col]],
        fuel_node=row[indices[ThermalNames.fuel_node_col]],
        efficiency=Efficiency(level=arg_user_code[ThermalNames.full_load_col]),
        emission_node=row[indices[ThermalNames.emission_node_col]],
        emission_coefficient=Conversion(level=arg_user_code[FuelNodesNames.emission_coefficient_col]),
        max_capacity=MaxFlowVolume(
            level=arg_user_code[ThermalNames.capacity_col],
            profile=arg_user_code[ThermalNames.max_op_bound_col],
        ),
        min_capacity=min_capacity,
        voc=voc,
        startupcost=start_up_cost,
    )
    ThermalNames._add_meta(thermal, row, indices, meta_columns)

    return {row[indices[ThermalNames.id_col]]: thermal}
get_attribute_data_schema() -> pa.DataFrameModel staticmethod

Get the Pandera DataFrameModel schema for attribute data in the Thermal.Generators file.

Returns:

    pa.DataFrameModel: Pandera DataFrameModel schema for Thermal attribute data.

Source code in framdata/database_names/ThermalNames.py
@staticmethod
def get_attribute_data_schema() -> pa.DataFrameModel:
    """
    Get the Pandera DataFrameModel schema for attribute data in the Thermal.Generators file.

    Returns:
        pa.DataFrameModel: Pandera DataFrameModel schema for Thermal attribute data.

    """
    return ThermalSchema
get_metadata_schema() -> pa.DataFrameModel staticmethod

Get the Pandera DataFrameModel schema for the metadata table in the Thermal.Generators file.

Returns:

    pa.DataFrameModel: Pandera DataFrameModel schema for the Thermal metadata.

Source code in framdata/database_names/ThermalNames.py
@staticmethod
def get_metadata_schema() -> pa.DataFrameModel:
    """
    Get the Pandera DataFrameModel schema for the metadata table in the Thermal.Generators file.

    Returns:
        pa.DataFrameModel: Pandera DataFrameModel schema for the Thermal metadata.

    """
    return ThermalMetadataSchema
ThermalSchema

Bases: DataFrameModel

Pandera DataFrameModel schema for attribute data in the Thermal.Generators file.

Source code in framdata/database_names/ThermalNames.py
class ThermalSchema(pa.DataFrameModel):
    """Pandera DataFrameModel schema for attribute data in the Thermal.Generators file."""

    pass

TimeVectorMetadataNames

Contains names of fields in time vector metadata.

TimeVectorMetadataNames

Denote available fields in time vector metadata, and provide functionality for time vector metadata processing.

The processing is concerned with casting the metadata fields to correct types and decoding the fields and/or values if they are stored as bytes.

Source code in framdata/database_names/TimeVectorMetadataNames.py
class TimeVectorMetadataNames:
    """
    Denote available fields in time vector metadata, and provide functionality for time vector metadata processing.

    The processing is concerned with casting the metadata fields to correct types and decoding the fields and/or values if they are stored as bytes.

    """

    ENCODING = "utf-8"

    DATETIME_COL = "DateTime"
    # OBS! when adding new metadata entries, you also have to parse them in FileHandler.get_parquet_metadata
    # otherwise they will not be read.
    # Metadata fields

    # Id column name
    ID_COLUMN_NAME = "ID"

    # Required bools
    IS_MAX_LEVEL = "IsMaxLevel"
    IS_ZERO_ONE_PROFILE = "IsZeroOneProfile"
    IS_52_WEEK_YEARS = "Is52WeekYears"
    EXTRAPOLATE_FISRT_POINT = "ExtrapolateFirstPoint"
    EXTRAPOLATE_LAST_POINT = "ExtrapolateLastPoint"

    # reference period
    REF_PERIOD_START_YEAR = "RefPeriodStartYear"
    REF_PERIOD_NUM_YEARS = "RefPeriodNumberOfYears"

    START = "StartDateTime"
    FREQUENCY = "Frequency"
    NUM_POINTS = "NumberOfPoints"
    TIMEZONE = "TimeZone"

    UNIT = "Unit"
    CURRENCY = "Currency"

    # reference_period = "ReferencePeriod"

    B_IS_MAX_LEVEL = IS_MAX_LEVEL.encode(ENCODING)
    B_IS_ZERO_ONE_PROFILE = IS_ZERO_ONE_PROFILE.encode(ENCODING)
    B_IS_52_WEEK_YEARS = IS_52_WEEK_YEARS.encode(ENCODING)
    B_ID_COLUMN_NAME = ID_COLUMN_NAME.encode(ENCODING)
    B_EXTRAPOLATE_FISRT_POINT = EXTRAPOLATE_FISRT_POINT.encode(ENCODING)
    B_EXTRAPOLATE_LAST_POINT = EXTRAPOLATE_LAST_POINT.encode(ENCODING)

    # reference period
    B_REF_PERIOD_START_YEAR = REF_PERIOD_START_YEAR.encode(ENCODING)
    B_REF_PERIOD_NUM_YEARS = REF_PERIOD_NUM_YEARS.encode(ENCODING)

    B_START = START.encode(ENCODING)
    B_FREQUENCY = FREQUENCY.encode(ENCODING)
    B_NUM_POINTS = NUM_POINTS.encode(ENCODING)
    B_TIMEZONE = TIMEZONE.encode(ENCODING)
    B_UNIT = UNIT.encode(ENCODING)
    B_CURRENCY = CURRENCY.encode(ENCODING)

    str_keys_to_bytes_map: ClassVar[dict[str, bytes]] = {
        ID_COLUMN_NAME: B_ID_COLUMN_NAME,
        IS_MAX_LEVEL: B_IS_MAX_LEVEL,
        IS_ZERO_ONE_PROFILE: B_IS_ZERO_ONE_PROFILE,
        IS_52_WEEK_YEARS: B_IS_52_WEEK_YEARS,
        EXTRAPOLATE_FISRT_POINT: B_EXTRAPOLATE_FISRT_POINT,
        EXTRAPOLATE_LAST_POINT: B_EXTRAPOLATE_LAST_POINT,
        REF_PERIOD_START_YEAR: B_REF_PERIOD_START_YEAR,
        REF_PERIOD_NUM_YEARS: B_REF_PERIOD_NUM_YEARS,
        START: B_START,
        FREQUENCY: B_FREQUENCY,
        NUM_POINTS: B_NUM_POINTS,
        TIMEZONE: B_TIMEZONE,
        UNIT: B_UNIT,
        CURRENCY: B_CURRENCY,
    }

    strict_bools_cast: ClassVar[set[str]] = {
        IS_52_WEEK_YEARS,
        EXTRAPOLATE_FISRT_POINT,
        EXTRAPOLATE_LAST_POINT,
    }
    keys_cast_methods: ClassVar[dict[str, Callable | type]] = {
        ID_COLUMN_NAME: str,
        IS_MAX_LEVEL: bool,
        IS_ZERO_ONE_PROFILE: bool,
        REF_PERIOD_START_YEAR: int,
        REF_PERIOD_NUM_YEARS: int,
        START: pd.to_datetime,
        FREQUENCY: pd.to_timedelta,
        NUM_POINTS: int,
        TIMEZONE: pytz.timezone,
        UNIT: str,
        CURRENCY: str,
    }

    @staticmethod
    def cast_meta(
        raw_meta: dict[str | bytes, str | bytes | int | bool | None],
    ) -> tuple[dict[str, str, bool | int | str | datetime | timedelta | tzinfo | None], set[str]]:
        """
        Decode possible binary keys and values and cast values of metadata dict to their defined types.

        Args:
            raw_meta (dict[str  |  bytes, str  |  bytes  |  int  |  bool  |  None]): Dictionary to decode and cast.

        Returns:
            tuple[dict[str, Any], set[str]]: Decoded and cast dictionary, set of missing keys.

        """
        tvmn = TimeVectorMetadataNames
        str_bytes_map = tvmn.str_keys_to_bytes_map
        cast_meta = {key: raw_meta[key] for key in set(str_bytes_map.keys()) | set(str_bytes_map.values()) if key in raw_meta}
        str_to_bytes_meta = tvmn.bytes_keys_to_str(cast_meta)
        cast_meta = str_to_bytes_meta if str_to_bytes_meta else cast_meta  # Keys were bytes and we decode to str.

        missing_keys: set[str] = {key for key in str_bytes_map if key not in cast_meta}

        # Update with cast values for strict bools and others.
        cast_meta.update({key: tvmn.cast_strict_bool_value(cast_meta[key]) for key in tvmn.strict_bools_cast if key in cast_meta})
        cast_meta.update({key: tvmn.cast_value(cast_meta[key], cast_method) for key, cast_method in tvmn.keys_cast_methods.items() if key in cast_meta})

        return cast_meta, missing_keys

    @staticmethod
    def str_keys_to_bytes(raw_meta: dict[str, bytes]) -> dict[bytes, bytes]:
        return {bytes_name: raw_meta[str_name] for str_name, bytes_name in TimeVectorMetadataNames.str_keys_to_bytes_map.items() if str_name in raw_meta}

    @staticmethod
    def bytes_keys_to_str(raw_meta: dict[bytes, bytes]) -> dict[str, bytes]:
        return {str_name: raw_meta[bytes_name] for str_name, bytes_name in TimeVectorMetadataNames.str_keys_to_bytes_map.items() if bytes_name in raw_meta}

    @staticmethod
    def cast_value(value: str | bytes | None, cast_function: Callable | type) -> object | None:
        """
        Cast a string value into a new type, but always return None if the value is None or "None".

        Args:
            value (str | bytes | None): A string value, bytes, or None.
            cast_function (Callable | type): Function or type used to cast the value.

        Raises:
            RuntimeError: If anything goes wrong in the cast_function.

        Returns:
            object|None: Value as new type or None.

        """
        if isinstance(value, bytes):
            if cast_function is bool:
                return None if value == b"None" else value == b"True"
            value = value.decode(encoding=TimeVectorMetadataNames.ENCODING)

        if value is None or value in {"None", ""}:  # Handle missing values
            return None
        try:
            return cast_function(value)
        except Exception as e:
            msg = f"Could not cast metadata value: {value}. Casting method: {cast_function}"
            raise RuntimeError(msg) from e

    @staticmethod
    def cast_strict_bool_value(value: str | bool | bytes) -> bool:
        if isinstance(value, bytes):
            return value == b"True"
        return bool(value)
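
A hedged usage sketch follows, showing byte-keyed Parquet metadata being decoded and cast by cast_meta; the metadata values are illustrative, and the import path follows the source location above.

from framdata.database_names.TimeVectorMetadataNames import TimeVectorMetadataNames as TVMN

# Illustrative byte-keyed metadata, as it might be read back from a parquet file.
raw_meta = {
    b"StartDateTime": b"2021-01-01 00:00",
    b"Frequency": b"1h",
    b"NumberOfPoints": b"8760",
    b"IsMaxLevel": b"True",
    b"Is52WeekYears": b"False",
    b"TimeZone": b"UTC",
    b"Unit": b"MW",
    b"Currency": b"None",  # the string "None" is cast to None
}

cast, missing = TVMN.cast_meta(raw_meta)
type(cast["NumberOfPoints"])  # int
type(cast["StartDateTime"])   # pandas Timestamp
missing                       # keys not present in raw_meta, e.g. "ID" and the extrapolation flags
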
cast_meta(raw_meta: dict[str | bytes, str | bytes | int | bool | None]) -> tuple[dict[str, str, bool | int | str | datetime | timedelta | tzinfo | None], set[str]] staticmethod

Decode possible binary keys and values and cast values of metadata dict to their defined types.

Parameters:

    raw_meta (dict[str | bytes, str | bytes | int | bool | None], required): Dictionary to decode and cast.

Returns:

    tuple[dict[str, Any], set[str]]: Decoded and cast dictionary, set of missing keys.

Source code in framdata/database_names/TimeVectorMetadataNames.py
@staticmethod
def cast_meta(
    raw_meta: dict[str | bytes, str | bytes | int | bool | None],
) -> tuple[dict[str, str, bool | int | str | datetime | timedelta | tzinfo | None], set[str]]:
    """
    Decode possible binary keys and values and cast values of metadata dict to their defined types.

    Args:
        raw_meta (dict[str  |  bytes, str  |  bytes  |  int  |  bool  |  None]): Dictionary to decode and cast.

    Returns:
        tuple[dict[str, Any], set[str]]: Decoded and cast dictionary, set of missing keys.

    """
    tvmn = TimeVectorMetadataNames
    str_bytes_map = tvmn.str_keys_to_bytes_map
    cast_meta = {key: raw_meta[key] for key in set(str_bytes_map.keys()) | set(str_bytes_map.values()) if key in raw_meta}
    str_to_bytes_meta = tvmn.bytes_keys_to_str(cast_meta)
    cast_meta = str_to_bytes_meta if str_to_bytes_meta else cast_meta  # Keys were bytes and we decode to str.

    missing_keys: set[str] = {key for key in str_bytes_map if key not in cast_meta}

    # Update with cast values for strict bools and others.
    cast_meta.update({key: tvmn.cast_strict_bool_value(cast_meta[key]) for key in tvmn.strict_bools_cast if key in cast_meta})
    cast_meta.update({key: tvmn.cast_value(cast_meta[key], cast_method) for key, cast_method in tvmn.keys_cast_methods.items() if key in cast_meta})

    return cast_meta, missing_keys
cast_value(value: str | bytes | None, cast_function: Callable | type) -> object | None staticmethod

Cast a string value into a new type, but always return None if the value is None or "None".

Parameters:

    value (str | bytes | None, required): A string value, bytes, or None.
    cast_function (Callable | type, required): Function or type used to cast the value.

Raises:

    RuntimeError: If anything goes wrong in the cast_function.

Returns:

    object | None: Value as the new type, or None.

Source code in framdata/database_names/TimeVectorMetadataNames.py
@staticmethod
def cast_value(value: str | bytes | None, cast_function: Callable | type) -> object | None:
    """
    Cast a string value into a new type, but always return None if the value is None or "None".

    Args:
        value (str | bytes | None): A string value, bytes, or None.
        cast_function (Callable | type): Function or type used to cast the value.

    Raises:
        RuntimeError: If anything goes wrong in the cast_function.

    Returns:
        object|None: Value as new type or None.

    """
    if isinstance(value, bytes):
        if cast_function is bool:
            return None if value == b"None" else value == b"True"
        value = value.decode(encoding=TimeVectorMetadataNames.ENCODING)

    if value is None or value in {"None", ""}:  # Handle missing values
        return None
    try:
        return cast_function(value)
    except Exception as e:
        msg = f"Could not cast metadata value: {value}. Casting method: {cast_function}"
        raise RuntimeError(msg) from e
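
The behaviour of cast_value on a few representative inputs (the inputs are illustrative):

from framdata.database_names.TimeVectorMetadataNames import TimeVectorMetadataNames as TVMN

TVMN.cast_value("8760", int)          # -> 8760
TVMN.cast_value("None", int)          # -> None ("None" and "" are treated as missing)
TVMN.cast_value(b"True", bool)        # -> True (bytes plus bool is handled without decoding)
TVMN.cast_value("not-a-number", int)  # -> raises RuntimeError wrapping the ValueError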

TransmissionNames

Defines the TransmissionNames class and related Pandera schemas.

These describe and validate the Transmission attribute and metadata tables in the energy model database.

TransmissionMetadataSchema

Bases: _AttributeMetadataSchema

Pandera DataFrameModel schema for metadata in the Transmission.Grid file.

Source code in framdata/database_names/TransmissionNames.py
class TransmissionMetadataSchema(_AttributeMetadataSchema):
    """Pandera DataFrameModel schema for metadata in the Transmission.Grid file."""

    @pa.dataframe_check
    @classmethod
    def check_unit_is_str_for_attributes(cls, df: pd.DataFrame) -> Series[bool]:
        """
        Check that the 'unit' value is a string for the rows where 'attribute' is 'Capacity' or 'Tariff'.

        Args:
            df (pd.DataFrame): DataFrame used to check the value of "unit".

        Returns:
            Series[bool]: Series of boolean values denoting whether each element has passed the check.

        """
        return check_unit_is_str_for_attributes(df, [TransmissionNames.capacity_col, TransmissionNames.tariff_col])
check_unit_is_str_for_attributes(df: pd.DataFrame) -> Series[bool] classmethod

Check that the 'unit' value is a string for the rows where 'attribute' is 'Capacity' or 'Tariff'.

Parameters:

    df (pd.DataFrame, required): DataFrame used to check the value of "unit".

Returns:

    Series[bool]: Series of boolean values denoting whether each element has passed the check.

Source code in framdata/database_names/TransmissionNames.py
@pa.dataframe_check
@classmethod
def check_unit_is_str_for_attributes(cls, df: pd.DataFrame) -> Series[bool]:
    """
    Check that the 'unit' value is a string for the rows where 'attribute' is 'Capacity' or 'Tariff'.

    Args:
        df (pd.DataFrame): DataFrame used to check the value of "unit".

    Returns:
        Series[bool]: Series of boolean values denoting whether each element has passed the check.

    """
    return check_unit_is_str_for_attributes(df, [TransmissionNames.capacity_col, TransmissionNames.tariff_col])
TransmissionNames

Bases: _BaseComponentsNames

Container class for describing the Transmission attribute table's names and structure.

Source code in framdata/database_names/TransmissionNames.py
class TransmissionNames(_BaseComponentsNames):
    """Container class for describing the Transmission attribute table's names and structure."""

    id_col = "TransmissionID"
    from_node_col = "FromNode"
    to_node_col = "ToNode"
    capacity_col = "Capacity"
    loss_col = "Loss"
    tariff_col = "Tariff"
    max_op_bound_col = "MaxOperationalBound"
    min_op_bound_col = "MinOperationalBound"
    ramp_up_col = "RampUp"
    ramp_down_col = "RampDown"

    columns: ClassVar[list[str]] = [
        id_col,
        from_node_col,
        to_node_col,
        capacity_col,
        loss_col,
        tariff_col,
        max_op_bound_col,
        min_op_bound_col,
        ramp_up_col,
        ramp_down_col,
    ]

    ref_columns: ClassVar[list[str]] = [
        from_node_col,
        to_node_col,
        capacity_col,
        loss_col,
        tariff_col,
        max_op_bound_col,
        min_op_bound_col,
        ramp_up_col,
        ramp_down_col,
    ]

    @staticmethod
    def create_component(
        row: NDArray,
        indices: dict[str, int],
        meta_columns: set[str],
        meta_data: pd.DataFrame,
        attribute_objects: dict[str, tuple[object, dict[str, Meta]]] | None = None,
    ) -> dict[str, Transmission]:
        """
        Create a transmission unit component.

        Args:
            row (NDArray): Array containing the values of one table row, representing one Transmission object.
            indices (dict[str, int]): Mapping of the table's column names to the array's indices.
            meta_columns (set[str]): Set of columns used to tag the object with memberships.
            meta_data (pd.DataFrame): DataFrame containing at least the unit of every column.
            attribute_objects (dict[str, tuple[object, dict[str, Meta]]] | None, optional): NOT USED

        Returns:
            dict[str, Transmission]: A dictionary with the transmission_id as key and the transmission unit as value.

        """
        columns_to_parse = [
            TransmissionNames.capacity_col,
            TransmissionNames.loss_col,
            TransmissionNames.tariff_col,
            TransmissionNames.max_op_bound_col,
            TransmissionNames.min_op_bound_col,
            TransmissionNames.ramp_up_col,
            TransmissionNames.ramp_down_col,
        ]

        arg_user_code = TransmissionNames._parse_args(row, indices, columns_to_parse, meta_data)

        ramp_up = None if arg_user_code[TransmissionNames.ramp_up_col] is None else Proportion(level=arg_user_code[TransmissionNames.ramp_up_col])
        ramp_down = None if arg_user_code[TransmissionNames.ramp_down_col] is None else Proportion(level=arg_user_code[TransmissionNames.ramp_down_col])
        loss = None if arg_user_code[TransmissionNames.loss_col] is None else Loss(level=arg_user_code[TransmissionNames.loss_col])

        tariff = None if arg_user_code[TransmissionNames.tariff_col] is None else Cost(level=arg_user_code[TransmissionNames.tariff_col])

        min_capacity = (
            None
            if arg_user_code[TransmissionNames.min_op_bound_col] is None
            else MaxFlowVolume(
                level=arg_user_code[TransmissionNames.capacity_col],
                profile=arg_user_code[TransmissionNames.min_op_bound_col],
            )
        )

        transmission = Transmission(
            from_node=row[indices[TransmissionNames.from_node_col]],
            to_node=row[indices[TransmissionNames.to_node_col]],
            max_capacity=MaxFlowVolume(
                level=arg_user_code[TransmissionNames.capacity_col],
                profile=arg_user_code[TransmissionNames.max_op_bound_col],
            ),
            min_capacity=min_capacity,
            loss=loss,
            tariff=tariff,
            ramp_up=ramp_up,
            ramp_down=ramp_down,
        )
        TransmissionNames._add_meta(transmission, row, indices, meta_columns)

        return {row[indices[TransmissionNames.id_col]]: transmission}

    @staticmethod
    def get_attribute_data_schema() -> pa.DataFrameModel:
        """
        Get the Pandera DataFrameModel schema for attribute data in the Transmission.Grid file.

        Returns:
            pa.DataFrameModel: Pandera DataFrameModel schema for Transmission attribute data.

        """
        return TransmissionSchema

    @staticmethod
    def get_metadata_schema() -> pa.DataFrameModel:
        """
        Get the Pandera DataFrameModel schema for the metadata table in the Transmission.Grid file.

        Returns:
            pa.DataFrameModel: Pandera DataFrameModel schema for the Transmission metadata.

        """
        return TransmissionMetadataSchema

    @staticmethod
    def _get_unique_check_descriptions() -> dict[str, tuple[str, bool]]:
        """
        Retrieve a dictionary with descriptions of validation checks that are specific to the Transmission schemas.

        Returns:
            dict[str, tuple[str, bool]]: A dictionary where:
                - Keys (str): The name of the validation check method.
                - Values (tuple[str, bool]):
                    - The first element (str) provides a concise and user-friendly description of the check. E.g. what
                      caused the validation error or what is required for the check to pass.
                    - The second element (bool) indicates whether the check is a warning (True) or an error (False).


        """
        return {
            "check_internal_line_error": ("Transmission line is internal (FromNode equals ToNode).", False),
        }

    @staticmethod
    def _format_unique_checks(errors: pd.DataFrame) -> pd.DataFrame:
        """
        Format the error DataFrame according to the validation checks that are specific to the Transmission schemas.

        Args:
            errors (pd.DataFrame): The error DataFrame containing validation errors.

        Returns:
            pd.DataFrame: The updated error DataFrame with formatted rows for unique validation checks.

        """
        check_name = "check_internal_line_error"
        if check_name in errors[TransmissionNames.COL_CHECK].to_numpy():
            check_rows = errors.loc[
                (errors[TransmissionNames.COL_CHECK] == check_name)
                & (
                    errors[TransmissionNames.COL_COLUMN].isin(
                        [TransmissionNames.from_node_col, TransmissionNames.to_node_col],
                    )
                )
            ]
            check_rows.loc[:, TransmissionNames.COL_COLUMN] = f"{TransmissionNames.from_node_col}, {TransmissionNames.to_node_col}"
            check_rows = check_rows.drop_duplicates()
            errors = errors[~(errors[TransmissionNames.COL_CHECK] == check_name)]
            errors = pd.concat([errors, check_rows], ignore_index=True)

        return errors
create_component(row: NDArray, indices: dict[str, int], meta_columns: set[str], meta_data: pd.DataFrame, attribute_objects: dict[str, tuple[object, dict[str, Meta]]] | None = None) -> dict[str, Transmission] staticmethod

Create a transmission unit component.

Parameters:

    row (NDArray, required): Array containing the values of one table row, representing one Transmission object.
    indices (dict[str, int], required): Mapping of the table's column names to the array's indices.
    meta_columns (set[str], required): Set of columns used to tag the object with memberships.
    meta_data (pd.DataFrame, required): DataFrame containing at least the unit of every column.
    attribute_objects (dict[str, tuple[object, dict[str, Meta]]] | None, optional): NOT USED. Defaults to None.

Returns:

    dict[str, Transmission]: A dictionary with the transmission_id as key and the transmission unit as value.

Source code in framdata/database_names/TransmissionNames.py
@staticmethod
def create_component(
    row: NDArray,
    indices: dict[str, int],
    meta_columns: set[str],
    meta_data: pd.DataFrame,
    attribute_objects: dict[str, tuple[object, dict[str, Meta]]] | None = None,
) -> dict[str, Transmission]:
    """
    Create a transmission unit component.

    Args:
        row (NDArray): Array containing the values of one table row, representing one Transmission object.
        indices (dict[str, int]): Mapping of the table's column names to the array's indices.
        meta_columns (set[str]): Set of columns used to tag the object with memberships.
        meta_data (pd.DataFrame): DataFrame containing at least the unit of every column.
        attribute_objects (dict[str, tuple[object, dict[str, Meta]]] | None, optional): NOT USED

    Returns:
        dict[str, Transmission]: A dictionary with the transmission_id as key and the transmission unit as value.

    """
    columns_to_parse = [
        TransmissionNames.capacity_col,
        TransmissionNames.loss_col,
        TransmissionNames.tariff_col,
        TransmissionNames.max_op_bound_col,
        TransmissionNames.min_op_bound_col,
        TransmissionNames.ramp_up_col,
        TransmissionNames.ramp_down_col,
    ]

    arg_user_code = TransmissionNames._parse_args(row, indices, columns_to_parse, meta_data)

    ramp_up = None if arg_user_code[TransmissionNames.ramp_up_col] is None else Proportion(level=arg_user_code[TransmissionNames.ramp_up_col])
    ramp_down = None if arg_user_code[TransmissionNames.ramp_down_col] is None else Proportion(level=arg_user_code[TransmissionNames.ramp_down_col])
    loss = None if arg_user_code[TransmissionNames.loss_col] is None else Loss(level=arg_user_code[TransmissionNames.loss_col])

    tariff = None if arg_user_code[TransmissionNames.tariff_col] is None else Cost(level=arg_user_code[TransmissionNames.tariff_col])

    min_capacity = (
        None
        if arg_user_code[TransmissionNames.min_op_bound_col] is None
        else MaxFlowVolume(
            level=arg_user_code[TransmissionNames.capacity_col],
            profile=arg_user_code[TransmissionNames.min_op_bound_col],
        )
    )

    transmission = Transmission(
        from_node=row[indices[TransmissionNames.from_node_col]],
        to_node=row[indices[TransmissionNames.to_node_col]],
        max_capacity=MaxFlowVolume(
            level=arg_user_code[TransmissionNames.capacity_col],
            profile=arg_user_code[TransmissionNames.max_op_bound_col],
        ),
        min_capacity=min_capacity,
        loss=loss,
        tariff=tariff,
        ramp_up=ramp_up,
        ramp_down=ramp_down,
    )
    TransmissionNames._add_meta(transmission, row, indices, meta_columns)

    return {row[indices[TransmissionNames.id_col]]: transmission}
get_attribute_data_schema() -> pa.DataFrameModel staticmethod

Get the Pandera DataFrameModel schema for attribute data in the Transmission.Grid file.

Returns:

    pa.DataFrameModel: Pandera DataFrameModel schema for Transmission attribute data.

Source code in framdata/database_names/TransmissionNames.py
@staticmethod
def get_attribute_data_schema() -> pa.DataFrameModel:
    """
    Get the Pandera DataFrameModel schema for attribute data in the Transmission.Grid file.

    Returns:
        pa.DataFrameModel: Pandera DataFrameModel schema for Transmission attribute data.

    """
    return TransmissionSchema
get_metadata_schema() -> pa.DataFrameModel staticmethod

Get the Pandera DataFrameModel schema for the metadata table in the Transmission.Grid file.

Returns:

    pa.DataFrameModel: Pandera DataFrameModel schema for the Transmission metadata.

Source code in framdata/database_names/TransmissionNames.py
@staticmethod
def get_metadata_schema() -> pa.DataFrameModel:
    """
    Get the Pandera DataFrameModel schema for the metadata table in the Transmission.Grid file.

    Returns:
        pa.DataFrameModel: Pandera DataFrameModel schema for the Transmission metadata.

    """
    return TransmissionMetadataSchema
TransmissionSchema

Bases: DataFrameModel

Pandera DataFrameModel schema for attribute data in the Transmission.Grid file.

Source code in framdata/database_names/TransmissionNames.py
class TransmissionSchema(pa.DataFrameModel):
    """Pandera DataFrameModel schema for attribute data in the Transmission.Grid file."""

    TransmissionID: Series[str] = pa.Field(unique=True, nullable=False)
    FromNode: Series[str] = pa.Field(nullable=False)
    ToNode: Series[str] = pa.Field(nullable=False)
    Capacity: Series[Any] = pa.Field(nullable=False)
    Loss: Series[Any] = pa.Field(nullable=True)
    Tariff: Series[Any] = pa.Field(nullable=True)
    MaxOperationalBound: Series[Any] = pa.Field(nullable=True)
    MinOperationalBound: Series[Any] = pa.Field(nullable=True)
    RampUp: Series[Any] = pa.Field(nullable=True)
    RampDown: Series[Any] = pa.Field(nullable=True)

    @pa.check(TransmissionNames.capacity_col)
    @classmethod
    def dtype_str_int_float(cls, series: Series[Any]) -> Series[bool]:
        """Check if values in the series are of datatype: str, int or float."""
        return dtype_str_int_float(series)

    @pa.check(
        TransmissionNames.loss_col,
        TransmissionNames.tariff_col,
        TransmissionNames.max_op_bound_col,
        TransmissionNames.min_op_bound_col,
        TransmissionNames.ramp_up_col,
        TransmissionNames.ramp_down_col,
    )
    @classmethod
    def dtype_str_int_float_none(cls, series: Series[Any]) -> Series[bool]:
        """Check if values in the series are of datatype: str, int, float or None."""
        return dtype_str_int_float_none(series)

    @pa.check(TransmissionNames.capacity_col)
    @classmethod
    def numeric_values_greater_than_or_equal_to_0(cls, series: Series[Any]) -> Series[bool]:
        """Check if numeric values in the series are greater than or equal to zero."""
        return numeric_values_greater_than_or_equal_to(series, 0)

    @pa.check(TransmissionNames.loss_col)
    @classmethod
    def numeric_values_are_between_or_equal_to_0_and_1(cls, series: Series[Any]) -> Series[bool]:
        """Check if numeric values in the series are between zero and one or equal to zero and one."""
        return numeric_values_are_between_or_equal_to(series, 0, 1)

    @pa.dataframe_check
    @classmethod
    def check_internal_line_error(cls, dataframe: pd.DataFrame) -> Series[bool]:
        """
        Raise warning if origin node is the same as destination node, in which case we have an internal line.

        Args:
            dataframe (pd.DataFrame): DataFrame to check.

        Returns:
            Series[bool]: Series of boolean values denoting if each element has passed the check.

        """
        return dataframe[TransmissionNames.from_node_col] != dataframe[TransmissionNames.to_node_col]

    class Config:
        """Schema-wide configuration for the DemandSchema class."""

        unique_column_names = True
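
A minimal sketch of validating a Transmission attribute table against this schema; the row values are illustrative and not taken from the database.

import pandas as pd
from framdata.database_names.TransmissionNames import TransmissionSchema

df = pd.DataFrame(
    {
        "TransmissionID": ["NO1-SE3"],
        "FromNode": ["NO1"],
        "ToNode": ["SE3"],  # equal FromNode/ToNode would trip check_internal_line_error
        "Capacity": [2145.0],
        "Loss": [0.02],     # must lie within [0, 1]
        "Tariff": [None],
        "MaxOperationalBound": [None],
        "MinOperationalBound": [None],
        "RampUp": [None],
        "RampDown": [None],
    }
)

TransmissionSchema.validate(df)  # raises a pandera SchemaError if any check fails
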
Config

Schema-wide configuration for the TransmissionSchema class.

Source code in framdata/database_names/TransmissionNames.py
class Config:
    """Schema-wide configuration for the DemandSchema class."""

    unique_column_names = True
check_internal_line_error(dataframe: pd.DataFrame) -> Series[bool] classmethod

Raise warning if origin node is the same as destination node, in which case we have an internal line.

Parameters:

    dataframe (pd.DataFrame, required): DataFrame to check.

Returns:

    Series[bool]: Series of boolean values denoting if each element has passed the check.

Source code in framdata/database_names/TransmissionNames.py
@pa.dataframe_check
@classmethod
def check_internal_line_error(cls, dataframe: pd.DataFrame) -> Series[bool]:
    """
    Raise warning if origin node is the same as destination node, in which case we have an internal line.

    Args:
        dataframe (pd.DataFrame): DataFrame to check.

    Returns:
        Series[bool]: Series of boolean values denoting if each element has passed the check.

    """
    return dataframe[TransmissionNames.from_node_col] != dataframe[TransmissionNames.to_node_col]
dtype_str_int_float(series: Series[Any]) -> Series[bool] classmethod

Check if values in the series are of datatype: str, int or float.

Source code in framdata/database_names/TransmissionNames.py
@pa.check(TransmissionNames.capacity_col)
@classmethod
def dtype_str_int_float(cls, series: Series[Any]) -> Series[bool]:
    """Check if values in the series are of datatype: str, int or float."""
    return dtype_str_int_float(series)
dtype_str_int_float_none(series: Series[Any]) -> Series[bool] classmethod

Check if values in the series are of datatype: str, int, float or None.

Source code in framdata/database_names/TransmissionNames.py
@pa.check(
    TransmissionNames.loss_col,
    TransmissionNames.tariff_col,
    TransmissionNames.max_op_bound_col,
    TransmissionNames.min_op_bound_col,
    TransmissionNames.ramp_up_col,
    TransmissionNames.ramp_down_col,
)
@classmethod
def dtype_str_int_float_none(cls, series: Series[Any]) -> Series[bool]:
    """Check if values in the series are of datatype: str, int, float or None."""
    return dtype_str_int_float_none(series)
numeric_values_are_between_or_equal_to_0_and_1(series: Series[Any]) -> Series[bool] classmethod

Check if numeric values in the series are between zero and one or equal to zero and one.

Source code in framdata/database_names/TransmissionNames.py
@pa.check(TransmissionNames.loss_col)
@classmethod
def numeric_values_are_between_or_equal_to_0_and_1(cls, series: Series[Any]) -> Series[bool]:
    """Check if numeric values in the series are between zero and one or equal to zero and one."""
    return numeric_values_are_between_or_equal_to(series, 0, 1)
numeric_values_greater_than_or_equal_to_0(series: Series[Any]) -> Series[bool] classmethod

Check if numeric values in the series are greater than or equal to zero.

Source code in framdata/database_names/TransmissionNames.py
@pa.check(TransmissionNames.capacity_col)
@classmethod
def numeric_values_greater_than_or_equal_to_0(cls, series: Series[Any]) -> Series[bool]:
    """Check if numeric values in the series are greater than or equal to zero."""
    return numeric_values_greater_than_or_equal_to(series, 0)

WindSolarNames

Classes defining Wind and Solar tables and how to create Components from them.

SolarNames

Bases: WindSolarNames

Class representing the names and structure of Solar tables, and method for creating Solar Component objects.

Source code in framdata/database_names/WindSolarNames.py
class SolarNames(WindSolarNames):
    """Class representing the names and structure of Solar tables, and method for creating Solar Component objects."""

    @staticmethod
    def create_component(
        row: NDArray,
        indices: dict[str, int],
        meta_columns: set[str],
        meta_data: pd.DataFrame,
        attribute_objects: dict[str, tuple[object, dict[str, Meta]]] | None = None,
    ) -> dict[str, Solar]:
        """
        Create a Solar Component from a row in the Solar.Generators table.

        Args:
            row (NDArray): Array containing the values of one table row, representing one solar object.
            indices (dict[str, int]): Mapping of the table's column names to the array's indices.
            meta_columns (set[str]): Set of columns used to tag the object with memberships.
            meta_data (pd.DataFrame): DataFrame containing at least the unit of every column.
            attribute_objects (dict[str, tuple[object, dict[str, Meta]]] | None, optional): NOT USED

        Returns:
            dict[str, Solar]: A dictionary with the id as key and the solar unit as value.

        """
        columns_to_parse = [
            SolarNames.profile_col,
            SolarNames.capacity_col,
        ]

        arg_user_code = SolarNames._parse_args(row, indices, columns_to_parse, meta_data)

        solar = Solar(
            power_node=row[indices[SolarNames.power_node_col]],
            max_capacity=MaxFlowVolume(
                level=arg_user_code[SolarNames.capacity_col],
                profile=arg_user_code[SolarNames.profile_col],
            ),
            voc=None,
        )

        SolarNames._add_meta(solar, row, indices, meta_columns)

        return {row[indices[SolarNames.id_col]]: solar}
create_component(row: NDArray, indices: dict[str, int], meta_columns: set[str], meta_data: pd.DataFrame, attribute_objects: dict[str, tuple[object, dict[str, Meta]]] | None = None) -> dict[str, Solar] staticmethod

Create a Solar Component from a row in the Solar.Generators table.

Parameters:

    row (NDArray, required): Array containing the values of one table row, representing one solar object.
    indices (dict[str, int], required): Mapping of the table's column names to the array's indices.
    meta_columns (set[str], required): Set of columns used to tag the object with memberships.
    meta_data (pd.DataFrame, required): DataFrame containing at least the unit of every column.
    attribute_objects (dict[str, tuple[object, dict[str, Meta]]] | None, optional): NOT USED. Defaults to None.

Returns:

    dict[str, Solar]: A dictionary with the id as key and the solar unit as value.

Source code in framdata/database_names/WindSolarNames.py
@staticmethod
def create_component(
    row: NDArray,
    indices: dict[str, int],
    meta_columns: set[str],
    meta_data: pd.DataFrame,
    attribute_objects: dict[str, tuple[object, dict[str, Meta]]] | None = None,
) -> dict[str, Solar]:
    """
    Create a Solar Component from a row in the Solar.Generators table.

    Args:
        row (NDArray): Array containing the values of one table row, representing one solar object.
        indices (dict[str, int]): Mapping of the table's column names to the array's indices.
        meta_columns (set[str]): Set of columns used to tag the object with memberships.
        meta_data (pd.DataFrame): DataFrame containing at least the unit of every column.
        attribute_objects (dict[str, tuple[object, dict[str, Meta]]] | None, optional): NOT USED

    Returns:
        dict[str, Solar]: A dictionary with the id as key and the solar unit as value.

    """
    columns_to_parse = [
        SolarNames.profile_col,
        SolarNames.capacity_col,
    ]

    arg_user_code = SolarNames._parse_args(row, indices, columns_to_parse, meta_data)

    solar = Solar(
        power_node=row[indices[SolarNames.power_node_col]],
        max_capacity=MaxFlowVolume(
            level=arg_user_code[SolarNames.capacity_col],
            profile=arg_user_code[SolarNames.profile_col],
        ),
        voc=None,
    )

    SolarNames._add_meta(solar, row, indices, meta_columns)

    return {row[indices[SolarNames.id_col]]: solar}
WindNames

Bases: WindSolarNames

Class representing the names and structure of Wind tables, and method for creating Wind Component objects.

Source code in framdata/database_names/WindSolarNames.py
class WindNames(WindSolarNames):
    """Class representing the names and structure of Wind tables, and method for creating Wind Component objects."""

    @staticmethod
    def create_component(
        row: NDArray,
        indices: dict[str, int],
        meta_columns: set[str],
        meta_data: pd.DataFrame,
        attribute_objects: dict[str, tuple[object, dict[str, Meta]]] | None = None,
    ) -> dict[str, Wind]:
        """
        Create a Wind Component from a row in the Wind.Generators table.

        Args:
            row (NDArray): Array containing the values of one table row, representing one Wind object.
            indices (dict[str, int]): Mapping of the table's column names to the array's indices.
            meta_columns (set[str]): Set of columns used to tag the object with memberships.
            meta_data (pd.DataFrame): DataFrame containing at least the unit of every column.
            attribute_objects (dict[str, tuple[object, dict[str, Meta]]] | None, optional): NOT USED

        Returns:
            dict[str, Wind]: A dictionary with the wind_id as key and the wind unit as value.

        """
        columns_to_parse = [
            WindNames.profile_col,
            WindNames.capacity_col,
        ]

        arg_user_code = WindNames._parse_args(row, indices, columns_to_parse, meta_data)

        wind = Wind(
            power_node=row[indices[WindNames.power_node_col]],
            max_capacity=MaxFlowVolume(
                level=arg_user_code[WindNames.capacity_col],
                profile=arg_user_code[WindNames.profile_col],
            ),
            voc=None,
        )
        WindNames._add_meta(wind, row, indices, meta_columns)

        return {row[indices[WindNames.id_col]]: wind}
create_component(row: NDArray, indices: dict[str, int], meta_columns: set[str], meta_data: pd.DataFrame, attribute_objects: dict[str, tuple[object, dict[str, Meta]]] | None = None) -> dict[str, Wind] staticmethod

Create a Wind Component from a row in the Wind.Generators table.

Parameters:

    row (NDArray, required): Array containing the values of one table row, representing one Wind object.
    indices (dict[str, int], required): Mapping of the table's column names to the array's indices.
    meta_columns (set[str], required): Set of columns used to tag the object with memberships.
    meta_data (pd.DataFrame, required): DataFrame containing at least the unit of every column.
    attribute_objects (dict[str, tuple[object, dict[str, Meta]]] | None, optional): NOT USED. Defaults to None.

Returns:

    dict[str, Wind]: A dictionary with the wind_id as key and the wind unit as value.

Source code in framdata/database_names/WindSolarNames.py
@staticmethod
def create_component(
    row: NDArray,
    indices: dict[str, int],
    meta_columns: set[str],
    meta_data: pd.DataFrame,
    attribute_objects: dict[str, tuple[object, dict[str, Meta]]] | None = None,
) -> dict[str, Wind]:
    """
    Create a Wind Component from a row in the Wind.Generators table.

    Args:
        row (NDArray): Array containing the values of one table row, representing one Wind object.
        indices (dict[str, int]): Mapping of the table's column names to the array's indices.
        meta_columns (set[str]): Set of columns used to tag the object with memberships.
        meta_data (pd.DataFrame): DataFrame containing at least the unit of every column.
        attribute_objects (dict[str, tuple[object, dict[str, Meta]]] | None, optional): NOT USED

    Returns:
        dict[str, Wind]: A dictionary with the wind_id as key and the wind unit as value.

    """
    columns_to_parse = [
        WindNames.profile_col,
        WindNames.capacity_col,
    ]

    arg_user_code = WindNames._parse_args(row, indices, columns_to_parse, meta_data)

    wind = Wind(
        power_node=row[indices[WindNames.power_node_col]],
        max_capacity=MaxFlowVolume(
            level=arg_user_code[WindNames.capacity_col],
            profile=arg_user_code[WindNames.profile_col],
        ),
        voc=None,
    )
    WindNames._add_meta(wind, row, indices, meta_columns)

    return {row[indices[WindNames.id_col]]: wind}
WindSolarMetadataSchema

Bases: _AttributeMetadataSchema

Standard Pandera DataFrameModel schema for metadata in the Wind and Solar files.

Source code in framdata/database_names/WindSolarNames.py
class WindSolarMetadataSchema(_AttributeMetadataSchema):
    """Standard Pandera DataFrameModel schema for metadata in the Wind and Solar files."""

    pass
WindSolarNames

Bases: _BaseComponentsNames

Class representing the names and structure of Wind and Solar tables.

Source code in framdata/database_names/WindSolarNames.py
class WindSolarNames(_BaseComponentsNames):
    """Class representing the names and structure of Wind and Solar tables."""

    id_col = "ID"
    power_node_col = "PowerNode"
    profile_col = "Profile"
    type_col = "TechnologyType"
    capacity_col = "Capacity"

    columns: ClassVar[list[str]] = [
        id_col,
        power_node_col,
        profile_col,
        capacity_col,
    ]

    ref_columns: ClassVar[list[str]] = [
        power_node_col,
        profile_col,
        capacity_col,
    ]

    @staticmethod
    def get_attribute_data_schema() -> pa.DataFrameModel:
        """
        Get the Pandera DataFrameModel schema for attribute data in a Wind and Solar file.

        Returns:
            pa.DataFrameModel: Pandera DataFrameModel schema for Wind and Solar attribute data.

        """
        return WindSolarSchema

    @staticmethod
    def get_metadata_schema() -> pa.DataFrameModel:
        """
        Get the Pandera DataFrameModel schema for the metadata table in a Wind and Solar file.

        Returns:
            pa.DataFrameModel: Pandera DataFrameModel schema for the Wind and Solar metadata.

        """
        return WindSolarMetadataSchema

    @staticmethod
    def _get_unique_check_descriptions() -> dict[str, tuple[str, bool]]:
        """
        Retrieve a dictionary with descriptions of validation checks that are specific to the Wind and Solar schemas.

        Returns:
            dict[str, tuple[str, bool]]: A dictionary where:
                - Keys (str): The name of the validation check method.
                - Values (tuple[str, bool]):
                    - The first element (str) provides a concise and user-friendly description of the check. E.g. what
                      caused the validation error or what is required for the check to pass.
                    - The second element (bool) indicates whether the check is a warning (True) or an error (False).


        """
        return None

    @staticmethod
    def _format_unique_checks(errors: pd.DataFrame) -> pd.DataFrame:
        """
        Format the error DataFrame according to the validation checks that are specific to the Wind and Solar schemas.

        Args:
            errors (pd.DataFrame): The error DataFrame containing validation errors.

        Returns:
            pd.DataFrame: The updated error DataFrame with formatted rows for unique validation checks.

        """
        return None
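
For reference, a minimal Wind or Solar attribute table with the columns listed above might look like the sketch below; the ID and the profile reference are illustrative, and metadata/membership columns are omitted.

import pandas as pd

df = pd.DataFrame(
    {
        "ID": ["NO2_WindOnshore"],
        "PowerNode": ["NO2"],
        "Profile": ["NO2_WindOnshore_profile"],  # reference resolved against the profiles database
        "Capacity": [1200.0],
    }
)
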
get_attribute_data_schema() -> pa.DataFrameModel staticmethod

Get the Pandera DataFrameModel schema for attribute data in a Wind and Solar file.

Returns:

    pa.DataFrameModel: Pandera DataFrameModel schema for Wind and Solar attribute data.

Source code in framdata/database_names/WindSolarNames.py
@staticmethod
def get_attribute_data_schema() -> pa.DataFrameModel:
    """
    Get the Pandera DataFrameModel schema for attribute data in a Wind and Solar file.

    Returns:
        pa.DataFrameModel: Pandera DataFrameModel schema for Wind and Solar attribute data.

    """
    return WindSolarSchema
get_metadata_schema() -> pa.DataFrameModel staticmethod

Get the Pandera DataFrameModel schema for the metadata table in a Wind and Solar file.

Returns:

Type Description
DataFrameModel

pa.DataFrameModel: Pandera DataFrameModel schema for the Wind and Solar metadata.

Source code in framdata/database_names/WindSolarNames.py
@staticmethod
def get_metadata_schema() -> pa.DataFrameModel:
    """
    Get the Pandera DataFrameModel schema for the metadata table in a Wind and Solar file.

    Returns:
        pa.DataFrameModel: Pandera DataFrameModel schema for the Wind and Solar metadata.

    """
    return WindSolarMetadataSchema
WindSolarSchema

Bases: DataFrameModel

Standard Pandera DataFrameModel schema for attribute data in the Wind and Solar files.

Source code in framdata/database_names/WindSolarNames.py
class WindSolarSchema(pa.DataFrameModel):
    """Standard Pandera DataFrameModel schema for attribute data in the Wind and Solar files."""

    pass

YamlNames

Define names and fields used in yaml files.

YamlNames

Contain names in yaml files.

Source code in framdata/database_names/YamlNames.py
class YamlNames:
    """Contain names in yaml files."""

    encoding = "utf-8"

    metadata_field = "Metadata"
    x_field = "X"
    y_field = "Y"

    # ========= Metadata fields =========
    attribute = "Attribute"
    description = "Description"
    dtype = "Dtype"
    unit = "Unit"

nodes_names

Define class for handling tables with Nodes.

EmissionNodesNames

Bases: NodesNames

Class representing the names and structure of emission nodes tables.

Source code in framdata/database_names/nodes_names.py
class EmissionNodesNames(NodesNames):
    """Class representing the names and structure of emission nodes tables."""

    filename = "Emission.Nodes"

    tax_col = "Tax"  # deprecated?
FuelNodesNames

Bases: NodesNames

Class representing the names and structure of fuel nodes tables.

Source code in framdata/database_names/nodes_names.py
class FuelNodesNames(NodesNames):
    """Class representing the names and structure of fuel nodes tables."""

    filename = "Fuel.Nodes"

    emission_coefficient_col = "EmissionCoefficient"
    tax_col = "Tax"  # deprecated?
NodesMetadataSchema

Bases: _AttributeMetadataSchema

Standard Pandera DataFrameModel schema for metadata in the Nodes files.

Source code in framdata/database_names/nodes_names.py
class NodesMetadataSchema(_AttributeMetadataSchema):
    """Standard Pandera DataFrameModel schema for metadata in the Nodes files."""

    pass
NodesNames

Bases: _BaseComponentsNames

Class representing the names and structure of nodes tables, and the conversion of the table to Node objects.

Source code in framdata/database_names/nodes_names.py
class NodesNames(_BaseComponentsNames):
    """Class representing the names and structure of nodes tables, and the convertion of the table to Node objects."""

    id_col = "NodeID"

    commodity_col = "Commodity"
    nice_name = "NiceName"
    price_col = "ExogenPrice"
    profile_col = "PriceProfile"
    exogenous_col = "IsExogenous"

    columns: ClassVar[list[str]] = [id_col, nice_name, commodity_col, price_col, profile_col, exogenous_col]

    ref_columns: ClassVar[list[str]] = [price_col, profile_col]

    @staticmethod
    def create_component(
        row: NDArray,
        indices: dict[str, int],
        meta_columns: set[str],
        meta_data: pd.DataFrame,
        attribute_objects: dict[str, tuple[object, dict[str, Meta]]] | None = None,
    ) -> tuple[dict[str, Node], list[str]]:
        """
        Create a node object from direct parameters.

        Args:
            row (NDArray): Array containing the values of one table row, representing one Node object.
            indices (list[str, int]): Mapping of table's Column names to the array's indices.
            meta_columns (list[str]): Set of columns which defines memberships in meta groups for aggregation.
            meta_data (pd.DataFrame): Dictionary containing at least unit of every column.
            attribute_objects (dict[str, tuple[object, dict[str, Meta]]], optional): NOT USED

        Returns:
            dict[str, Node]: Dictionary of node id and the Node object.

        """
        columns_to_parse = [
            NodesNames.price_col,
            NodesNames.profile_col,
        ]

        arg_user_code = NodesNames._parse_args(row, indices, columns_to_parse, meta_data)
        price = None
        if arg_user_code[NodesNames.price_col] is not None:
            price = Price(
                level=arg_user_code[NodesNames.price_col],
                profile=arg_user_code[NodesNames.profile_col],
            )

        node = Node(
            row[indices[NodesNames.commodity_col]],
            is_exogenous=row[indices[NodesNames.exogenous_col]],
            price=price,
        )
        NodesNames._add_meta(node, row, indices, meta_columns)
        return {row[indices[NodesNames.id_col]]: node}

    @staticmethod
    def get_attribute_data_schema() -> pa.DataFrameModel:
        """
        Get the Pandera DataFrameModel schema for attribute data in a Nodes file.

        Returns:
            pa.DataFrameModel: Pandera DataFrameModel schema for Nodes attribute data.

        """
        return NodesSchema

    @staticmethod
    def get_metadata_schema() -> pa.DataFrameModel:
        """
        Get the Pandera DataFrameModel schema for the metadata table in a Nodes file.

        Returns:
            pa.DataFrameModel: Pandera DataFrameModel schema for the Nodes metadata.

        """
        return NodesMetadataSchema

    @staticmethod
    def _get_unique_check_descriptions() -> dict[str, tuple[str, bool]]:
        """
        Retrieve a dictionary with descriptions of validation checks that are specific to the Nodes schemas.

        Returns:
            dict[str, tuple[str, bool]]: A dictionary where:
                - Keys (str): The name of the validation check method.
                - Values (tuple[str, bool]):
                    - The first element (str) provides a concise and user-friendly description of the check. E.g. what
                      caused the validation error or what is required for the check to pass.
                    - The second element (bool) indicates whether the check is a warning (True) or an error (False).


        """
        return None

    @staticmethod
    def _format_unique_checks(errors: pd.DataFrame) -> pd.DataFrame:
        """
        Format the error DataFrame according to the validation checks that are specific to the Nodes schemas.

        Args:
            errors (pd.DataFrame): The error DataFrame containing validation errors.

        Returns:
            pd.DataFrame: The updated error DataFrame with formatted rows for unique validation checks.

        """
        return None
create_component(row: NDArray, indices: dict[str, int], meta_columns: set[str], meta_data: pd.DataFrame, attribute_objects: dict[str, tuple[object, dict[str, Meta]]] | None = None) -> tuple[dict[str, Node], list[str]] staticmethod

Create a node object from direct parameters.

Parameters:

Name Type Description Default
row NDArray

Array containing the values of one table row, representing one Node object.

required
indices list[str, int]

Mapping of table's Column names to the array's indices.

required
meta_columns list[str]

Set of columns which defines memberships in meta groups for aggregation.

required
meta_data DataFrame

Dictionary containing at least unit of every column.

required
attribute_objects dict[str, tuple[object, dict[str, Meta]]]

NOT USED

None

Returns:

Type Description
tuple[dict[str, Node], list[str]]

dict[str, Node]: Dictionary of node id and the Node object.

Source code in framdata/database_names/nodes_names.py
@staticmethod
def create_component(
    row: NDArray,
    indices: dict[str, int],
    meta_columns: set[str],
    meta_data: pd.DataFrame,
    attribute_objects: dict[str, tuple[object, dict[str, Meta]]] | None = None,
) -> tuple[dict[str, Node], list[str]]:
    """
    Create a node object from direct parameters.

    Args:
        row (NDArray): Array containing the values of one table row, representing one Node object.
        indices (list[str, int]): Mapping of table's Column names to the array's indices.
        meta_columns (list[str]): Set of columns which defines memberships in meta groups for aggregation.
        meta_data (pd.DataFrame): Dictionary containing at least unit of every column.
        attribute_objects (dict[str, tuple[object, dict[str, Meta]]], optional): NOT USED

    Returns:
        dict[str, Node]: Dictionary of node id and the Node object.

    """
    columns_to_parse = [
        NodesNames.price_col,
        NodesNames.profile_col,
    ]

    arg_user_code = NodesNames._parse_args(row, indices, columns_to_parse, meta_data)
    price = None
    if arg_user_code[NodesNames.price_col] is not None:
        price = Price(
            level=arg_user_code[NodesNames.price_col],
            profile=arg_user_code[NodesNames.profile_col],
        )

    node = Node(
        row[indices[NodesNames.commodity_col]],
        is_exogenous=row[indices[NodesNames.exogenous_col]],
        price=price,
    )
    NodesNames._add_meta(node, row, indices, meta_columns)
    return {row[indices[NodesNames.id_col]]: node}
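To make the expected inputs concrete, here is a minimal sketch of how a table row could be passed to create_component (the row values are purely illustrative, and the metadata DataFrame is left out since its exact contents depend on the file being read):

import numpy as np

indices = {name: i for i, name in enumerate(NodesNames.columns)}
# Hypothetical row following the column order NodeID, NiceName, Commodity, ExogenPrice, PriceProfile, IsExogenous.
row = np.array(["NO1", "Norway south", "Power", 35.0, "price_profile_NO1", True], dtype=object)
# nodes = NodesNames.create_component(row, indices, meta_columns=set(), meta_data=metadata_df)
# 'nodes' would then map "NO1" to the created Node object.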
get_attribute_data_schema() -> pa.DataFrameModel staticmethod

Get the Pandera DataFrameModel schema for attribute data in a Nodes file.

Returns:

Type Description
DataFrameModel

pa.DataFrameModel: Pandera DataFrameModel schema for Nodes attribute data.

Source code in framdata/database_names/nodes_names.py
@staticmethod
def get_attribute_data_schema() -> pa.DataFrameModel:
    """
    Get the Pandera DataFrameModel schema for attribute data in a Nodes file.

    Returns:
        pa.DataFrameModel: Pandera DataFrameModel schema for Nodes attribute data.

    """
    return NodesSchema
get_metadata_schema() -> pa.DataFrameModel staticmethod

Get the Pandera DataFrameModel schema for the metadata table in a Nodes file.

Returns:

Type Description
DataFrameModel

pa.DataFrameModel: Pandera DataFrameModel schema for the Nodes metadata.

Source code in framdata/database_names/nodes_names.py
@staticmethod
def get_metadata_schema() -> pa.DataFrameModel:
    """
    Get the Pandera DataFrameModel schema for the metadata table in a Nodes file.

    Returns:
        pa.DataFrameModel: Pandera DataFrameModel schema for the Nodes metadata.

    """
    return NodesMetadataSchema
NodesSchema

Bases: DataFrameModel

Standard Pandera DataFrameModel schema for attribute data in the Nodes files.

Source code in framdata/database_names/nodes_names.py
class NodesSchema(pa.DataFrameModel):
    """Standard Pandera DataFrameModel schema for attribute data in the Nodes files."""

    pass
PowerNodesNames

Bases: NodesNames

Class representing the names and structure of power nodes tables.

Source code in framdata/database_names/nodes_names.py
class PowerNodesNames(NodesNames):
    """Class representing the names and structure of power nodes tables."""

    filename = "Power.Nodes"

validation_functions

Module containing registered custom check functions used by Pandera schema classes.

check_unit_is_str_for_attributes(df: pd.DataFrame, attribute_names: list[str]) -> Series[bool]

Check if 'Unit' column values are strings for the rows where the 'Attribute' column matches specific attributes.

This function checks whether the values in the 'Unit' column are strings for rows where the 'Attribute' column matches any of the specified attribute names. Rows that do not match the specified attributes are considered valid by default. This function is commonly used by subclasses of 'AttributeMetadataSchema' to validate that a unit is given for certain attributes in the metadata belonging to a Component.

Parameters:

Name Type Description Default
df DataFrame

The DataFrame containing the columns to validate.

required
attribute_names list[str]

A list with the names of the attributes to check in the 'Attribute' column.

required

Returns:

Type Description
Series[bool]

Series[bool]: A boolean Series indicating whether each row passes the validation. Rows where the 'Attribute'

Series[bool]

column does not match the specified attribute are automatically marked as valid.

Example

Given the following DataFrame:

| attribute   | unit |
|-------------|------|
| Volume      | MWh  |
| Temperature | None |
| Capacity    | None |

And attribute_names = ["Volume", "Capacity"], the method will validate that the 'Unit' column contains strings for rows where 'attribute' is "Volume" and "Capacity". The resulting Series will be:

| validation_result |
|-------------------|
| True              |
| True              |
| False             |
Source code in framdata/database_names/validation_functions.py
@extensions.register_check_method()
def check_unit_is_str_for_attributes(df: pd.DataFrame, attribute_names: list[str]) -> Series[bool]:
    """
    Check if 'Unit' column values are strings for the rows where the 'Attribute' column matches specific attributes.

    This function checks whether the values in the 'Unit' column are strings for rows where the 'Attribute' column
    matches any of the specified attribute names. Rows that do not match the specified attributes are considered valid
    by default. This function is commonly used by subclasses of 'AttributeMetadataSchema' to validate that a unit is
    given for certain attributes in the metadata belonging to a Component.

    Args:
        df (pd.DataFrame): The DataFrame containing the columns to validate.
        attribute_names (list[str]): A list with the names of the attributes to check in the 'Attribute' column.

    Returns:
        Series[bool]: A boolean Series indicating whether each row passes the validation. Rows where the 'Attribute'
        column does not match the specified attribute are automatically marked as valid.

    Example:
        Given the following DataFrame:

        | attribute   | unit       |
        |-------------|------------|
        | Volume      | MWh        |
        | Temperature | None       |
        | Capacity    | None       |

        And `attribute_names = ["Volume", "Capacity"]`, the method will validate that the 'Unit' column contains strings
        for rows where 'attribute' is "Volume" and "Capacity". The resulting Series will be:

        | validation_result |
        |-------------------|
        | True              |
        | True              |
        | False             |

    """
    is_attribute_rows = df[_AttributeMetadataNames.attribute].isin(attribute_names)
    unit_is_str = df[_AttributeMetadataNames.unit].apply(lambda x: isinstance(x, str))
    return ~is_attribute_rows | unit_is_str
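Because the check is registered with pandera, it can be referenced by name from a schema. A minimal, illustrative sketch of how a dataframe-level registered check could be wired into a DataFrameModel (the schema below is not part of framdata):

import pandera as pa
from pandera.typing import Series

class ExampleMetadataSchema(pa.DataFrameModel):
    """Illustrative schema; not part of framdata."""

    Attribute: Series[str]
    Unit: Series[str] = pa.Field(nullable=True)

    class Config:
        # Registered dataframe-level checks are referenced by name, with their arguments as a dict.
        check_unit_is_str_for_attributes = {"attribute_names": ["Volume", "Capacity"]}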
dtype_str_int_float(series: Series[Any]) -> Series[bool]

Check if the series contains only str, int or float values.

Parameters:

Name Type Description Default
series Series[Any]

Series to check.

required

Returns:

Type Description
Series[bool]

Series[bool]: Series of boolean values denoting if each element has passed the check.

Source code in framdata/database_names/validation_functions.py
@extensions.register_check_method()
def dtype_str_int_float(series: Series[Any]) -> Series[bool]:
    """
    Check if the series contains only str, int or float values.

    Args:
        series (Series[Any]): Series to check.

    Returns:
        Series[bool]: Series of boolean values denoting if each element has passed the check.

    """
    return series.apply(lambda value: isinstance(value, str | int | float))
dtype_str_int_float_none(series: Series[Any]) -> Series[bool]

Check if the series contains only str, int, float or None values.

Parameters:

Name Type Description Default
series Series[Any]

Series to check.

required

Returns:

Type Description
Series[bool]

Series[bool]: Series of boolean values denoting if each element has passed the check.

Source code in framdata/database_names/validation_functions.py
@extensions.register_check_method()
def dtype_str_int_float_none(series: Series[Any]) -> Series[bool]:
    """
    Check if the series contains only str, int, float or None values.

    Args:
        series (Series[Any]): Series to check.

    Returns:
        Series[bool]: Series of boolean values denoting if each element has passed the check.

    """
    return series.apply(lambda value: isinstance(value, str | int | float | type(None)))
numeric_values_are_between_or_equal_to(series: Series[Any], min_value: int | float, max_value: int | float) -> Series[bool]

Check if values are between or equal to a min and max value if they are of type int or float.

Parameters:

Name Type Description Default
series Series[Any]

Series to check.

required
min_value int | float

Value that the elements in the series should be greater than or equal.

required
max_value int | float

Value that the elements in the series should be less than or equal.

required

Returns:

Type Description
Series[bool]

Series[bool]: Series of boolean values denoting if each element has passed the check.

Source code in framdata/database_names/validation_functions.py
@extensions.register_check_method()
def numeric_values_are_between_or_equal_to(
    series: Series[Any],
    min_value: int | float,
    max_value: int | float,
) -> Series[bool]:
    """
    Check if values are between or equal to a min and max value if they are of type int or float.

    Args:
        series (Series[Any]): Series to check.
        min_value (int | float): Value that the elements in the series should be greater than or equal.
        max_value (int | float): Value that the elements in the series should be less than or equal.

    Returns:
        Series[bool]: Series of boolean values denoting if each element has passed the check.

    """
    if not isinstance(min_value, (int | float)) and not isinstance(max_value, (int | float)):
        message = "min and max value must be of type int or float."
        raise ValueError(message)
    return series.apply(lambda x: min_value <= x <= max_value if isinstance(x, (int | float)) else True)
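Column-level registered checks such as this one can likewise be referenced as pa.Field keyword arguments. A minimal, illustrative sketch (the schema and column name are not part of framdata):

import pandera as pa
from pandera.typing import Series

class ExampleCapacitySchema(pa.DataFrameModel):
    """Illustrative schema; not part of framdata."""

    Capacity: Series[float] = pa.Field(
        numeric_values_are_between_or_equal_to={"min_value": 0.0, "max_value": 10_000.0},
    )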
numeric_values_greater_than_or_equal_to(series: Series[Any], min_value: int | float) -> Series[bool]

Check if values are greater than or equal to min_value if they are of type int or float.

Parameters:

Name Type Description Default
series Series[Any]

Series to check.

required
min_value int | float

Value that the elements in the series should be greater than or equal.

required

Returns:

Type Description
Series[bool]

Series[bool]: Series of boolean values denoting if each element has passed the check.

Source code in framdata/database_names/validation_functions.py
@extensions.register_check_method()
def numeric_values_greater_than_or_equal_to(series: Series[Any], min_value: int | float) -> Series[bool]:
    """
    Check if values are greater than or equal to min_value if they are of type int or float.

    Args:
        series (Series[Any]): Series to check.
        min_value (int | float): Value that the elements in the series should be greater than or equal.

    Returns:
        Series[bool]: Series of boolean values denoting if each element has passed the check.

    """
    if not isinstance(min_value, (int | float)):
        message = "min_value must be of type int or float."
        raise ValueError(message)
    return series.apply(lambda x: x >= min_value if isinstance(x, (int | float)) else True)
numeric_values_less_than_or_equal_to(series: Series[Any], max_value: int | float) -> Series[bool]

Check if values are less than or equal to max_value if they are of type int or float.

Parameters:

Name Type Description Default
series Series[Any]

Series to check.

required
max_value int | float

Value that the elements in the series should be less than or equal to.

required

Returns:

Type Description
Series[bool]

Series[bool]: Series of boolean values denoting if each element has passed the check.

Source code in framdata/database_names/validation_functions.py
@extensions.register_check_method()
def numeric_values_less_than_or_equal_to(series: Series[Any], max_value: int | float) -> Series[bool]:
    """
    Check if values are less than or equal to max_value if they are of type int or float.

    Args:
        series (Series[Any]): Series to check.
        max_value (int | float): Value that the elements in the series should be less than or equal to.

    Returns:
        Series[bool]: Series of boolean values denoting if each element has passed the check.

    """
    if not isinstance(max_value, (int | float)):
        message = "max_value must be of type int or float."
        raise ValueError(message)
    return series.apply(lambda x: x <= max_value if isinstance(x, (int | float)) else True)

file_editors

NVEFileEditor

Contain class with common functionality for editing files.

NVEFileEditor

Bases: Base

Parent class with common functionality for classes concerned with editing FRAM files.

Source code in framdata/file_editors/NVEFileEditor.py
class NVEFileEditor(Base):
    """Parent class with common functionality for classes concerned with editing FRAM files."""

    def __init__(self, source: Path | str | None = None) -> None:
        """
        Set the path to the source file if supplied.

        Args:
            source (Path | str | None, optional): Path to the file to edit. Defaults to None.

        """
        super().__init__()

        self._check_type(source, (Path, str, type(None)))
        self._source = None if source is None else Path(source)

    def get_source(self) -> Path:
        """Get the source file path of the editor."""
        return self._source

    def set_source(self, source: Path) -> None:
        """Set the source file path of the editor."""
        self._check_type(source, (Path, str))
        self._source = Path(source)
__init__(source: Path | str | None = None) -> None

Set the path to the source file if supplied.

Parameters:

Name Type Description Default
source Path | str | None

Path to the file to edit. Defaults to None.

None
Source code in framdata/file_editors/NVEFileEditor.py
def __init__(self, source: Path | str | None = None) -> None:
    """
    Set the path to the source file if supplied.

    Args:
        source (Path | str | None, optional): Path to the file to edit. Defaults to None.

    """
    super().__init__()

    self._check_type(source, (Path, str, type(None)))
    self._source = None if source is None else Path(source)
get_source() -> Path

Get the source file path of the editor.

Source code in framdata/file_editors/NVEFileEditor.py
def get_source(self) -> Path:
    """Get the source file path of the editor."""
    return self._source
set_source(source: Path) -> None

Set the source file path of the editor.

Source code in framdata/file_editors/NVEFileEditor.py
def set_source(self, source: Path) -> None:
    """Set the source file path of the editor."""
    self._check_type(source, (Path, str))
    self._source = Path(source)

NVEH5TimeVectorEditor

Contains class for editing time vectors in H5 files.

NVEH5TimeVectorEditor

Bases: NVEFileEditor

Class with functionality concerned with editing time vectors and their metadata in H5 files.

Source code in framdata/file_editors/NVEH5TimeVectorEditor.py
class NVEH5TimeVectorEditor(NVEFileEditor):
    """Class with functionality concerned with editing time vectors and their metadata in H5 files."""

    def __init__(self, source: Path | str | None = None) -> None:
        """
        Set path to H5 file if supplied, and load/initialize time vectors, indexes, and metadata as dictionaries.

        Args:
            source (Path | str | None, optional): Path to H5 file with timevectors. Defaults to None.

        """
        super().__init__(source)

        meta_tuple = ({}, None) if self._source is None or not self._source.exists() else self._read_data(H5Names.METADATA_GROUP, True)
        self._metadata, self._common_metadata = meta_tuple
        index_tuple = (defaultdict(NDArray), None) if self._source is None or not self._source.exists() else self._read_data(H5Names.INDEX_GROUP, False)
        self._index, self._common_index = index_tuple
        self._index = {k: v.astype(str) for k, v in self._index.items()}

        vectors_tuple = (defaultdict(NDArray), None) if self._source is None or not self._source.exists() else self._read_data(H5Names.VECTORS_GROUP, False)
        self._vectors, __ = vectors_tuple

    def get_metadata(self, vector_id: str) -> None | dict:
        """Get a copy of the metadata of the parquet file."""
        try:
            return self._metadata[vector_id]
        except KeyError as e:
            f"Found no ID '{vector_id}' in metadata."
            raise KeyError from e

    def set_metadata(self, vector_id: str, value: dict[str, METADATA_TYPES]) -> None:
        """Set a field (new or overwrite) in the metadata."""
        self._check_type(vector_id, str)
        self._check_type(value, dict)
        self._metadata[vector_id] = value

    def get_common_metadata(self) -> None | dict:
        """Get a copy of the metadata of the parquet file."""
        return self._common_metadata if self._common_metadata is None else self._common_metadata.copy()

    def set_common_metadata(self, value: dict[str, METADATA_TYPES]) -> None:
        """Set a field (new or overwrite) in the metadata."""
        self._check_type(value, dict)
        self._common_metadata = value

    def set_index(self, vector_id: str, index: NDArray) -> None:
        """Set a whole index in the time index table."""
        self._check_type(vector_id, str)
        self._check_type(index, np.ndarray)
        self._index[vector_id] = index

    def get_index(self, vector_id: str) -> NDArray:
        """Return a copy of a given index as a pandas series from the table."""
        try:
            return self._index[vector_id]
        except KeyError as e:
            f"Found no ID '{vector_id}' among indexes."
            raise KeyError from e

    def set_common_index(self, values: NDArray) -> None:
        """Set a whole index in the time index table."""
        self._check_type(values, np.ndarray)
        self._common_index = values

    def get_common_index(self) -> NDArray | None:
        """Return a copy of a given index as a pandas series from the table."""
        return self._common_index

    def set_vector(self, vector_id: str, values: NDArray) -> None:
        """Set a whole vector in the time vector table."""
        self._check_type(vector_id, str)
        self._check_type(values, np.ndarray)
        self._vectors[vector_id] = values

    def get_vector(self, vector_id: str) -> NDArray:
        """Return a copy of a given vector as a pandas series from the table."""
        try:
            return self._vectors[vector_id]
        except KeyError as e:
            msg = f"Found no ID '{vector_id}' among vectors."
            raise KeyError(msg) from e

    def get_vector_ids(self) -> list[str]:
        """Get the IDs of all vectors."""
        return list(self._vectors.keys())

    def save_to_h5(self, path: Path | str) -> None:
        self._check_type(path, (Path, str))
        path = Path(path)

        missing_index = {v for v in self._vectors if v not in self._index}
        if self._common_index is None and len(missing_index) != 0:
            msg = f"Found vectors missing indexes and common index is not set: {missing_index}."
            raise KeyError(msg)

        missing_meta = {v for v in self._vectors if v not in self._metadata}
        if self._common_metadata is None and len(missing_meta) != 0:
            msg = f"Found vectors missing metadata and common metadata is not set: {missing_meta}."
            raise KeyError(msg)

        with h5py.File(path, mode="w") as f:
            if self._common_metadata is not None:
                common_meta_group = f.create_group(H5Names.COMMON_PREFIX + H5Names.METADATA_GROUP)
                self._write_meta_to_group(common_meta_group, self._common_metadata)
            if self._common_index is not None:
                f.create_dataset(H5Names.COMMON_PREFIX + H5Names.INDEX_GROUP, data=self._common_index.astype(bytes))

            if self._metadata:
                meta_group = f.create_group(H5Names.METADATA_GROUP)
                for vector_id, meta in self._metadata.items():
                    vm_group = meta_group.create_group(vector_id)
                    self._write_meta_to_group(vm_group, meta)

            if self._index:
                index_group = f.create_group(H5Names.INDEX_GROUP)
                for vector_id, index in self._index.items():
                    index_group.create_dataset(vector_id, data=index.astype(bytes))

            if self._vectors:
                vector_group = f.create_group(H5Names.VECTORS_GROUP)
                for vector_id, vector in self._vectors.items():
                    vector_group.create_dataset(vector_id, data=vector)

    def _write_meta_to_group(self, meta_group: h5py.Group, metadata: dict) -> None:
        for k, v in metadata.items():
            meta_group.create_dataset(k, data=str(v).encode(TvMn.ENCODING))

    def _read_data(
        self, group_name: str, cast_meta: bool
    ) -> tuple[dict[str, dict[str, METADATA_TYPES]] | dict[str, dict[str, NDArray]], dict[str, METADATA_TYPES] | dict[str, NDArray]]:
        common_field = H5Names.COMMON_PREFIX + group_name
        data = {}
        common_data = None
        with h5py.File(self._source, mode="r") as f:
            if group_name in f and isinstance(f[group_name], h5py.Group):
                group = f[group_name]
                data.update(
                    {
                        vector_id: TvMn.cast_meta(self._read_datasets(vector_data)) if cast_meta else self._read_datasets(vector_data)
                        for vector_id, vector_data in group.items()
                    },
                )

            if common_field in f and isinstance(f[common_field], h5py.Group):
                datasets = self._read_datasets(f[common_field])
                common_data, __ = TvMn.cast_meta(datasets) if cast_meta else (datasets, None)
            elif common_field in f and isinstance(f[common_field], h5py.Dataset):
                common_data = f[common_field][()]

        return data, common_data

    def _read_datasets(self, field: h5py.Group | h5py.Dataset) -> dict | NDArray | bytes:
        if isinstance(field, h5py.Dataset):
            return field[()]
        datasets = {}
        for key, val in field.items():
            if isinstance(val, h5py.Dataset):
                datasets[key] = val[()]
            else:
                msg = f"Expected only {h5py.Dataset} in field, but found {type(val)}"
                raise TypeError(msg)

        return datasets
__init__(source: Path | str | None = None) -> None

Set path to H5 file if supplied, and load/initialize time vectors, indexes, and metadata as dictionaries.

Parameters:

Name Type Description Default
source Path | str | None

Path to H5 file with timevectors. Defaults to None.

None
Source code in framdata/file_editors/NVEH5TimeVectorEditor.py
def __init__(self, source: Path | str | None = None) -> None:
    """
    Set path to H5 file if supplied, and load/initialize time vectors, indexes, and metadata as dictionaries.

    Args:
        source (Path | str | None, optional): Path to H5 file with timevectors. Defaults to None.

    """
    super().__init__(source)

    meta_tuple = ({}, None) if self._source is None or not self._source.exists() else self._read_data(H5Names.METADATA_GROUP, True)
    self._metadata, self._common_metadata = meta_tuple
    index_tuple = (defaultdict(NDArray), None) if self._source is None or not self._source.exists() else self._read_data(H5Names.INDEX_GROUP, False)
    self._index, self._common_index = index_tuple
    self._index = {k: v.astype(str) for k, v in self._index.items()}

    vectors_tuple = (defaultdict(NDArray), None) if self._source is None or not self._source.exists() else self._read_data(H5Names.VECTORS_GROUP, False)
    self._vectors, __ = vectors_tuple
get_common_index() -> NDArray | None

Return a copy of a given index as a pandas series from the table.

Source code in framdata/file_editors/NVEH5TimeVectorEditor.py
def get_common_index(self) -> NDArray | None:
    """Return a copy of a given index as a pandas series from the table."""
    return self._common_index
get_common_metadata() -> None | dict

Get a copy of the common metadata.

Source code in framdata/file_editors/NVEH5TimeVectorEditor.py
def get_common_metadata(self) -> None | dict:
    """Get a copy of the metadata of the parquet file."""
    return self._common_metadata if self._common_metadata is None else self._common_metadata.copy()
get_index(vector_id: str) -> NDArray

Return a copy of a given index as a pandas series from the table.

Source code in framdata/file_editors/NVEH5TimeVectorEditor.py
def get_index(self, vector_id: str) -> NDArray:
    """Return a copy of a given index as a pandas series from the table."""
    try:
        return self._index[vector_id]
    except KeyError as e:
        f"Found no ID '{vector_id}' among indexes."
        raise KeyError from e
get_metadata(vector_id: str) -> None | dict

Get the metadata dictionary of a given vector.

Source code in framdata/file_editors/NVEH5TimeVectorEditor.py
def get_metadata(self, vector_id: str) -> None | dict:
    """Get a copy of the metadata of the parquet file."""
    try:
        return self._metadata[vector_id]
    except KeyError as e:
        f"Found no ID '{vector_id}' in metadata."
        raise KeyError from e
get_vector(vector_id: str) -> NDArray

Return a copy of a given vector as a pandas series from the table.

Source code in framdata/file_editors/NVEH5TimeVectorEditor.py
def get_vector(self, vector_id: str) -> NDArray:
    """Return a copy of a given vector as a pandas series from the table."""
    try:
        return self._vectors[vector_id]
    except KeyError as e:
        msg = f"Found no ID '{vector_id}' among vectors."
        raise KeyError(msg) from e
get_vector_ids() -> list[str]

Get the IDs of all vectors.

Source code in framdata/file_editors/NVEH5TimeVectorEditor.py
def get_vector_ids(self) -> list[str]:
    """Get the IDs of all vectors."""
    return list(self._vectors.keys())
set_common_index(values: NDArray) -> None

Set a whole index in the time index table.

Source code in framdata/file_editors/NVEH5TimeVectorEditor.py
def set_common_index(self, values: NDArray) -> None:
    """Set a whole index in the time index table."""
    self._check_type(values, np.ndarray)
    self._common_index = values
set_common_metadata(value: dict[str, METADATA_TYPES]) -> None

Set a field (new or overwrite) in the metadata.

Source code in framdata/file_editors/NVEH5TimeVectorEditor.py
def set_common_metadata(self, value: dict[str, METADATA_TYPES]) -> None:
    """Set a field (new or overwrite) in the metadata."""
    self._check_type(value, dict)
    self._common_metadata = value
set_index(vector_id: str, index: NDArray) -> None

Set a whole index in the time index table.

Source code in framdata/file_editors/NVEH5TimeVectorEditor.py
def set_index(self, vector_id: str, index: NDArray) -> None:
    """Set a whole index in the time index table."""
    self._check_type(vector_id, str)
    self._check_type(index, np.ndarray)
    self._index[vector_id] = index
set_metadata(vector_id: str, value: dict[str, METADATA_TYPES]) -> None

Set a field (new or overwrite) in the metadata.

Source code in framdata/file_editors/NVEH5TimeVectorEditor.py
def set_metadata(self, vector_id: str, value: dict[str, METADATA_TYPES]) -> None:
    """Set a field (new or overwrite) in the metadata."""
    self._check_type(vector_id, str)
    self._check_type(value, dict)
    self._metadata[vector_id] = value
set_vector(vector_id: str, values: NDArray) -> None

Set a whole vector in the time vector table.

Source code in framdata/file_editors/NVEH5TimeVectorEditor.py
def set_vector(self, vector_id: str, values: NDArray) -> None:
    """Set a whole vector in the time vector table."""
    self._check_type(vector_id, str)
    self._check_type(values, np.ndarray)
    self._vectors[vector_id] = values
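Putting the pieces together, a minimal sketch of building a small H5 time-vector file in memory and saving it (the vector ID, index values, and metadata key are purely illustrative):

import numpy as np

editor = NVEH5TimeVectorEditor()
editor.set_common_index(np.array(["2030-01-01T00:00", "2030-01-01T01:00"]))
editor.set_common_metadata({"Unit": "MW"})  # assumed metadata key, for illustration only
editor.set_vector("wind_NO1", np.array([120.0, 95.5]))
editor.save_to_h5("wind_NO1.h5")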

NVEParquetTimeVectorEditor

Contains class for editing time vectors in parquet files.

NVEParquetTimeVectorEditor

Bases: NVEFileEditor

Class for managing time vectors and their metadata stored in parquet files.

Source code in framdata/file_editors/NVEParquetTimeVectorEditor.py
class NVEParquetTimeVectorEditor(NVEFileEditor):
    """Class for managing time vectors and their metadata stored in parquet files."""

    def __init__(self, source: Path | str | None = None) -> None:
        """
        Set path to parquet file if supplied, load/initialize table and metadata as pd.DataFrame and dictionary respectively.

        Args:
            source (Path | str | None, optional): Path to parquet file with timevectors. Defaults to None.

        """
        super().__init__(source)
        self._metadata, __ = ({}, None) if self._source is None or not self._source.exists() else self._read_metadata()
        self._data = pd.DataFrame() if self._source is None or not self._source.exists() else pd.read_parquet(self._source)

    def save_to_parquet(self, path: Path | str) -> None:
        """
        Save the edited dataframe and metadata to parquet file.

        Args:
            path (Path): Path to save the file to. Must be defined to force the user to explicitly overwrite the original file if they want.

        """
        self._check_type(path, (Path, str))
        path = Path(path)
        table = pa.Table.from_pandas(self._data)

        # ensure binary strings with defined encoding, since parquet encodes metadata anyway
        schema_with_meta = table.schema.with_metadata({str(k).encode(TvMn.ENCODING): str(v).encode(TvMn.ENCODING) for k, v in self._metadata.items()})
        table = pa.Table.from_pandas(self._data, schema=schema_with_meta)

        pq.write_table(table, path)

    def get_metadata(self):
        """Get a copy of the metadata of the parquet file."""
        return self._metadata if self._metadata is None else self._metadata.copy()

    def set_metadata(self, key: str, value: bool | int | str | datetime | timedelta | tzinfo | None) -> None:
        """Set a field (new or overwrite) in the metadata."""
        self._check_type(key, str)
        self._check_type(value, (bool, int, str, datetime, timedelta, tzinfo, type(None)))
        self._metadata[key] = value

    def set_vector(self, vector_id: str, values: pd.Series) -> None:
        """Set a whole vector in the time vector table."""
        self._check_type(vector_id, str)
        self._check_type(values, pd.Series)
        if not self._data.empty and len(values) != len(self._data):
            message = f"Series values has different size than the other vectors in the table.\nLength values: {len(values)}\nLength vectors: {len(self._data)}"
            raise IndexError(message)
        self._data[vector_id] = values

    def get_vector(self, vector_id: str) -> pd.Series:
        """Return a copy of a given vector as a pandas series from the table."""
        try:
            return self._data[vector_id].copy()
        except KeyError as e:
            f"Found no vector named '{vector_id}' in table at {self._source}."
            raise KeyError from e

    def get_dataframe(self) -> pd.DataFrame:
        """Return a copy of all of the vector table as a pandas dataframe."""
        return self._data.copy()

    def set_dataframe(self, dataframe: pd.DataFrame) -> None:
        """Set the dataframe of the editor."""
        self._check_type(dataframe, pd.DataFrame)
        self._data = dataframe

    def get_vector_ids(self) -> list[str]:
        """Get the IDs of all vectors."""
        return [c for c in self._data.columns if c != TvMn.DATETIME_COL]

    def set_index_column(self, index: pd.Series) -> None:
        """Set the index column."""
        self._check_type(index, pd.Series)
        if not self._data.empty and len(index) != len(self._data):
            message = f"Series index has different size than the other vectors in the table.\nLength index: {len(index)}\nLength vectors: {len(self._data)}"
            raise IndexError(message)
        self._data[TvMn.DATETIME_COL] = index

    def get_index_column(self) -> pd.Series:
        """Get the datetime column of the dataframe."""
        if TvMn.DATETIME_COL not in self._data:
            message = f"Table at {self._source} does not have an index column. Index column must exist and be named '{TvMn.DATETIME_COL}'."
            raise KeyError(message)
        return self._data[TvMn.DATETIME_COL].copy()

    def _read_metadata(self) -> tuple[dict[str, bool | int | str | datetime | timedelta | tzinfo | None], set[str]]:
        if self._source is None:
            message = "Must set a source before reading file."
            raise ValueError(message)
        metadata = pq.ParquetFile(self._source).schema_arrow.metadata
        return TvMn.cast_meta(metadata)
__init__(source: Path | str | None = None) -> None

Set path to parquet file if supplied, load/initialize table and metadata as pd.DataFrame and dictionary respectively.

Parameters:

Name Type Description Default
source Path | str | None

Path to parquet file with timevectors. Defaults to None.

None
Source code in framdata/file_editors/NVEParquetTimeVectorEditor.py
def __init__(self, source: Path | str | None = None) -> None:
    """
    Set path to parquet file if supplied, load/initialize table and metadata as pd.DataFrame and dictionary respectively.

    Args:
        source (Path | str | None, optional): Path to parquet file with timevectors. Defaults to None.

    """
    super().__init__(source)
    self._metadata, __ = ({}, None) if self._source is None or not self._source.exists() else self._read_metadata()
    self._data = pd.DataFrame() if self._source is None or not self._source.exists() else pd.read_parquet(self._source)
get_dataframe() -> pd.DataFrame

Return a copy of all of the vector table as a pandas dataframe.

Source code in framdata/file_editors/NVEParquetTimeVectorEditor.py
def get_dataframe(self) -> pd.DataFrame:
    """Return a copy of all of the vector table as a pandas dataframe."""
    return self._data.copy()
get_index_column() -> pd.Series

Get the datetime column of the dataframe.

Source code in framdata/file_editors/NVEParquetTimeVectorEditor.py
def get_index_column(self) -> pd.Series:
    """Get the datetime column of the dataframe."""
    if TvMn.DATETIME_COL not in self._data:
        message = f"Table at {self._source} does not have an index column. Index column must exist and be named '{TvMn.DATETIME_COL}'."
        raise KeyError(message)
    return self._data[TvMn.DATETIME_COL].copy()
get_metadata()

Get a copy of the metadata of the parquet file.

Source code in framdata/file_editors/NVEParquetTimeVectorEditor.py
def get_metadata(self):
    """Get a copy of the metadata of the parquet file."""
    return self._metadata if self._metadata is None else self._metadata.copy()
get_vector(vector_id: str) -> pd.Series

Return a copy of a given vector as a pandas series from the table.

Source code in framdata/file_editors/NVEParquetTimeVectorEditor.py
def get_vector(self, vector_id: str) -> pd.Series:
    """Return a copy of a given vector as a pandas series from the table."""
    try:
        return self._data[vector_id].copy()
    except KeyError as e:
        f"Found no vector named '{vector_id}' in table at {self._source}."
        raise KeyError from e
get_vector_ids() -> list[str]

Get the IDs of all vectors.

Source code in framdata/file_editors/NVEParquetTimeVectorEditor.py
def get_vector_ids(self) -> list[str]:
    """Get the IDs of all vectors."""
    return [c for c in self._data.columns if c != TvMn.DATETIME_COL]
save_to_parquet(path: Path | str) -> None

Save the edited dataframe and metadata to parquet file.

Parameters:

Name Type Description Default
path Path

Path to save the file to. Must be defined to force the user to explicitly overwrite the original file if they want.

required
Source code in framdata/file_editors/NVEParquetTimeVectorEditor.py
def save_to_parquet(self, path: Path | str) -> None:
    """
    Save the edited dataframe and metadata to parquet file.

    Args:
        path (Path): Path to save the file to. Must be defined to force the user to explicitly overwrite the original file if they want.

    """
    self._check_type(path, (Path, str))
    path = Path(path)
    table = pa.Table.from_pandas(self._data)

    # ensure binary strings with defined encoding, since parquet encodes metadata anyway
    schema_with_meta = table.schema.with_metadata({str(k).encode(TvMn.ENCODING): str(v).encode(TvMn.ENCODING) for k, v in self._metadata.items()})
    table = pa.Table.from_pandas(self._data, schema=schema_with_meta)

    pq.write_table(table, path)
set_dataframe(dataframe: pd.DataFrame) -> None

Set the dataframe of the editor.

Source code in framdata/file_editors/NVEParquetTimeVectorEditor.py
def set_dataframe(self, dataframe: pd.DataFrame) -> None:
    """Set the dataframe of the editor."""
    self._check_type(dataframe, pd.DataFrame)
    self._data = dataframe
set_index_column(index: pd.Series) -> None

Set the index column.

Source code in framdata/file_editors/NVEParquetTimeVectorEditor.py
def set_index_column(self, index: pd.Series) -> None:
    """Set the index column."""
    self._check_type(index, pd.Series)
    if not self._data.empty and len(index) != len(self._data):
        message = f"Series index has different size than the other vectors in the table.\nLength index: {len(index)}\nLength vectors: {len(self._data)}"
        raise IndexError(message)
    self._data[TvMn.DATETIME_COL] = index
set_metadata(key: str, value: bool | int | str | datetime | timedelta | tzinfo | None) -> None

Set a field (new or overwrite) in the metadata.

Source code in framdata/file_editors/NVEParquetTimeVectorEditor.py
def set_metadata(self, key: str, value: bool | int | str | datetime | timedelta | tzinfo | None) -> None:
    """Set a field (new or overwrite) in the metadata."""
    self._check_type(key, str)
    self._check_type(value, (bool, int, str, datetime, timedelta, tzinfo, type(None)))
    self._metadata[key] = value
set_vector(vector_id: str, values: pd.Series) -> None

Set a whole vector in the time vector table.

Source code in framdata/file_editors/NVEParquetTimeVectorEditor.py
def set_vector(self, vector_id: str, values: pd.Series) -> None:
    """Set a whole vector in the time vector table."""
    self._check_type(vector_id, str)
    self._check_type(values, pd.Series)
    if not self._data.empty and len(values) != len(self._data):
        message = f"Series values has different size than the other vectors in the table.\nLength values: {len(values)}\nLength vectors: {len(self._data)}"
        raise IndexError(message)
    self._data[vector_id] = values
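Putting the pieces together, a minimal sketch of building a small parquet time-vector file and saving it (the column name, index values, and metadata key are purely illustrative):

import pandas as pd

editor = NVEParquetTimeVectorEditor()
editor.set_index_column(pd.Series(pd.date_range("2030-01-01", periods=3, freq="h")))
editor.set_vector("solar_NO2", pd.Series([0.0, 10.5, 25.0]))
editor.set_metadata("Unit", "MW")  # assumed metadata key, for illustration only
editor.save_to_parquet("solar_NO2.parquet")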

loaders

NVEExcelTimeVectorLoader

Bases: NVETimeVectorLoader

Class for loading time vector data from NVE excel file sources.

Meant for short time vectors (e.g. yearly volumes or installed capacities) that are desirable to view and edit easily through Excel. Supports the following formats: - 'Horizontal': one column contains the IDs and the other column names represent the index, with vector values as rows. - 'Vertical': one column is the index (DateTime) and the other column names are vector IDs, with vectors as column values.
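To make the two layouts concrete, here is an illustrative sketch of what the Data sheet could contain in each format (the IDs, index values, and column names other than the ID/DateTime columns are assumptions made for the example):

import pandas as pd

# 'Vertical' layout: one DateTime index column, one column per vector ID.
vertical = pd.DataFrame(
    {
        "DateTime": ["2025", "2030"],
        "Wind_NO1.capacity": [1200.0, 1500.0],
    },
)

# 'Horizontal' layout: one ID column, the remaining column names form the index.
horizontal = pd.DataFrame(
    {
        "ID": ["Wind_NO1.capacity"],
        "2025": [1200.0],
        "2030": [1500.0],
    },
)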

Source code in framdata/loaders/time_vector_loaders.py
class NVEExcelTimeVectorLoader(NVETimeVectorLoader):
    """
    Class for loading time vector data from NVE excel file sources.

    Meant for short time vectors (e.g. yearly volumes or installed capacities) which are desirable to view and edit easily through Excel.
    Supports the following formats:
        - 'Horizontal': One column containing IDs, the other column names represent the index. Vector values as rows.
        - 'Vertical': One column as index (DateTime), the other column names are vector IDs. Vectors as column values.

    """

    _SUPPORTED_SUFFIXES: ClassVar[list] = [".xlsx"]
    _DATA_SHEET = "Data"
    _METADATA_SHEET = "Metadata"

    def __init__(self, source: Path | str, require_whole_years: bool, relative_loc: Path | str | None = None, validate: bool = True) -> None:
        """
        Initialize the loader instance and connect it to an Excel file containing time vector data.

        Args:
            source (Path | str): Absolute Path to database or excel file.
            require_whole_years (bool): Flag for validating that the time vectors in the source contain data for complete years.
            relative_loc (Path | str | None, optional): Path to excel file relative to source. Defaults to None.
            validate (bool, optional): Flag to turn on validation of timevectors. NB! Loads all data into memory at once. Defaults to True.

        """
        super().__init__(source, require_whole_years, relative_loc)
        self._index: TimeIndex = None

        if validate:
            self.validate_vectors()

    def get_unit(self, vector_id: str) -> str:
        """
        Get the unit of the given time vector.

        Args:
            vector_id (str): ID of a time vector. Not used since all time vectors in the NVE excel files have the same
                             unit.

        Returns:
            str: Unit of the time vector.

        """
        return self.get_metadata("")[TvMn.UNIT]

    def get_values(self, vector_id: str) -> NDArray:
        """
        Get numpy array with all the values of a given vector in the Loader's excel file.

        Args:
            vector_id (str): Unique id of the vector in the file.

        Returns:
            NDArray: Numpy array with values.

        """
        if self._data is None:
            self._data = pd.DataFrame()
        if vector_id not in self._data.columns:
            is_horizontal = self._is_horizontal_format()
            column_filter = [vector_id]
            usecols = None
            if not is_horizontal:
                usecols = column_filter

            values_df = pd.read_excel(self.get_source(), sheet_name=self._DATA_SHEET, usecols=usecols)

            if is_horizontal:  # Convert the table to large time series format
                values_df = self._process_horizontal_format(values_df)
                values_df = self._enforce_dtypes(values_df, is_horizontal)
                self._data = values_df
            else:
                values_df = self._enforce_dtypes(values_df, is_horizontal)
                self._data[vector_id] = values_df
        return self._data[vector_id].to_numpy()

    def get_index(self, vector_id: str) -> ListTimeIndex:
        """
        Get the TimeIndex describing the time dimension of the vectors in the file.

        Args:
            vector_id (str): Not used since all vectors in the NVE excel files have the same index.

        Returns:
            TimeIndex: TimeIndex object describing the excel file's index.

        """
        meta = self.get_metadata("")
        if self._index is None:
            self._index = self._create_index(
                self.get_values(TvMn.DATETIME_COL),
                is_52_week_years=meta[TvMn.IS_52_WEEK_YEARS],
                extrapolate_first_point=meta[TvMn.EXTRAPOLATE_FISRT_POINT],
                extrapolate_last_point=meta[TvMn.EXTRAPOLATE_LAST_POINT],
            )
        return self._index

    def get_metadata(self, vector_id: str) -> dict[str, bool | int | str | datetime | timedelta | tzinfo | None]:
        """
        Read Excel file metadata.

        Args:
            vector_id (str): Not used.

        Raises:
            KeyError: If an expected metadata key is missing.

        Returns:
            dict[str, bool|int|str|datetime|timedelta|tzinfo|None]: Metadata dictionary.

        """
        if self._meta is None:
            path = self.get_source()
            raw_meta = pd.read_excel(path, sheet_name=self._METADATA_SHEET, na_values=[""]).replace([np.nan], [None]).to_dict("records")[0]

            self._meta = self._process_meta(raw_meta)
        return self._meta

    def _enforce_dtypes(self, values_df: pd.DataFrame | pd.Series, issmallformat: bool) -> pd.DataFrame:
        set_dtypes = "float"
        if isinstance(values_df, pd.DataFrame):
            set_dtypes = {c: "float" for c in values_df.columns if c != TvMn.DATETIME_COL}

        # ensure correct dtypes
        try:
            return values_df.astype(set_dtypes)
        except ValueError as e:
            index_column = TvMn.ID_COLUMN_NAME if issmallformat else TvMn.DATETIME_COL
            message = f"Error in {self} while reading file. All columns except '{index_column}' must consist of only float or integer numbers."
            raise RuntimeError(message) from e

    def _process_horizontal_format(self, horizontal_format_df: pd.DataFrame) -> pd.DataFrame:
        # We have to read the whole file to find the correct series

        # Rename the id column name and then transpose to get the correct format
        # Since the columns are counted as indices when transposing, we need to reset the index (but keep the DateTime
        # column)
        reformat_df = horizontal_format_df.rename(columns={TvMn.ID_COLUMN_NAME: TvMn.DATETIME_COL}).T.reset_index(drop=False)

        # after transposing, column names are set as the first row, which is DateTime, IDs
        reformat_df.columns = reformat_df.iloc[0]
        # We reindex by dropping the first row, thus removing the row of DateTime, IDs
        reformat_df = reformat_df.reindex(reformat_df.index.drop(0)).reset_index(drop=True)

        # Since it is possible to write only the year or year-month as a timestamp in the table,
        # we need to convert the values to a proper datetime format
        reformat_df[TvMn.DATETIME_COL] = self._to_iso_datetimes(reformat_df[TvMn.DATETIME_COL])

        return reformat_df

    def _to_iso_datetimes(self, series: pd.Series) -> list[datetime]:
        """
        Convert a series of dates to ISO datetime format.

        Args:
            series (pd.Series): Series which values will be converted to ISO format.

        Raises:
            RuntimeError: When an input value which cannot be converted is encountered.

        Returns:
            list[datetime]: List of formatted datetimes.

        """
        reformatted = []
        three_segments = 3
        two_segments = 2
        one_segment = 1
        for i in series:
            new_i = str(i)
            date_split = len(new_i.split("-"))
            space_split = len(new_i.split(" "))
            time_split = len(new_i.split(":"))
            try:
                if date_split == one_segment:  # Only year is defined
                    # get datetime for first week first day
                    new_i = datetime.fromisocalendar(int(new_i), 1, 1)
                elif date_split == two_segments:
                    # Year and month is defined
                    new_i = datetime.strptime(new_i + "-01", "%Y-%m-%d")  # Add first day
                elif date_split == three_segments and space_split == one_segment and time_split == one_segment:
                    # days defined but not time
                    new_i = datetime.strptime(new_i, "%Y-%m-%d")
                elif date_split == three_segments and space_split == two_segments and time_split == one_segment:
                    new_i = datetime.strptime(new_i, "%Y-%m-%d %H")
                elif date_split == three_segments and space_split == two_segments and time_split == two_segments:
                    new_i = datetime.strptime(new_i, "%Y-%m-%d %H:%M")
                elif date_split == three_segments and space_split == two_segments and time_split == three_segments:
                    # Assume time is defined
                    new_i = datetime.strptime(new_i, "%Y-%m-%d %H:%M:%S")
                else:
                    msg = f"Could not convert value '{new_i}' to datetime format."
                    raise ValueError(msg)
            except Exception as e:
                msg = f"Loader {self} could not convert value '{new_i}' to datetime format. Check formatting, for example number of spaces."
                raise RuntimeError(msg) from e
            reformatted.append(new_i)
        return sorted(reformatted)

    def _is_horizontal_format(self) -> bool:
        """Determine if the file strucure is the NVE small format."""
        column_names = pd.read_excel(self.get_source(), nrows=0, sheet_name=self._DATA_SHEET).columns.tolist()
        return TvMn.ID_COLUMN_NAME in column_names

    def _get_ids(self) -> list[str]:
        if self._content_ids is not None:
            return self._content_ids
        try:
            if self._is_horizontal_format():
                self._content_ids = pd.read_excel(
                    self.get_source(),
                    usecols=[TvMn.ID_COLUMN_NAME],
                    sheet_name=self._DATA_SHEET,
                )[TvMn.ID_COLUMN_NAME].tolist()
            else:
                columns_list = pd.read_excel(self.get_source(), nrows=0, sheet_name=self._DATA_SHEET).columns.tolist()
                columns_list.remove(TvMn.DATETIME_COL)
                self._content_ids = columns_list
        except ValueError as e:
            message = f"{self}: found problem with TimeVector IDs."
            raise RuntimeError(message) from e

        return self._content_ids

    def clear_cache(self) -> None:
        """Clear cached data."""
        self._data = None
        self._meta = None
        self._index = None
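
The _to_iso_datetimes helper above accepts timestamps at several granularities (year only, year-month, date, and date with hour, minute or second precision). A minimal standalone sketch of the same parsing rules, with hypothetical input values, can help check how a cell will be interpreted:

from datetime import datetime

def parse_nve_timestamp(value: str) -> datetime:
    # Sketch of the granularity rules in _to_iso_datetimes above; not the library function itself.
    text = str(value)
    n_date, n_space, n_time = len(text.split("-")), len(text.split(" ")), len(text.split(":"))
    if n_date == 1:                                      # "2025" -> first day of ISO week 1
        return datetime.fromisocalendar(int(text), 1, 1)
    if n_date == 2:                                      # "2025-03" -> first day of the month
        return datetime.strptime(text + "-01", "%Y-%m-%d")
    if n_space == 1:                                     # "2025-03-17"
        return datetime.strptime(text, "%Y-%m-%d")
    if n_time == 1:                                      # "2025-03-17 06"
        return datetime.strptime(text, "%Y-%m-%d %H")
    if n_time == 2:                                      # "2025-03-17 06:30"
        return datetime.strptime(text, "%Y-%m-%d %H:%M")
    return datetime.strptime(text, "%Y-%m-%d %H:%M:%S")  # "2025-03-17 06:30:00"

print(parse_nve_timestamp("2025"))              # 2024-12-30 00:00:00
print(parse_nve_timestamp("2025-03-17 06:30"))  # 2025-03-17 06:30:00
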
__init__(source: Path | str, require_whole_years: bool, relative_loc: Path | str | None = None, validate: bool = True) -> None

Initialize loader instance and connect it to an Excel file containing time vector data.

Parameters:

Name Type Description Default
source Path | str

Absolute Path to database or excel file.

required
require_whole_years bool

Flag for validating that the time vectors in the source contain data for complete years.

required
relative_loc Path | str | None

Path to excel file relative to source. Defaults to None.

None
validate bool

Flag to turn on validation of timevectors. NB! Loads all data into memory at once. Defaults to True.

True
Source code in framdata/loaders/time_vector_loaders.py
def __init__(self, source: Path | str, require_whole_years: bool, relative_loc: Path | str | None = None, validate: bool = True) -> None:
    """
    Initialize loader instance and connect it to an Excel file containing time vector data.

    Args:
        source (Path | str): Absolute Path to database or excel file.
        require_whole_years (bool): Flag for validating that the time vectors in the source contain data for complete years.
        relative_loc (Path | str | None, optional): Path to excel file relative to source. Defaults to None.
        validate (bool, optional): Flag to turn on validation of timevectors. NB! Loads all data into memory at once. Defaults to True.

    """
    super().__init__(source, require_whole_years, relative_loc)
    self._index: TimeIndex = None

    if validate:
        self.validate_vectors()
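
A hedged usage sketch of this Excel loader. The class name NVEExcelTimeVectorLoader is assumed from the naming of the other loaders (it is not shown in this excerpt), and the database path, relative location and vector ID are hypothetical:

from pathlib import Path

loader = NVEExcelTimeVectorLoader(          # assumed class name, see note above
    source=Path("C:/data/nve_database"),    # hypothetical database root
    require_whole_years=False,
    relative_loc=Path("db41_demand_profiles/Demand.profiles.xlsx"),  # hypothetical file
    validate=False,                         # skip up-front validation of all vectors
)
values = loader.get_values("NO1.demand")    # numpy array for one vector (hypothetical ID)
index = loader.get_index("")                # shared index for all vectors in the file
unit = loader.get_unit("")                  # shared unit from the Metadata sheet
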
clear_cache() -> None

Clear cached data.

Source code in framdata/loaders/time_vector_loaders.py
def clear_cache(self) -> None:
    """Clear cached data."""
    self._data = None
    self._meta = None
    self._index = None
get_index(vector_id: str) -> ListTimeIndex

Get the TimeIndex describing the time dimension of the vectors in the file.

Parameters:

Name Type Description Default
vector_id str

Not used since all vectors in the NVE excel files have the same index.

required

Returns:

Name Type Description
TimeIndex ListTimeIndex

TimeIndex object describing the excel file's index.

Source code in framdata/loaders/time_vector_loaders.py
def get_index(self, vector_id: str) -> ListTimeIndex:
    """
    Get the TimeIndex describing the time dimension of the vectors in the file.

    Args:
        vector_id (str): Not used since all vectors in the NVE excel files have the same index.

    Returns:
        TimeIndex: TimeIndex object describing the excel file's index.

    """
    meta = self.get_metadata("")
    if self._index is None:
        self._index = self._create_index(
            self.get_values(TvMn.DATETIME_COL),
            is_52_week_years=meta[TvMn.IS_52_WEEK_YEARS],
            extrapolate_first_point=meta[TvMn.EXTRAPOLATE_FISRT_POINT],
            extrapolate_last_point=meta[TvMn.EXTRAPOLATE_LAST_POINT],
        )
    return self._index
get_metadata(vector_id: str) -> dict[str, bool | int | str | datetime | timedelta | tzinfo | None]

Read Excel file metadata.

Parameters:

Name Type Description Default
vector_id str

Not used.

required

Raises:

Type Description
KeyError

If an expected metadata key is missing.

Returns:

Type Description
dict[str, bool | int | str | datetime | timedelta | tzinfo | None]

dict[str, bool|int|str|datetime|timedelta|tzinfo|None]: Metadata dictionary.

Source code in framdata/loaders/time_vector_loaders.py
def get_metadata(self, vector_id: str) -> dict[str, bool | int | str | datetime | timedelta | tzinfo | None]:
    """
    Read Excel file metadata.

    Args:
        vector_id (str): Not used.

    Raises:
        KeyError: If an expected metadata key is missing.

    Returns:
        dict[str, bool|int|str|datetime|timedelta|tzinfo|None]: Metadata dictionary.

    """
    if self._meta is None:
        path = self.get_source()
        raw_meta = pd.read_excel(path, sheet_name=self._METADATA_SHEET, na_values=[""]).replace([np.nan], [None]).to_dict("records")[0]

        self._meta = self._process_meta(raw_meta)
    return self._meta
get_unit(vector_id: str) -> str

Get the unit of the given time vector.

Parameters:

Name Type Description Default
vector_id str

ID of a time vector. Not used since all time vectors in the NVE excel files have the same unit.

required

Returns:

Name Type Description
str str

Unit of the time vector.

Source code in framdata/loaders/time_vector_loaders.py
def get_unit(self, vector_id: str) -> str:
    """
    Get the unit of the given time vector.

    Args:
        vector_id (str): ID of a time vector. Not used since all time vectors in the NVE excel files have the same
                         unit.

    Returns:
        str: Unit of the time vector.

    """
    return self.get_metadata("")[TvMn.UNIT]
get_values(vector_id: str) -> NDArray

Get numpy array with all the values of a given vector in the Loader's excel file.

Parameters:

Name Type Description Default
vector_id str

Unique id of the vector in the file.

required

Returns:

Name Type Description
NDArray NDArray

Numpy array with values.

Source code in framdata/loaders/time_vector_loaders.py
def get_values(self, vector_id: str) -> NDArray:
    """
    Get numpy array with all the values of a given vector in the Loader's excel file.

    Args:
        vector_id (str): Unique id of the vector in the file.

    Returns:
        NDArray: Numpy array with values.

    """
    if self._data is None:
        self._data = pd.DataFrame()
    if vector_id not in self._data.columns:
        is_horizontal = self._is_horizontal_format()
        column_filter = [vector_id]
        usecols = None
        if not is_horizontal:
            usecols = column_filter

        values_df = pd.read_excel(self.get_source(), sheet_name=self._DATA_SHEET, usecols=usecols)

        if is_horizontal:  # Convert the table to large time series format
            values_df = self._process_horizontal_format(values_df)
            values_df = self._enforce_dtypes(values_df, is_horizontal)
            self._data = values_df
        else:
            values_df = self._enforce_dtypes(values_df, is_horizontal)
            self._data[vector_id] = values_df
    return self._data[vector_id].to_numpy()
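
For the horizontal (small) format the whole sheet is read and transposed into the vertical layout before caching. A toy pandas illustration of the reshaping done by _process_horizontal_format, using hypothetical vector IDs:

import pandas as pd

# One row per vector, timestamps as column headers (horizontal / small format).
horizontal = pd.DataFrame({"ID": ["NO1.demand", "NO2.demand"], "2025": [1.0, 2.0], "2026": [1.5, 2.5]})

# Rename the ID column to DateTime, transpose, and promote the first row to column names.
reshaped = horizontal.rename(columns={"ID": "DateTime"}).T.reset_index(drop=False)
reshaped.columns = reshaped.iloc[0]
reshaped = reshaped.reindex(reshaped.index.drop(0)).reset_index(drop=True)
print(reshaped)
#   DateTime NO1.demand NO2.demand
# 0     2025        1.0        2.0
# 1     2026        1.5        2.5
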

NVEH5TimeVectorLoader

Bases: NVETimeVectorLoader

Class for loading time vector data from NVE HDF5 file sources.

Meant for large time vectors (e.g. hourly data over multiple years). Supports differing lengths and metadata of vectors stored in the file.

Specialized to the following format (a minimal writer sketch follows the list below):
  • index (h5py.Group, optional): Used to define indexes for vectors when an index should apply only to that vector.
  • common_index (h5py.Dataset): Contains one numpy array for all vectors. This is a fallback index for vectors which have not defined their own index in the index group. It is also used deliberately when many or all vectors share the same index.
  • metadata (h5py.Group): Used to connect a specific set of metadata to a particular vector.
  • common_metadata (h5py.Group): Contains one set of metadata fields for all vectors. Used in a similar way as common_index.
  • vectors (h5py.Group): Contains numpy arrays with the vector values connected to a unique ID. The same ID is used to connect the vector to an index or metadata.
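
A minimal writer sketch for this layout, assuming hypothetical vector IDs and metadata keys (the group names come from the list above; the exact dataset and metadata names used by the loader are defined by H5Names and TvMn, which are not shown here):

import h5py
import numpy as np

hours = np.arange("2025-01-01", "2026-01-01", dtype="datetime64[h]")

with h5py.File("example_vectors.h5", "w") as h5f:                          # hypothetical file name
    vectors = h5f.create_group("vectors")
    vectors.create_dataset("NO1.inflow", data=np.random.rand(hours.size))  # hypothetical ID

    # Fallback index shared by all vectors that have no entry under 'index'.
    h5f.create_dataset("common_index", data=hours.astype(str).astype("S"))

    # Fallback metadata shared by all vectors; the key name 'Unit' is illustrative only.
    common_meta = h5f.create_group("common_metadata")
    common_meta.create_dataset("Unit", data="MW")
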
Source code in framdata/loaders/time_vector_loaders.py
class NVEH5TimeVectorLoader(NVETimeVectorLoader):
    """
    Class for loading time vector data from NVE HDF5 file sources.

    Meant for large time vectors (e.g. hourly data over multiple years). Supports differing lengths and metadata of vectors stored in the file.

    Specialized to the following format:
        - index (h5py.Group, optional): Used to define indexes for vectors if index is supposed to only apply to that vector.
        - common_index (h5py.Dataset): Contains one numpy array for all vectors. This is a fallback index for vectors which have not defined their own index in
                                       the index group. Also used on purpose if many or all vectors have the same index.
        - metadata (h5py.Group): Used to connect a specific set of metadata to a particular vector.
        - common_metadata (h5py.Group): Contains one set of metadata fields for all vectors. Used in a similar way as common_index.
        - vectors (h5py.Group): Contains numpy arrays containing the vector values connected to a unique ID. The same ID is used to connect the vector to an
                                index or metadata.

    """

    _SUPPORTED_SUFFIXES: ClassVar[list] = [".h5", ".hdf5"]

    def __init__(self, source: Path | str, require_whole_years: bool, relative_loc: Path | str | None = None, validate: bool = True) -> None:
        """
        Initialize loader instance and connect it to an H5 file containing time vector data.

        Args:
            source (Path | str): Absolute Path to database or HDF5 file.
            require_whole_years (bool): Flag for validating that the time vectors in the source contain data for complete years.
            relative_loc (Path | str | None, optional): Path to HDF5 file relative to source. Defaults to None.
            validate (bool, optional): Whether to validate vectors after loading. NB! Loads all data into memory at once. Defaults to True.

        """
        super().__init__(source, require_whole_years, relative_loc)
        self._index: TimeIndex = None
        self._file_pointer = None

        if validate:
            self.validate_vectors()

    def get_values(self, vector_id: str) -> NDArray:
        """
        Get numpy array with all the values of a given vector in the Loader's HDF5 file.

        Args:
            vector_id (str): Unique id of the vector in the file.

        Returns:
            NDArray: Numpy array with values.

        """
        if self._data is None:
            self._data = dict()
        if vector_id not in self._data:
            with h5py.File(self.get_source(), mode="r") as h5f:
                self._data[vector_id] = self._read_vector_field(h5f, H5Names.VECTORS_GROUP, vector_id, field_type=h5py.Dataset, use_fallback=False)[()]
        return self._data[vector_id]

    def get_index(self, vector_id: str) -> TimeIndex:
        """
        Get the TimeIndex describing the time dimension of the vectors in the file.

        Args:
            vector_id (str): Not used since all vectors in the NVE HDF5 files have the same index.

        Returns:
            TimeIndex: TimeIndex object describing the HDF5 file's index.

        """
        if self._index is None:
            meta = self.get_metadata("")

            if TvMn.FREQUENCY not in meta or (TvMn.FREQUENCY in meta and meta[TvMn.FREQUENCY] is None):
                self._index = self._create_index(
                    datetimes=self._read_index(vector_id),
                    is_52_week_years=meta[TvMn.IS_52_WEEK_YEARS],
                    extrapolate_first_point=meta[TvMn.EXTRAPOLATE_FISRT_POINT],
                    extrapolate_last_point=meta[TvMn.EXTRAPOLATE_LAST_POINT],
                )
                return self._index
            index_array = self._read_index(vector_id) if meta[TvMn.START] is None or meta[TvMn.NUM_POINTS] is None else None
            start = meta[TvMn.START] if index_array is None else index_array[0].item()
            num_points = meta[TvMn.NUM_POINTS] if index_array is None else index_array.size

            self._index = FixedFrequencyTimeIndex(
                start,
                meta[TvMn.FREQUENCY],
                num_points,
                is_52_week_years=meta[TvMn.IS_52_WEEK_YEARS],
                extrapolate_first_point=meta[TvMn.EXTRAPOLATE_FISRT_POINT],
                extrapolate_last_point=meta[TvMn.EXTRAPOLATE_LAST_POINT],
            )

        return self._index

    def _read_index(self, vector_id: str) -> NDArray[np.datetime64]:
        with h5py.File(self.get_source(), mode="r") as h5f:
            decoded_index = np.char.decode(self._read_vector_field(h5f, H5Names.INDEX_GROUP, vector_id, h5py.Dataset)[()].astype(np.bytes_), encoding="utf-8")
            return decoded_index.astype(np.datetime64)

    def _read_vector_field(
        self,
        h5file: h5py.File,
        field_name: str,
        vector_id: str,
        field_type: type[h5py.Dataset | h5py.Group],
        use_fallback: bool = True,
    ) -> h5py.Dataset | h5py.Group:
        error = ""
        if field_name in h5file:  # check if group_name exists
            main_group = h5file[field_name]
            if not isinstance(main_group, h5py.Group):
                message = f"{self} expected '{field_name}' to be a {h5py.Group} in {h5file}. Got {type(main_group)}."
                raise TypeError(message)

            if vector_id in main_group:
                vector_field = main_group[vector_id]
                if not isinstance(vector_field, field_type):
                    message = f"{self} expected '{vector_id}' to be a {field_type} in {h5file}. Got {type(vector_field)}"
                    raise TypeError(message)
                return vector_field
            error = f"'{vector_id}' was not found in '{field_name}' group"
        else:
            error = f"'{field_name}' was not found in file"

        no_fallback_message = f"{self} expected '{vector_id}' in {h5py.Group} '{field_name}' "
        if not use_fallback:
            no_fallback_message += f"but {error}."
            raise KeyError(no_fallback_message)

        fallback_name = H5Names.COMMON_PREFIX + field_name
        if fallback_name in h5file:  # check if common_ + group_name exists
            fallback_field = h5file[fallback_name]
            if not isinstance(fallback_field, field_type):
                message = f"{self} expected '{fallback_field}' to be a {field_type} in {h5file}. Got {type(fallback_field)}."
                raise TypeError(message)
            return fallback_field

        message = (
            no_fallback_message
            + f"or a fallback {field_type} '{fallback_name}' in H5 file but "
            + f"{error},"
            + f" and fallback {field_type} '{fallback_name}' not found in file."
        )
        raise KeyError(message)

    def get_metadata(self, vector_id: str) -> dict[str, bool | int | str | datetime | timedelta | tzinfo | None]:
        """
        Retrieve and decode custom metadata from the HDF5 file.

        Args:
            vector_id (str): Not used

        Raises:
            KeyError: If any of the expected metadata keys is not found in file.

        Returns:
            dict: Dictionary with decoded metadata.

        """
        if self._meta is None:
            errors = set()
            meta = {}
            with h5py.File(self.get_source(), mode="r") as h5f:
                meta_group = self._read_vector_field(h5f, H5Names.METADATA_GROUP, vector_id, h5py.Group)
                for k, m in meta_group.items():
                    if isinstance(m, h5py.Dataset):
                        meta[k] = m[()]
                    else:
                        errors.add(f"Improper metadata format: Metadata key {k} exists but is a h5 group when it should be a h5 dataset.")
            self._report_errors(errors)
            self._meta = self._process_meta(meta)
        return self._meta

    def _get_ids(self) -> list[str]:
        with h5py.File(self.get_source(), mode="r") as h5f:
            if H5Names.VECTORS_GROUP in h5f:
                return list(h5f[H5Names.VECTORS_GROUP].keys())
            message = f"{self} required key '{H5Names.VECTORS_GROUP}' was not found in file."
            raise KeyError(message)

    def clear_cache(self) -> None:
        """Clear cached data."""
        self._data = None
        self._meta = None
        self._index = None
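
Reading follows the fallback logic of _read_vector_field above: a vector-specific entry under 'index' or 'metadata' wins, otherwise the corresponding 'common_' field is used. A condensed sketch of that lookup for datasets (file name and vector ID hypothetical; error handling and the metadata-group case omitted):

import h5py

def read_with_fallback(path: str, group: str, vector_id: str):
    # Condensed version of the dataset lookup in _read_vector_field above.
    with h5py.File(path, "r") as h5f:
        if group in h5f and vector_id in h5f[group]:
            return h5f[group][vector_id][()]   # vector-specific dataset
        return h5f["common_" + group][()]      # shared fallback, e.g. 'common_index'

index_values = read_with_fallback("example_vectors.h5", "index", "NO1.inflow")
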
__init__(source: Path | str, require_whole_years: bool, relative_loc: Path | str | None = None, validate: bool = True) -> None

Initialize loader instance and connect it to an H5 file containing time vector data.

Parameters:

Name Type Description Default
source Path | str

Absolute Path to database or HDF5 file.

required
require_whole_years bool

Flag for validating that the time vectors in the source contain data for complete years.

required
relative_loc Path | str | None

Path to HDF5 file relative to source. Defaults to None.

None
validate bool

Whether to validate vectors after loading. NB! Loads all data into memory at once. Defaults to True.

True
Source code in framdata/loaders/time_vector_loaders.py
def __init__(self, source: Path | str, require_whole_years: bool, relative_loc: Path | str | None = None, validate: bool = True) -> None:
    """
    Initialize loader instance and connect it to an H5 file containing time vector data.

    Args:
        source (Path | str): Absolute Path to database or HDF5 file.
        require_whole_years (bool): Flag for validating that the time vectors in the source contain data for complete years.
        relative_loc (Path | str | None, optional): Path to HDF5 file relative to source. Defaults to None.
        validate (bool, optional): Whether to validate vectors after loading. NB! Loads all data into memory at once. Defaults to True.

    """
    super().__init__(source, require_whole_years, relative_loc)
    self._index: TimeIndex = None
    self._file_pointer = None

    if validate:
        self.validate_vectors()
clear_cache() -> None

Clear cached data.

Source code in framdata/loaders/time_vector_loaders.py
def clear_cache(self) -> None:
    """Clear cached data."""
    self._data = None
    self._meta = None
    self._index = None
get_index(vector_id: str) -> TimeIndex

Get the TimeIndex describing the time dimension of the vectors in the file.

Parameters:

Name Type Description Default
vector_id str

Not used since all vectors in the NVE HDF5 files have the same index.

required

Returns:

Name Type Description
TimeIndex TimeIndex

TimeIndex object describing the HDF5 file's index.

Source code in framdata/loaders/time_vector_loaders.py
def get_index(self, vector_id: str) -> TimeIndex:
    """
    Get the TimeIndex describing the time dimension of the vectors in the file.

    Args:
        vector_id (str): Not used since all vectors in the NVE HDF5 files have the same index.

    Returns:
        TimeIndex: TimeIndex object describing the HDF5 file's index.

    """
    if self._index is None:
        meta = self.get_metadata("")

        if TvMn.FREQUENCY not in meta or (TvMn.FREQUENCY in meta and meta[TvMn.FREQUENCY] is None):
            self._index = self._create_index(
                datetimes=self._read_index(vector_id),
                is_52_week_years=meta[TvMn.IS_52_WEEK_YEARS],
                extrapolate_first_point=meta[TvMn.EXTRAPOLATE_FISRT_POINT],
                extrapolate_last_point=meta[TvMn.EXTRAPOLATE_LAST_POINT],
            )
            return self._index
        index_array = self._read_index(vector_id) if meta[TvMn.START] is None or meta[TvMn.NUM_POINTS] is None else None
        start = meta[TvMn.START] if index_array is None else index_array[0].item()
        num_points = meta[TvMn.NUM_POINTS] if index_array is None else index_array.size

        self._index = FixedFrequencyTimeIndex(
            start,
            meta[TvMn.FREQUENCY],
            num_points,
            is_52_week_years=meta[TvMn.IS_52_WEEK_YEARS],
            extrapolate_first_point=meta[TvMn.EXTRAPOLATE_FISRT_POINT],
            extrapolate_last_point=meta[TvMn.EXTRAPOLATE_LAST_POINT],
        )

    return self._index
get_metadata(vector_id: str) -> dict[str, bool | int | str | datetime | timedelta | tzinfo | None]

Retrieve and decode custom metadata from the HDF5 file.

Parameters:

Name Type Description Default
vector_id str

Not used

required

Raises:

Type Description
KeyError

If any of the expected metadata keys is not found in file.

Returns:

Name Type Description
dict dict[str, bool | int | str | datetime | timedelta | tzinfo | None]

Dictionary with decoded metadata.

Source code in framdata/loaders/time_vector_loaders.py
def get_metadata(self, vector_id: str) -> dict[str, bool | int | str | datetime | timedelta | tzinfo | None]:
    """
    Retrieve and decode custom metadata from the HDF5 file.

    Args:
        vector_id (str): Not used

    Raises:
        KeyError: If any of the expected metadata keys is not found in file.

    Returns:
        dict: Dictionary with decoded metadata.

    """
    if self._meta is None:
        errors = set()
        meta = {}
        with h5py.File(self.get_source(), mode="r") as h5f:
            meta_group = self._read_vector_field(h5f, H5Names.METADATA_GROUP, vector_id, h5py.Group)
            for k, m in meta_group.items():
                if isinstance(m, h5py.Dataset):
                    meta[k] = m[()]
                else:
                    errors.add(f"Improper metadata format: Metadata key {k} exists but is a h5 group when it should be a h5 dataset.")
        self._report_errors(errors)
        self._meta = self._process_meta(meta)
    return self._meta
get_values(vector_id: str) -> NDArray

Get numpy array with all the values of a given vector in the Loader's HDF5 file.

Parameters:

Name Type Description Default
vector_id str

Unique id of the vector in the file.

required

Returns:

Name Type Description
NDArray NDArray

Numpy array with values.

Source code in framdata/loaders/time_vector_loaders.py
def get_values(self, vector_id: str) -> NDArray:
    """
    Get numpy array with all the values of a given vector in the Loader's HDF5 file.

    Args:
        vector_id (str): Unique id of the vector in the file.

    Returns:
        NDArray: Numpy array with values.

    """
    if self._data is None:
        self._data = dict()
    if vector_id not in self._data:
        with h5py.File(self.get_source(), mode="r") as h5f:
            self._data[vector_id] = self._read_vector_field(h5f, H5Names.VECTORS_GROUP, vector_id, field_type=h5py.Dataset, use_fallback=False)[()]
    return self._data[vector_id]

NVEParquetTimeVectorLoader

Bases: NVETimeVectorLoader

Class for loading time vector data from NVE parquet file sources.

Meant for large time vectors. All vectors in the file must have the same lengths and metadata. Supported format (a small writer sketch follows below):
  • 'Vertical': one index column (DateTime) with the other columns containing vector values.
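
A minimal sketch, assuming hypothetical column names other than DateTime and illustrative metadata keys, of how a parquet file in this vertical format could be written with pyarrow (get_metadata below reads the Arrow schema metadata; the real key names are defined by TvMn and are not shown here):

import pandas as pd
import pyarrow as pa
import pyarrow.parquet as pq

df = pd.DataFrame({
    "DateTime": pd.date_range("2025-01-01", periods=8760, freq="h"),
    "NO1.demand": range(8760),                 # one column per vector (hypothetical ID)
})
table = pa.Table.from_pandas(df, preserve_index=False)

# Attach custom metadata to the Arrow schema; note this replaces any existing schema metadata.
table = table.replace_schema_metadata({"Unit": "MW", "IsZeroOneProfile": "false"})
pq.write_table(table, "Demand.profiles.parquet")   # hypothetical file name
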

Source code in framdata/loaders/time_vector_loaders.py
class NVEParquetTimeVectorLoader(NVETimeVectorLoader):
    """
    Class for loading time vector data from NVE parquet file sources.

    Meant for large time vectors. All vectors in the file must have the same lengths and metadata.
    Supports format:
        - 'Vertical' with one index column (DateTime) and the others containing vector values.

    """

    _SUPPORTED_SUFFIXES: ClassVar[list] = [".parquet"]

    def __init__(self, source: Path | str, require_whole_years: bool, relative_loc: Path | str | None = None, validate: bool = True) -> None:
        """
        Initialize loader instance and connect it to a Parquet file containing time vector data.

        Args:
            source (Path | str): Absolute Path to database or parquet file.
            require_whole_years (bool): Flag for validating that the time vectors in the source contain data for complete years.
            relative_loc (Path | str | None, optional): Path to parquet file relative to source. Defaults to None.
            validate (bool, optional): Flag to turn on validation of timevectors. NB! Loads all data into memory at once. Defaults to True.

        """
        super().__init__(source, require_whole_years, relative_loc)
        self._index: TimeIndex = None
        if validate:
            self.validate_vectors()

    def get_values(self, vector_id: str) -> NDArray:
        """
        Get numpy array with all the values of a given vector in the Loader's parquet file.

        Args:
            vector_id (str): Unique id of the vector in the file.

        Returns:
            NDArray: Numpy array with values.

        """
        if self._data is None:
            self._data = dict()
        if vector_id not in self._data:
            table = pq.read_table(self.get_source(), columns=[vector_id])
            self._data[vector_id] = table[vector_id].to_numpy()
        # if self._data is None:
        #     self._data = pq.read_table(self.get_source())
        return self._data[vector_id]  # .to_numpy()

    def get_index(self, vector_id: str) -> TimeIndex:  # Could be more types of indexes?
        """
        Get the TimeIndex describing the time dimension of the vectors in the file.

        Args:
            vector_id (str): Not used since all vectors in the NVE parquet files have the same index.

        Returns:
            TimeIndex: TimeIndex object describing the parquet file's index.

        """
        if self._index is None:
            meta = self.get_metadata("")

            if TvMn.FREQUENCY not in meta or (TvMn.FREQUENCY in meta and meta[TvMn.FREQUENCY] is None):
                datetime_index = pd.DatetimeIndex(
                    pd.read_parquet(self.get_source(), columns=[TvMn.DATETIME_COL])[TvMn.DATETIME_COL],
                    tz=meta[TvMn.TIMEZONE],
                ).tolist()
                self._index = self._create_index(
                    datetimes=datetime_index,
                    is_52_week_years=meta[TvMn.IS_52_WEEK_YEARS],
                    extrapolate_first_point=meta[TvMn.EXTRAPOLATE_FISRT_POINT],
                    extrapolate_last_point=meta[TvMn.EXTRAPOLATE_LAST_POINT],
                )
                return self._index

            parquet_file = None
            if TvMn.START not in meta or (TvMn.START in meta and meta[TvMn.START] is None):
                parquet_file = pq.ParquetFile(self.get_source())
                start = pd.to_datetime(next(parquet_file.iter_batches(batch_size=1, columns=[TvMn.DATETIME_COL])))
            else:
                start = meta[TvMn.START]

            if TvMn.NUM_POINTS not in meta or (TvMn.NUM_POINTS in meta and meta[TvMn.NUM_POINTS] is None):
                if parquet_file is None:
                    parquet_file = pq.ParquetFile(self.get_source())
                num_points = parquet_file.metadata.num_rows
            else:
                num_points = meta[TvMn.NUM_POINTS]
            self._index = FixedFrequencyTimeIndex(
                start,
                meta[TvMn.FREQUENCY],
                num_points,
                is_52_week_years=meta[TvMn.IS_52_WEEK_YEARS],
                extrapolate_first_point=meta[TvMn.EXTRAPOLATE_FISRT_POINT],
                extrapolate_last_point=meta[TvMn.EXTRAPOLATE_LAST_POINT],
            )

        return self._index

    def get_metadata(self, vector_id: str) -> dict[str, bool | int | str | datetime | timedelta | tzinfo | None]:
        """
        Retrieve and decode custom metadata from the parquet file.

        Args:
            vector_id (str): Not used

        Raises:
            KeyError: If any of the expected metadata keys is not found in file.

        Returns:
            dict: Dictionary with decoded metadata.

        """
        if self._meta is None:
            path = self.get_source()
            raw_meta = pq.ParquetFile(path).schema_arrow.metadata

            self._meta = self._process_meta(raw_meta)
        return self._meta

    def _get_ids(self) -> list[str]:
        parquet_file = pq.ParquetFile(self.get_source())
        time_vector_ids: list[str] = parquet_file.schema_arrow.names
        time_vector_ids.remove(TvMn.DATETIME_COL)
        return time_vector_ids

    def clear_cache(self) -> None:
        """Clear cached data."""
        self._data = None
        self._meta = None
        self._index = None
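
A hedged usage sketch of NVEParquetTimeVectorLoader; the database path, relative location and vector ID are hypothetical, and get_ids is the public wrapper used by validate_vectors on the base class:

from pathlib import Path

loader = NVEParquetTimeVectorLoader(
    source=Path("C:/data/nve_database"),                               # hypothetical root
    require_whole_years=True,
    relative_loc=Path("db41_demand_profiles/Demand.profiles.parquet"), # hypothetical file
    validate=False,
)
print(loader.get_ids())                    # all columns except DateTime
values = loader.get_values("NO1.demand")   # hypothetical vector ID
index = loader.get_index("")               # FixedFrequencyTimeIndex when frequency metadata is set
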
__init__(source: Path | str, require_whole_years: bool, relative_loc: Path | str | None = None, validate: bool = True) -> None

Initialize loader instance and connect it to a Parquet file containing time vector data.

Parameters:

Name Type Description Default
source Path | str

Absolute Path to database or parquet file.

required
require_whole_years bool

Flag for validating that the time vectors in the source contain data for complete years.

required
relative_loc Path | str | None

Path to parquet file relative to source. Defaults to None.

None
validate bool

Flag to turn on validation of timevectors. NB! Loads all data into memory at once. Defaults to True.

True
Source code in framdata/loaders/time_vector_loaders.py
def __init__(self, source: Path | str, require_whole_years: bool, relative_loc: Path | str | None = None, validate: bool = True) -> None:
    """
    Initialize loader instance and connect it to a Parquet file containing time vector data.

    Args:
        source (Path | str): Absolute Path to database or parquet file.
        require_whole_years (bool): Flag for validating that the time vectors in the source contain data for complete years.
        relative_loc (Path | str | None, optional): Path to parquet file relative to source. Defaults to None.
        validate (bool, optional): Flag to turn on validation of timevectors. NB! Loads all data into memory at once. Defaults to True.

    """
    super().__init__(source, require_whole_years, relative_loc)
    self._index: TimeIndex = None
    if validate:
        self.validate_vectors()
clear_cache() -> None

Clear cached data.

Source code in framdata/loaders/time_vector_loaders.py
def clear_cache(self) -> None:
    """Clear cached data."""
    self._data = None
    self._meta = None
    self._index = None
get_index(vector_id: str) -> TimeIndex

Get the TimeIndex describing the time dimension of the vectors in the file.

Parameters:

Name Type Description Default
vector_id str

Not used since all vectors in the NVE parquet files have the same index.

required

Returns:

Name Type Description
TimeIndex TimeIndex

TimeIndex object describing the parquet file's index.

Source code in framdata/loaders/time_vector_loaders.py
def get_index(self, vector_id: str) -> TimeIndex:  # Could be more types of indexes?
    """
    Get the TimeIndex describing the time dimension of the vectors in the file.

    Args:
        vector_id (str): Not used since all vectors in the NVE parquet files have the same index.

    Returns:
        TimeIndex: TimeIndex object describing the parquet file's index.

    """
    if self._index is None:
        meta = self.get_metadata("")

        if TvMn.FREQUENCY not in meta or (TvMn.FREQUENCY in meta and meta[TvMn.FREQUENCY] is None):
            datetime_index = pd.DatetimeIndex(
                pd.read_parquet(self.get_source(), columns=[TvMn.DATETIME_COL])[TvMn.DATETIME_COL],
                tz=meta[TvMn.TIMEZONE],
            ).tolist()
            self._index = self._create_index(
                datetimes=datetime_index,
                is_52_week_years=meta[TvMn.IS_52_WEEK_YEARS],
                extrapolate_first_point=meta[TvMn.EXTRAPOLATE_FISRT_POINT],
                extrapolate_last_point=meta[TvMn.EXTRAPOLATE_LAST_POINT],
            )
            return self._index

        parquet_file = None
        if TvMn.START not in meta or (TvMn.START in meta and meta[TvMn.START] is None):
            parquet_file = pq.ParquetFile(self.get_source())
            start = pd.to_datetime(next(parquet_file.iter_batches(batch_size=1, columns=[TvMn.DATETIME_COL])))
        else:
            start = meta[TvMn.START]

        if TvMn.NUM_POINTS not in meta or (TvMn.NUM_POINTS in meta and meta[TvMn.NUM_POINTS] is None):
            if parquet_file is None:
                parquet_file = pq.ParquetFile(self.get_source())
            num_points = parquet_file.metadata.num_rows
        else:
            num_points = meta[TvMn.NUM_POINTS]
        self._index = FixedFrequencyTimeIndex(
            start,
            meta[TvMn.FREQUENCY],
            num_points,
            is_52_week_years=meta[TvMn.IS_52_WEEK_YEARS],
            extrapolate_first_point=meta[TvMn.EXTRAPOLATE_FISRT_POINT],
            extrapolate_last_point=meta[TvMn.EXTRAPOLATE_LAST_POINT],
        )

    return self._index
get_metadata(vector_id: str) -> dict[str, bool | int | str | datetime | timedelta | tzinfo | None]

Retrieve and decode custom metadata from the parquet file.

Parameters:

Name Type Description Default
vector_id str

Not used

required

Raises:

Type Description
KeyError

If any of the expected metadata keys is not found in file.

Returns:

Name Type Description
dict dict[str, bool | int | str | datetime | timedelta | tzinfo | None]

Dictionary with decoded metadata.

Source code in framdata/loaders/time_vector_loaders.py
def get_metadata(self, vector_id: str) -> dict[str, bool | int | str | datetime | timedelta | tzinfo | None]:
    """
    Retrieve and decode custom metadata from the parquet file.

    Args:
        vector_id (str): Not used

    Raises:
        KeyError: If any of the expected metadata keys is not found in file.

    Returns:
        dict: Dictionary with decoded metadata.

    """
    if self._meta is None:
        path = self.get_source()
        raw_meta = pq.ParquetFile(path).schema_arrow.metadata

        self._meta = self._process_meta(raw_meta)
    return self._meta
get_values(vector_id: str) -> NDArray

Get numpy array with all the values of a given vector in the Loader's parquet file.

Parameters:

Name Type Description Default
vector_id str

Unique id of the vector in the file.

required

Returns:

Name Type Description
NDArray NDArray

Numpy array with values.

Source code in framdata/loaders/time_vector_loaders.py
def get_values(self, vector_id: str) -> NDArray:
    """
    Get numpy array with all the values of a given vector in the Loader's parquet file.

    Args:
        vector_id (str): Unique id of the vector in the file.

    Returns:
        NDArray: Numpy array with values.

    """
    if self._data is None:
        self._data = dict()
    if vector_id not in self._data:
        table = pq.read_table(self.get_source(), columns=[vector_id])
        self._data[vector_id] = table[vector_id].to_numpy()
    # if self._data is None:
    #     self._data = pq.read_table(self.get_source())
    return self._data[vector_id]  # .to_numpy()

NVEYamlTimeVectoroader

Bases: NVETimeVectorLoader

Class for loading time vector data from NVE YAML file sources.

Meant for very sparse time vector data, where the vectors have varying lengths and indexes. Currently all vectors must have the same metadata within each file. Supported format (an illustrative parsed example follows below):
  • Metadata: field containing a dictionary with metadata for all vectors.
  • Other fields are vector IDs with lists for the x and y axes.
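
An illustrative example of the parsed structure _parse_file expects, using yaml.safe_load on an inline document. The field names (metadata, x, y, attribute, values, index) are lowercase placeholders; the real names are defined by YamlNames and are not shown in this excerpt, and the vector ID is hypothetical:

import yaml

text = """
metadata:
  x:
    attribute: values
  y:
    attribute: index
NO1.capacity:
  values: [100.0, 120.0]
  index: [2025-01-01, 2030-01-01]
"""
data = yaml.safe_load(text)
values_label = data["metadata"]["x"]["attribute"]   # -> "values"
index_label = data["metadata"]["y"]["attribute"]    # -> "index"
print(data["NO1.capacity"][values_label])           # -> [100.0, 120.0]
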

Source code in framdata/loaders/time_vector_loaders.py
class NVEYamlTimeVectoroader(NVETimeVectorLoader):
    """
    Class for loading time vector data from NVE YAML file sources.

    Meant for very sparse time vector data, where the vectors have varying lengths and indexes. Currently all vectors must have the same metadata within each
    file.
    Supported format:
        - Metadata: field containing dictionary with metadata for all vectors.
        - Other fields are vector IDs with lists for x and y axes.

    """

    _SUPPORTED_SUFFIXES: ClassVar[list] = [".yaml", ".yml"]

    def __init__(self, source: Path | str, require_whole_years: bool, relative_loc: Path | str | None = None, validate: bool = True) -> None:
        """
        Initialize loader instance and connect it to a YAML file containing time vector data.

        Args:
            source (Path | str): Absolute Path to database or YAML file.
            require_whole_years (bool): Flag for validating that the time vectors in the source contain data for complete years.
            relative_loc (Path | str | None, optional): Path to YAML file relative to source. Defaults to None.
            validate (bool, optional): Flag to turn on validation of timevectors. NB! Loads all data into memory at once. Defaults to True.

        """
        super().__init__(source, require_whole_years, relative_loc)
        self._content_ids: list[str] = None

        self._values_label: str = None
        self._index_label: str = None

        if validate:
            self.validate_vectors()

    def get_values(self, vector_id: str) -> NDArray:
        """
        Get values of vector.

        Args:
            vector_id (str): Unique id of the vector in the Loader source.

        Returns:
            NDArray: Numpy array with values of vector.

        """
        if self._data is None:
            self._parse_file()
        values_list = self._data[vector_id][self._values_label]
        if len(values_list) == 0:
            message = f"Time vector {vector_id} in {self} contains no points."
            raise ValueError(message)
        return np.asarray(values_list)

    def get_index(self, vector_id: str) -> TimeIndex:
        """
        Get index of vector.

        Args:
            vector_id (str): Unique id of the vector in the Loader source.

        Returns:
            TimeIndex: TimeIndex describing the time dimension of the vector.

        """
        meta = self.get_metadata(vector_id)  # also parses data
        try:
            datetime_list = [self._date_to_datetime(index_val) for index_val in self._data[vector_id][self._index_label]]
        except ValueError as e:
            message = f"{self} got non date or none datetime values in index field of vector {vector_id}."
            raise ValueError(message) from e

        if len(datetime_list) == 0:
            message = f"Index of {vector_id} in {self} contains no points."
            raise ValueError(message)

        if (len(datetime_list) == 1 or self.get_values(vector_id).size == 1) and meta[TvMn.EXTRAPOLATE_FISRT_POINT] and meta[TvMn.EXTRAPOLATE_LAST_POINT]:
            # Even though _create_index can now handle ConstantTimeIndexes,
            # we need to consider that YAML time vectors can have the extra end date for its final period stored in its index.
            # That would lead to _create_time_index not creating a constant one when it should.
            # We may remove this feature in the future.
            return ConstantTimeIndex()

        args = (
            datetime_list,
            meta[TvMn.IS_52_WEEK_YEARS],
            meta[TvMn.EXTRAPOLATE_FISRT_POINT],
            meta[TvMn.EXTRAPOLATE_LAST_POINT],
        )

        if len(datetime_list) == len(self.get_values(vector_id)) + 1:
            return ListTimeIndex(*args)
        # create index with added end datetime
        return self._create_index(*args)

    def get_metadata(self, vector_id: str) -> dict[str, bool | int | str | datetime | timedelta | tzinfo | None]:
        """
        Read YAML file metadata.

        Args:
            vector_id (str): Not used.

        Raises:
            KeyError: If an expected metadata key is missing.

        Returns:
            dict[str, bool|int|str|datetime|timedelta|tzinfo|None]: Metadata dictionary.

        """
        if self._meta is None:
            raw_meta = self._data[YamlNames.metadata_field][YamlNames.x_field]

            self._meta = self._process_meta(raw_meta)
        return self._meta

    def _get_ids(self) -> list[str]:
        if self._content_ids is None:
            if self._data is None:
                self._parse_file()
            ids_list = list(self._data.keys())
            ids_list.remove(YamlNames.metadata_field)
            self._content_ids = ids_list
        return self._content_ids

    def _parse_file(self) -> None:
        with self.get_source().open(encoding=YamlNames.encoding) as f:
            d = yaml.safe_load(f)
            self._x_meta = d[YamlNames.metadata_field][YamlNames.x_field]
            self._y_meta = d[YamlNames.metadata_field][YamlNames.y_field]

            self._values_label = self._x_meta[YamlNames.attribute]
            self._index_label = self._y_meta[YamlNames.attribute]

            self._data = d

    def _date_to_datetime(self, value: date | datetime) -> datetime:
        if isinstance(value, date):
            value = datetime(value.year, value.month, value.day)
        elif not isinstance(value, datetime):
            message = "Value must be date or datetime."
            raise ValueError(message)
        return value

    def clear_cache(self) -> None:
        """Clear cached data."""
        self._data = None
        self._meta = None

        self._content_ids = None

        self._values_label = None
        self._index_label = None
__init__(source: Path | str, require_whole_years: bool, relative_loc: Path | str | None = None, validate: bool = True) -> None

Initialize loader instance and connect it to a YAML file containing time vector data.

Parameters:

Name Type Description Default
source Path | str

Absolute Path to database or YAML file.

required
require_whole_years bool

Flag for validating that the time vectors in the source contain data for complete years.

required
relative_loc Path | str | None

Path to YAML file relative to source. Defaults to None.

None
validate bool

Flag to turn on validation of timevectors. NB! Loads all data into memory at once. Defaults to True.

True
Source code in framdata/loaders/time_vector_loaders.py
def __init__(self, source: Path | str, require_whole_years: bool, relative_loc: Path | str | None = None, validate: bool = True) -> None:
    """
    Initialize loader instance and connect it to a YAML file containing time vector data.

    Args:
        source (Path | str): Absolute Path to database or YAML file.
        require_whole_years (bool): Flag for validating that the time vectors in the source contain data for complete years.
        relative_loc (Path | str | None, optional): Path to YAML file relative to source. Defaults to None.
        validate (bool, optional): Flag to turn on validation of timevectors. NB! Loads all data into memory at once. Defaults to True.

    """
    super().__init__(source, require_whole_years, relative_loc)
    self._content_ids: list[str] = None

    self._values_label: str = None
    self._index_label: str = None

    if validate:
        self.validate_vectors()
clear_cache() -> None

Clear cached data.

Source code in framdata/loaders/time_vector_loaders.py
def clear_cache(self) -> None:
    """Clear cached data."""
    self._data = None
    self._meta = None

    self._content_ids = None

    self._values_label = None
    self._index_label = None
get_index(vector_id: str) -> TimeIndex

Get index of vector.

Parameters:

Name Type Description Default
vector_id str

Unique id of the vector in the Loader source.

required

Returns:

Name Type Description
TimeIndex TimeIndex

TimeIndex describing the time dimension of the vector.

Source code in framdata/loaders/time_vector_loaders.py
def get_index(self, vector_id: str) -> TimeIndex:
    """
    Get index of vector.

    Args:
        vector_id (str): Unique id of the vector in the Loader source.

    Returns:
        TimeIndex: TimeIndex describing the time dimension of the vector.

    """
    meta = self.get_metadata(vector_id)  # also parses data
    try:
        datetime_list = [self._date_to_datetime(index_val) for index_val in self._data[vector_id][self._index_label]]
    except ValueError as e:
        message = f"{self} got non date or none datetime values in index field of vector {vector_id}."
        raise ValueError(message) from e

    if len(datetime_list) == 0:
        message = f"Index of {vector_id} in {self} contains no points."
        raise ValueError(message)

    if (len(datetime_list) == 1 or self.get_values(vector_id).size == 1) and meta[TvMn.EXTRAPOLATE_FISRT_POINT] and meta[TvMn.EXTRAPOLATE_LAST_POINT]:
        # Even though _create_index can now handle ConstantTimeIndexes,
        # we need to consider that YAML time vectors can have the extra end date for its final period stored in its index.
        # That would lead to _create_time_index not creating a constant one when it should.
        # We may remove this feature in the future.
        return ConstantTimeIndex()

    args = (
        datetime_list,
        meta[TvMn.IS_52_WEEK_YEARS],
        meta[TvMn.EXTRAPOLATE_FISRT_POINT],
        meta[TvMn.EXTRAPOLATE_LAST_POINT],
    )

    if len(datetime_list) == len(self.get_values(vector_id)) + 1:
        return ListTimeIndex(*args)
    # create index with added end datetime
    return self._create_index(*args)
get_metadata(vector_id: str) -> dict[str, bool | int | str | datetime | timedelta | tzinfo | None]

Read YAML file metadata.

Parameters:

Name Type Description Default
vector_id str

Not used.

required

Raises:

Type Description
KeyError

If an expected metadata key is missing.

Returns:

Type Description
dict[str, bool | int | str | datetime | timedelta | tzinfo | None]

dict[str, bool|int|str|datetime|timedelta|tzinfo|None]: Metadata dictionary.

Source code in framdata/loaders/time_vector_loaders.py
def get_metadata(self, vector_id: str) -> dict[str, bool | int | str | datetime | timedelta | tzinfo | None]:
    """
    Read YAML file metadata.

    Args:
        vector_id (str): Not used.

    Raises:
        KeyError: If an expected metadata key is missing.

    Returns:
        dict[str, bool|int|str|datetime|timedelta|tzinfo|None]: Metadata dictionary.

    """
    if self._meta is None:
        raw_meta = self._data[YamlNames.metadata_field][YamlNames.x_field]

        self._meta = self._process_meta(raw_meta)
    return self._meta
get_values(vector_id: str) -> NDArray

Get values of vector.

Parameters:

Name Type Description Default
vector_id str

Unique id of the vector in the Loader source.

required

Returns:

Name Type Description
NDArray NDArray

Numpy array with values of vector.

Source code in framdata/loaders/time_vector_loaders.py
def get_values(self, vector_id: str) -> NDArray:
    """
    Get values of vector.

    Args:
        vector_id (str): Unique id of the vector in the Loader source.

    Returns:
        NDArray: Numpy array with values of vector.

    """
    if self._data is None:
        self._parse_file()
    values_list = self._data[vector_id][self._values_label]
    if len(values_list) == 0:
        message = f"Time vector {vector_id} in {self} contains no points."
        raise ValueError(message)
    return np.asarray(values_list)

NVETimeVectorLoader

Loader for NVE time vector data.

This module provides the NVETimeVectorLoader class, which extends FileLoader and TimeVectorLoader to handle metadata and validation for time vector data from NVE file sources.

NVETimeVectorLoader

Bases: FileLoader, TimeVectorLoader

Common interface for metadata in NVE TimeVectorLoaders.

Source code in framdata/loaders/NVETimeVectorLoader.py
class NVETimeVectorLoader(FileLoader, TimeVectorLoader):
    """Common interface for metadata in NVE TimeVectorLoaders."""

    def __init__(self, source: Path | str, require_whole_years: bool, relative_loc: Path | str | None = None) -> None:
        """
        Initialize NVETimeVectorLoader with source and optional relative location.

        Args:
            source (Path | str): Path or string to the source file.
            require_whole_years (bool): Flag for validating that the time vectors in the source contain data for complete years.
            relative_loc (Path | str | None, optional): Relative location, defaults to None.

        """
        super().__init__(source, relative_loc)

        self._data: dict[str, NDArray] = None
        self._meta: dict[str, bool | int | str | datetime | timedelta | tzinfo] = None

        self._require_whole_years = require_whole_years

    def is_max_level(self, vector_id: str) -> bool | None:
        """
        Check if the time vector is classified as a max level vector.

        Args:
            vector_id (str): ID of the time vector.

        Returns:
            bool | None: True if max level, False otherwise, or None if not specified.

        """
        return self.get_metadata(vector_id)[TvMn.IS_MAX_LEVEL]

    def is_zero_one_profile(self, vector_id: str) -> bool | None:
        """
        Check if the time vector is classified as a zero-one profile vector.

        Args:
            vector_id (str): ID of the time vector.

        Returns:
            bool | None: True if zero-one profile, False otherwise, or None if not specified.

        """
        return self.get_metadata(vector_id)[TvMn.IS_ZERO_ONE_PROFILE]

    def get_unit(self, vector_id: str) -> str:
        """
        Get the unit of the given time vector.

        Args:
            vector_id (str): ID of a time vector. Not used since all time vectors in the NVE parquet files have the same
                             unit.

        Returns:
            str: Unit of the time vector.

        """
        return self.get_metadata(vector_id)[TvMn.UNIT]

    def get_reference_period(self, vector_id: str) -> ReferencePeriod | None:
        """
        Get reference period from metadata.

        Args:
            vector_id (str): Not used.

        Raises:
            ValueError: If only one of start year or number of years are set in metadata.

        Returns:
            ReferencePeriod | None

        """
        start_year = self.get_metadata(vector_id)[TvMn.REF_PERIOD_START_YEAR]
        num_years = self.get_metadata(vector_id)[TvMn.REF_PERIOD_NUM_YEARS]

        ref_period = None
        if start_year and num_years:
            ref_period = ReferencePeriod(start_year=start_year, num_years=num_years)
        elif start_year or num_years:
            message = (
                f"{self}: Both {TvMn.REF_PERIOD_START_YEAR} and {TvMn.REF_PERIOD_NUM_YEARS} must be provided for a valid reference period. "
                "Alternatively, both must be None for an undefined reference period."
            )
            raise ValueError(message)
        return ref_period

    def validate_vectors(self) -> None:
        """
        Validate data in all vectors contained in the Loader.

        Conditions validated:
            - If vector contains negative values.
            (- If vector is a zero one profile and contains values outside the unit interval.) * not in use currently

        Raises:
            ValueError: When conditions are violated.

        """
        errors = set()
        for vector_id in self.get_ids():
            errors |= self._validate_vector(vector_id)

        if errors:
            message = f"Found errors in {self}:"
            for e in errors:
                message += f"\n - {e}."

            raise ValueError(message)

    def _process_meta(self, raw_meta: dict[str | bytes, str | bytes | int | bool | None]) -> dict[str, Any]:
        processed_meta, missing_keys = TvMn.cast_meta(raw_meta)

        optional_keys = {TvMn.ID_COLUMN_NAME, TvMn.FREQUENCY, TvMn.NUM_POINTS, TvMn.START}
        missing_keys -= optional_keys

        if missing_keys:
            msg = f"{self} could not find keys: {missing_keys} in metadata of file {self.get_source()}. Metadata: {processed_meta}"
            raise KeyError(msg)

        return processed_meta

    def _validate_vector(self, vector_id: str) -> set[str]:
        index = self.get_index(vector_id)
        values = self.get_values(vector_id)

        errors = set()

        # validate index length
        if index.get_num_periods() not in range(values.size - 1, values.size + 1):  # Since ListTimeIndex objects' num_periods can vary.
            errors.add(f"{vector_id} - {type(index)} with {index.get_num_periods()} periods and vector with size ({values.size}) do not match.")

        # validate negative and missing values
        negatives = values < 0
        if np.any(negatives):
            errors.add(f"{vector_id} contains {negatives.sum()} negative values.")
        nans = np.isnan(values)
        if np.any(nans):
            errors.add(f"{vector_id} contains {nans.sum()} nan values.")

        # validate that index is whole years if required
        if self._require_whole_years and not index.is_whole_years():
            errors.add(f"{vector_id} is required to contain whole years but its index ({index}) is not classified as is_whole_years.")

        # outside_unit_interval = ((0 <= values) & (values <= 1))
        # if self.is_zero_one_profile(vector_id) and outside_unit_interval.any():
        #     num_outside_range = outside_unit_interval.sum()
        #     errors.add(f"{vector_id} is classified as a zero one vector but contains {num_outside_range} values outside the range 0, 1.")

        # if not self.is_zero_one_profile(vector_id):
        #     ref_period = self.get_reference_period(vector_id)
        #     ref_start_date = ref_period.get_start_year()

        #     index = self.get_index(vector_id)

        return errors

    def _create_index(
        self,
        datetimes: list[datetime] | NDArray[np.datetime64],
        is_52_week_years: bool,
        extrapolate_first_point: bool,
        extrapolate_last_point: bool,
    ) -> ListTimeIndex | FixedFrequencyTimeIndex:
        """
        Check if the index has a fixed frequency and create either a FixedFrequency- or ListTimeIndex based on this.

        If a list index is created, the first datetime of the year following the actual final index is added as the end of the final period.
        For example:
            - Actual input index: [2023-1-2, 2029-12-31, 2035-1-1, 2040-1-2, 2050-1-3]
            - Output ListTimeIndex: [2023-1-2, 2029-12-31, 2035-1-1, 2040-1-2, 2050-1-3, 2051-1-2]

        """
        dt64_arrray = np.array(datetimes).astype("datetime64[us]")  # convert to microseconds to match the resolution of Python datetime

        if dt64_arrray.size == 1 and extrapolate_first_point and extrapolate_last_point:
            return ConstantTimeIndex()

        diff_array = np.diff(dt64_arrray)  # get period durations between points
        unique_array = np.unique(diff_array)  # get unique durations

        if unique_array.size == 1 and dt64_arrray.size > 1:  # Fixed frequency and more than one value
            dt64_start: np.datetime64 = dt64_arrray[0]
            td64_period_duration: np.timedelta64 = unique_array[0]
            return FixedFrequencyTimeIndex(
                start_time=dt64_start.item(),
                period_duration=td64_period_duration.item(),
                num_periods=dt64_arrray.size,
                is_52_week_years=is_52_week_years,
                extrapolate_first_point=extrapolate_first_point,
                extrapolate_last_point=extrapolate_last_point,
            )

        # add end date to final period
        dt_list = datetimes if isinstance(datetimes, list) else datetimes.astype("datetime64[us]").astype(datetime).tolist()
        end_year = dt_list[-1].isocalendar().year + 1
        end_dt = datetime.fromisocalendar(end_year, 1, 1)

        if len(dt_list) == 1:
            start_dt = dt_list[0]
            period_duration = end_dt - start_dt
            return SinglePeriodTimeIndex(
                start_time=start_dt,
                period_duration=period_duration,
                is_52_week_years=is_52_week_years,
                extrapolate_first_point=extrapolate_first_point,
                extrapolate_last_point=extrapolate_last_point,
            )

        return ListTimeIndex(
            datetime_list=[*dt_list, end_dt],
            is_52_week_years=is_52_week_years,
            extrapolate_first_point=extrapolate_first_point,
            extrapolate_last_point=extrapolate_last_point,
        )
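
To make the branching above concrete, a small illustration of which index type each spacing pattern yields (the first two datetime lists are invented; the last mirrors the docstring example):

from datetime import datetime

evenly_spaced = [datetime(2030, 1, 1), datetime(2030, 1, 8), datetime(2030, 1, 15)]
# -> FixedFrequencyTimeIndex with a 7-day period_duration and num_periods=3.

single_point = [datetime(2030, 1, 1)]
# -> ConstantTimeIndex when both extrapolation flags are True, otherwise a
#    SinglePeriodTimeIndex lasting until the first ISO day of the following year.

irregular = [datetime(2023, 1, 2), datetime(2029, 12, 31), datetime(2035, 1, 1),
             datetime(2040, 1, 2), datetime(2050, 1, 3)]
# -> ListTimeIndex over these points plus an appended end date (2051-01-02).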
__init__(source: Path | str, require_whole_years: bool, relative_loc: Path | str | None = None) -> None

Initialize NVETimeVectorLoader with source and optional relative location.

Parameters:

Name Type Description Default
source Path | str

Path or string to the source file.

required
require_whole_years bool

Flag for validating that the time vectors in the source contain data for complete years.

required
relative_loc Path | str | None

Relative location, defaults to None.

None
Source code in framdata/loaders/NVETimeVectorLoader.py
def __init__(self, source: Path | str, require_whole_years: bool, relative_loc: Path | str | None = None) -> None:
    """
    Initialize NVETimeVectorLoader with source and optional relative location.

    Args:
        source (Path | str): Path or string to the source file.
        require_whole_years (bool): Flag for validating that the time vectors in the source contain data for complete years.
        relative_loc (Path | str | None, optional): Relative location, defaults to None.

    """
    super().__init__(source, relative_loc)

    self._data: dict[str, NDArray] = None
    self._meta: dict[str, bool | int | str | datetime | timedelta | tzinfo] = None

    self._require_whole_years = require_whole_years
get_reference_period(vector_id: str) -> ReferencePeriod | None

Get reference period from metadata.

Parameters:

Name Type Description Default
vector_id str

Not used.

required

Raises:

Type Description
ValueError

If only one of start year or number of years are set in metadata.

Returns:

Type Description
ReferencePeriod | None

ReferencePeriod | None

Source code in framdata/loaders/NVETimeVectorLoader.py
def get_reference_period(self, vector_id: str) -> ReferencePeriod | None:
    """
    Get reference period from metadata.

    Args:
        vector_id (str): Not used.

    Raises:
        ValueError: If only one of start year or number of years are set in metadata.

    Returns:
        ReferencePeriod | None

    """
    start_year = self.get_metadata(vector_id)[TvMn.REF_PERIOD_START_YEAR]
    num_years = self.get_metadata(vector_id)[TvMn.REF_PERIOD_NUM_YEARS]

    ref_period = None
    if start_year and num_years:
        ref_period = ReferencePeriod(start_year=start_year, num_years=num_years)
    elif start_year or num_years:
        message = (
            f"{self}: Both {TvMn.REF_PERIOD_START_YEAR} and {TvMn.REF_PERIOD_NUM_YEARS} must be provided for a valid reference period. "
            "Alternatively, both must be None for an undefined reference period."
        )
        raise ValueError(message)
    return ref_period
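
In other words, the two metadata fields must be set (or left unset) together. A hedged sketch of calling it; the file path is invented and the vector id is ignored by this method:

from framdata.loaders.time_vector_loaders import NVEParquetTimeVectorLoader

loader = NVEParquetTimeVectorLoader("db21_hydropower_profiles/Hydropower.Inflow.profiles.parquet", require_whole_years=True)
ref = loader.get_reference_period("any_id")
# Both start year and number of years set in the file metadata -> ReferencePeriod
# Both unset                                                    -> None
# Only one of them set                                          -> ValueError
if ref is not None:
    print(ref.get_start_year())  # assumption: ReferencePeriod exposes a getter like this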
get_unit(vector_id: str) -> str

Get the unit of the given time vector.

Parameters:

Name Type Description Default
vector_id str

ID of a time vector. Not used since all time vectors in the NVE parquet files have the same unit.

required

Returns:

Name Type Description
str str

Unit of the time vector.

Source code in framdata/loaders/NVETimeVectorLoader.py
def get_unit(self, vector_id: str) -> str:
    """
    Get the unit of the given time vector.

    Args:
        vector_id (str): ID of a time vector. Not used since all time vectors in the NVE parquet files have the same
                         unit.

    Returns:
        str: Unit of the time vector.

    """
    return self.get_metadata(vector_id)[TvMn.UNIT]
is_max_level(vector_id: str) -> bool | None

Check if the time vector is classified as a max level vector.

Parameters:

Name Type Description Default
vector_id str

ID of the time vector.

required

Returns:

Type Description
bool | None

bool | None: True if max level, False otherwise, or None if not specified.

Source code in framdata/loaders/NVETimeVectorLoader.py
def is_max_level(self, vector_id: str) -> bool | None:
    """
    Check if the time vector is classified as a max level vector.

    Args:
        vector_id (str): ID of the time vector.

    Returns:
        bool | None: True if max level, False otherwise, or None if not specified.

    """
    return self.get_metadata(vector_id)[TvMn.IS_MAX_LEVEL]
is_zero_one_profile(vector_id: str) -> bool | None

Check if the time vector is classified as a zero-one profile vector.

Parameters:

Name Type Description Default
vector_id str

ID of the time vector.

required

Returns:

Type Description
bool | None

bool | None: True if zero-one profile, False otherwise, or None if not specified.

Source code in framdata/loaders/NVETimeVectorLoader.py
def is_zero_one_profile(self, vector_id: str) -> bool | None:
    """
    Check if the time vector is classified as a zero-one profile vector.

    Args:
        vector_id (str): ID of the time vector.

    Returns:
        bool | None: True if zero-one profile, False otherwise, or None if not specified.

    """
    return self.get_metadata(vector_id)[TvMn.IS_ZERO_ONE_PROFILE]
validate_vectors() -> None

Validate data in all vectors contained in the Loader.

Conditions validated:
  • Whether the vector contains negative values.
  • Whether a zero-one profile contains values outside the unit interval (not currently in use).

Raises:

Type Description
ValueError

When conditions are violated.

Source code in framdata/loaders/NVETimeVectorLoader.py
def validate_vectors(self) -> None:
    """
    Validate data in all vectors contained in the Loader.

    Conditions validated:
        - If vector contains negative values.
        (- If vector is a zero one profile and contains values outside the unit interval.) * not in use currently

    Raises:
        ValueError: When conditions are violated.

    """
    errors = set()
    for vector_id in self.get_ids():
        errors |= self._validate_vector(vector_id)

    if errors:
        message = f"Found errors in {self}:"
        for e in errors:
            message += f"\n - {e}."

        raise ValueError(message)
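
Because every problem is folded into a single ValueError, a caller can defer validation and report all issues at once. A hedged sketch (invented file path; the validate flag is documented for the concrete loaders below):

from framdata.loaders.time_vector_loaders import NVEH5TimeVectorLoader

loader = NVEH5TimeVectorLoader("db01_nodes_profiles/Power.Nodes.profiles.h5", require_whole_years=True, validate=False)
try:
    loader.validate_vectors()
except ValueError as err:
    print(err)  # one message listing every offending vector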

curve_loaders

Contains class for loading Curve data from NVE yaml files.

NVEYamlCurveLoader

Bases: FileLoader, CurveLoader

Handle reading of Curve data from a YAML file of NVE-specific format.

Source code in framdata/loaders/curve_loaders.py
class NVEYamlCurveLoader(FileLoader, CurveLoader):
    """Handle reading of Curve data from a YAML file of NVE-specific format."""

    _SUPPORTED_SUFFIXES: ClassVar[list[str]] = [".yaml", ".yml"]

    def __init__(self, source: Path | str, relative_loc: Path | str | None = None) -> None:
        """
        Handle reading of curves from a single yaml file.

        Args:
            source (Path | str): Absolute Path to database or yaml file path.
            relative_loc (Optional[Union[Path, str]], optional): Path to yaml file relative to source. Defaults to None.

        """
        super().__init__(source, relative_loc)

        self._data = None
        self._x_meta: str = None
        self._y_meta: str = None

        self._x_label: str = None
        self._y_label: str = None

    def get_x_axis(self, curve_id: str) -> NDArray:
        """
        Get values of x axis.

        Args:
            curve_id (str): Unique id of the curve in the Loader source.

        Returns:
            NDArray: Numpy array with values of x axis.

        """
        if self._data is None:
            self._parse_file()
        return np.asarray(self._data[curve_id][self._x_label])

    def get_y_axis(self, curve_id: str) -> NDArray:
        """
        Get values of y axis.

        Args:
            curve_id (str): Unique id of the curve in the Loader source.

        Returns:
            NDArray: Numpy array with values of y axis.

        """
        if self._data is None:
            self._parse_file()
        return np.asarray(self._data[curve_id][self._y_label])

    def get_x_unit(self, curve_id: str) -> str:
        """
        Get the unit of the x axis for the specified curve.

        Args:
            curve_id (str): Unique id of the curve in the Loader source.

        Returns:
            str: Unit of the x axis.

        """
        if self._data is None:
            self._parse_file()
        return self._x_meta[YamlNames.unit]

    def get_y_unit(self, curve_id: str) -> str:
        """
        Get the unit of the y axis for the specified curve.

        Args:
            curve_id (str): Unique id of the curve in the Loader source.

        Returns:
            str: Unit of the y axis.

        """
        if self._data is None:
            self._parse_file()
        return self._y_meta[YamlNames.unit]

    def get_metadata(self, content_id: str) -> dict:
        """
        Retrieve metadata for the specified content ID.

        Args:
            content_id (str): Unique identifier for the content.

        Returns:
            dict: Metadata associated with the content.

        """
        if self._data is None:
            self._parse_file()
        return self._data[YamlNames.metadata_field]

    def _get_ids(self) -> list[str]:
        if self._content_ids is None:
            if self._data is None:
                self._parse_file()
            ids_list = list(self._data.keys())
            ids_list.remove(YamlNames.metadata_field)
            self._content_ids = ids_list
        return self._content_ids

    def _parse_file(self) -> None:
        with self.get_source().open(encoding=YamlNames.encoding) as f:
            d = yaml.safe_load(f)
            self._x_meta = d[YamlNames.metadata_field][YamlNames.x_field]
            self._y_meta = d[YamlNames.metadata_field][YamlNames.y_field]

            self._x_label = self._x_meta[YamlNames.attribute]
            self._y_label = self._y_meta[YamlNames.attribute]

            self._data = d

    def clear_cache(self) -> None:
        """Clear cached data."""
        self._data = None
        self._x_meta = None
        self._y_meta = None

        self._x_label = None
        self._y_label = None
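
A hedged usage sketch; the file path is invented, the exact YAML key layout is governed by the YamlNames constants (not shown here), and get_ids() is assumed to come from the FileLoader base class, as it does for the time vector loaders:

from framdata.loaders.curve_loaders import NVEYamlCurveLoader

loader = NVEYamlCurveLoader("db22_hydropower_curves/Hydropower.Modules.curves.yaml")
curve_id = loader.get_ids()[0]                 # every top-level key except the metadata field
x = loader.get_x_axis(curve_id)                # numpy array of x values
y = loader.get_y_axis(curve_id)                # numpy array of y values
print(loader.get_x_unit(curve_id), loader.get_y_unit(curve_id))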
__init__(source: Path | str, relative_loc: Path | str | None = None) -> None

Handle reading of curves from a single yaml file.

Parameters:

Name Type Description Default
source Path | str

Absolute Path to database or yaml file path.

required
relative_loc Optional[Union[Path, str]]

Path to yaml file relative to source. Defaults to None.

None
Source code in framdata/loaders/curve_loaders.py
def __init__(self, source: Path | str, relative_loc: Path | str | None = None) -> None:
    """
    Handle reading of curves from a single yaml file.

    Args:
        source (Path | str): Absolute Path to database or yaml file path.
        relative_loc (Optional[Union[Path, str]], optional): Path to yaml file relative to source. Defaults to None.

    """
    super().__init__(source, relative_loc)

    self._data = None
    self._x_meta: str = None
    self._y_meta: str = None

    self._x_label: str = None
    self._y_label: str = None
clear_cache() -> None

Clear cached data.

Source code in framdata/loaders/curve_loaders.py
def clear_cache(self) -> None:
    """Clear cached data."""
    self._data = None
    self._x_meta = None
    self._y_meta = None

    self._x_label = None
    self._y_label = None
get_metadata(content_id: str) -> dict

Retrieve metadata for the specified content ID.

Parameters:

Name Type Description Default
content_id str

Unique identifier for the content.

required

Returns:

Name Type Description
dict dict

Metadata associated with the content.

Source code in framdata/loaders/curve_loaders.py
def get_metadata(self, content_id: str) -> dict:
    """
    Retrieve metadata for the specified content ID.

    Args:
        content_id (str): Unique identifier for the content.

    Returns:
        dict: Metadata associated with the content.

    """
    if self._data is None:
        self._parse_file()
    return self._data[YamlNames.metadata_field]
get_x_axis(curve_id: str) -> NDArray

Get values of x axis.

Parameters:

Name Type Description Default
curve_id str

Unique id of the curve in the Loader source.

required

Returns:

Name Type Description
NDArray NDArray

Numpy array with values of x axis.

Source code in framdata/loaders/curve_loaders.py
def get_x_axis(self, curve_id: str) -> NDArray:
    """
    Get values of x axis.

    Args:
        curve_id (str): Unique id of the curve in the Loader source.

    Returns:
        NDArray: Numpy array with values of x axis.

    """
    if self._data is None:
        self._parse_file()
    return np.asarray(self._data[curve_id][self._x_label])
get_x_unit(curve_id: str) -> str

Get the unit of the x axis for the specified curve.

Parameters:

Name Type Description Default
curve_id str

Unique id of the curve in the Loader source.

required

Returns:

Name Type Description
str str

Unit of the x axis.

Source code in framdata/loaders/curve_loaders.py
def get_x_unit(self, curve_id: str) -> str:
    """
    Get the unit of the x axis for the specified curve.

    Args:
        curve_id (str): Unique id of the curve in the Loader source.

    Returns:
        str: Unit of the x axis.

    """
    if self._data is None:
        self._parse_file()
    return self._x_meta[YamlNames.unit]
get_y_axis(curve_id: str) -> NDArray

Get values of y axis.

Parameters:

Name Type Description Default
curve_id str

Unique id of the curve in the Loader source.

required

Returns:

Name Type Description
NDArray NDArray

Numpy array with values of y axis.

Source code in framdata/loaders/curve_loaders.py
def get_y_axis(self, curve_id: str) -> NDArray:
    """
    Get values of y axis.

    Args:
        curve_id (str): Unique id of the curve in the Loader source.

    Returns:
        NDArray: Numpy array with values of y axis.

    """
    if self._data is None:
        self._parse_file()
    return np.asarray(self._data[curve_id][self._y_label])
get_y_unit(curve_id: str) -> str

Get the unit of the y axis for the specified curve.

Parameters:

Name Type Description Default
curve_id str

Unique id of the curve in the Loader source.

required

Returns:

Name Type Description
str str

Unit of the y axis.

Source code in framdata/loaders/curve_loaders.py
def get_y_unit(self, curve_id: str) -> str:
    """
    Get the unit of the y axis for the specified curve.

    Args:
        curve_id (str): Unique id of the curve in the Loader source.

    Returns:
        str: Unit of the y axis.

    """
    if self._data is None:
        self._parse_file()
    return self._y_meta[YamlNames.unit]

time_vector_loaders

Contain classes for reading time vector data from various file types with formats specific to NVE.

This module provides:
  • NVEExcelTimeVectorLoader: Handles time vectors in Excel files.
  • NVEH5TimeVectorLoader: Handles time vectors in HDF5 files.
  • NVEYamlTimeVectorLoader: Handles time vectors in YAML files.
  • NVEParquetTimeVectorLoader: Handles time vectors in Parquet files.
NVEExcelTimeVectorLoader

Bases: NVETimeVectorLoader

Class for loading time vector data from NVE excel file sources.

Meant for short time vectors (e.g. yearly volumes or installed capacities) that are convenient to view and edit in Excel. Supports the following formats:
  • 'Horizontal': One column contains the IDs and the remaining column names represent the index; vector values are stored as rows.
  • 'Vertical': One column is the index (DateTime) and the other column names are vector IDs; vectors are stored as column values.
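
As a rough illustration of the two layouts (all IDs, years, and values are invented; the literal ID column header is defined by the metadata name constants):

Horizontal:

    ID                   2025    2030    2040
    Thermal.Gas.NO1      420.0   400.0   350.0
    Thermal.Coal.DK1     600.0   300.0     0.0

Vertical:

    DateTime      Thermal.Gas.NO1    Thermal.Coal.DK1
    2025-01-01    420.0              600.0
    2030-01-01    400.0              300.0
    2040-01-01    350.0                0.0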

Source code in framdata/loaders/time_vector_loaders.py
class NVEExcelTimeVectorLoader(NVETimeVectorLoader):
    """
    Class for loading time vector data from NVE excel file sources.

    Meant for short time vectors (e.g. yearly volumes or installed capacities) which are desirable to view and edit easily through Excel.
    Supports the following formats:
        - 'Horizontal': One column containing IDs, the other column names represent the index. Vector values as rows.
        - 'Vertical': One column as index (DateTime), the other column names are vector IDs. Vectors as column values.

    """

    _SUPPORTED_SUFFIXES: ClassVar[list] = [".xlsx"]
    _DATA_SHEET = "Data"
    _METADATA_SHEET = "Metadata"

    def __init__(self, source: Path | str, require_whole_years: bool, relative_loc: Path | str | None = None, validate: bool = True) -> None:
        """
        Initialize loader instance and connect it to an Excel file containing time vector data.

        Args:
            source (Path | str): Absolute Path to database or excel file.
            require_whole_years (bool): Flag for validating that the time vectors in the source contain data for complete years.
            relative_loc (Path | str | None, optional): Path to excel file relative to source. Defaults to None.
            validate (bool, optional): Flag to turn on validation of timevectors. NB! Loads all data into memory at once. Defaults to True.

        """
        super().__init__(source, require_whole_years, relative_loc)
        self._index: TimeIndex = None

        if validate:
            self.validate_vectors()

    def get_unit(self, vector_id: str) -> str:
        """
        Get the unit of the given time vector.

        Args:
            vector_id (str): ID of a time vector. Not used since all time vectors in the NVE excel files have the same
                             unit.

        Returns:
            str: Unit of the time vector.

        """
        return self.get_metadata("")[TvMn.UNIT]

    def get_values(self, vector_id: str) -> NDArray:
        """
        Get numpy array with all the values of a given vector in the Loader's excel file.

        Args:
            vector_id (str): Unique id of the vector in the file.

        Returns:
            NDArray: Numpy array with values.

        """
        if self._data is None:
            self._data = pd.DataFrame()
        if vector_id not in self._data.columns:
            is_horizontal = self._is_horizontal_format()
            column_filter = [vector_id]
            usecols = None
            if not is_horizontal:
                usecols = column_filter

            values_df = pd.read_excel(self.get_source(), sheet_name=self._DATA_SHEET, usecols=usecols)

            if is_horizontal:  # Convert the table to large time series format
                values_df = self._process_horizontal_format(values_df)
                values_df = self._enforce_dtypes(values_df, is_horizontal)
                self._data = values_df
            else:
                values_df = self._enforce_dtypes(values_df, is_horizontal)
                self._data[vector_id] = values_df
        return self._data[vector_id].to_numpy()

    def get_index(self, vector_id: str) -> ListTimeIndex:
        """
        Get the TimeIndex describing the time dimension of the vectors in the file.

        Args:
            vector_id (str): Not used since all vectors in the NVE excel files have the same index.

        Returns:
            TimeIndex: TimeIndex object describing the excel file's index.

        """
        meta = self.get_metadata("")
        if self._index is None:
            self._index = self._create_index(
                self.get_values(TvMn.DATETIME_COL),
                is_52_week_years=meta[TvMn.IS_52_WEEK_YEARS],
                extrapolate_first_point=meta[TvMn.EXTRAPOLATE_FISRT_POINT],
                extrapolate_last_point=meta[TvMn.EXTRAPOLATE_LAST_POINT],
            )
        return self._index

    def get_metadata(self, vector_id: str) -> dict[str, bool | int | str | datetime | timedelta | tzinfo | None]:
        """
        Read Excel file metadata.

        Args:
            vector_id (str): Not used.

        Raises:
            KeyError: If an expected metadata key is missing.

        Returns:
            dict[str, bool|int|str|datetime|timedelta|tzinfo|None]: Metadata dictionary.

        """
        if self._meta is None:
            path = self.get_source()
            raw_meta = pd.read_excel(path, sheet_name=self._METADATA_SHEET, na_values=[""]).replace([np.nan], [None]).to_dict("records")[0]

            self._meta = self._process_meta(raw_meta)
        return self._meta

    def _enforce_dtypes(self, values_df: pd.DataFrame | pd.Series, issmallformat: bool) -> pd.DataFrame:
        set_dtypes = "float"
        if isinstance(values_df, pd.DataFrame):
            set_dtypes = {c: "float" for c in values_df.columns if c != TvMn.DATETIME_COL}

        # ensure correct dtypes
        try:
            return values_df.astype(set_dtypes)
        except ValueError as e:
            index_column = TvMn.ID_COLUMN_NAME if issmallformat else TvMn.DATETIME_COL
            message = f"Error in {self} while reading file. All columns except '{index_column}' must consist of only float or integer numbers."
            raise RuntimeError(message) from e

    def _process_horizontal_format(self, horizontal_format_df: pd.DataFrame) -> pd.DataFrame:
        # We have to read the whole file to find the correct series

        # Rename the id column name and then transpose to get the correct format
        # Since the columns are counted as indices when transposing, we need to reset the index (but keep the DateTime
        # column)
        reformat_df = horizontal_format_df.rename(columns={TvMn.ID_COLUMN_NAME: TvMn.DATETIME_COL}).T.reset_index(drop=False)

        # after transposing, column names are set as the first row, which is DateTime, IDs
        reformat_df.columns = reformat_df.iloc[0]
        # We reindex by dropping the first row, thus removing the row of DateTime, IDs
        reformat_df = reformat_df.reindex(reformat_df.index.drop(0)).reset_index(drop=True)

        # Since it is possible to write only the year or year-month as the timestamp in the table,
        # we need to reformat to the correct datetime format
        reformat_df[TvMn.DATETIME_COL] = self._to_iso_datetimes(reformat_df[TvMn.DATETIME_COL])

        return reformat_df

    def _to_iso_datetimes(self, series: pd.Series) -> list[datetime]:
        """
        Convert a series of dates to ISO datetime format.

        Args:
            series (pd.Series): Series which values will be converted to ISO format.

        Raises:
            RuntimeError: When an input value which cannot be converted is encountered.

        Returns:
            list[datetime]: List of formatted datetimes.

        """
        reformatted = []
        three_segments = 3
        two_segments = 2
        one_segment = 1
        for i in series:
            new_i = str(i)
            date_split = len(new_i.split("-"))
            space_split = len(new_i.split(" "))
            time_split = len(new_i.split(":"))
            try:
                if date_split == one_segment:  # Only year is defined
                    # get datetime for first week first day
                    new_i = datetime.fromisocalendar(int(new_i), 1, 1)
                elif date_split == two_segments:
                    # Year and month is defined
                    new_i = datetime.strptime(new_i + "-01", "%Y-%m-%d")  # Add first day
                elif date_split == three_segments and space_split == one_segment and time_split == one_segment:
                    # days defined but not time
                    new_i = datetime.strptime(new_i, "%Y-%m-%d")
                elif date_split == three_segments and space_split == two_segments and time_split == one_segment:
                    new_i = datetime.strptime(new_i, "%Y-%m-%d %H")
                elif date_split == three_segments and space_split == two_segments and time_split == two_segments:
                    new_i = datetime.strptime(new_i, "%Y-%m-%d %H:%M")
                elif date_split == three_segments and space_split == two_segments and time_split == three_segments:
                    # Assume time is defined
                    new_i = datetime.strptime(new_i, "%Y-%m-%d %H:%M:%S")
                else:
                    msg = f"Could not convert value '{new_i}' to datetime format."
                    raise ValueError(msg)
            except Exception as e:
                msg = f"Loader {self} could not convert value '{new_i}' to datetime format. Check formatting, for example number of spaces."
                raise RuntimeError(msg) from e
            reformatted.append(new_i)
        return sorted(reformatted)

    def _is_horizontal_format(self) -> bool:
        """Determine if the file structure is the NVE small (horizontal) format."""
        column_names = pd.read_excel(self.get_source(), nrows=0, sheet_name=self._DATA_SHEET).columns.tolist()
        return TvMn.ID_COLUMN_NAME in column_names

    def _get_ids(self) -> list[str]:
        if self._content_ids is not None:
            return self._content_ids
        try:
            if self._is_horizontal_format():
                self._content_ids = pd.read_excel(
                    self.get_source(),
                    usecols=[TvMn.ID_COLUMN_NAME],
                    sheet_name=self._DATA_SHEET,
                )[TvMn.ID_COLUMN_NAME].tolist()
            else:
                columns_list = pd.read_excel(self.get_source(), nrows=0, sheet_name=self._DATA_SHEET).columns.tolist()
                columns_list.remove(TvMn.DATETIME_COL)
                self._content_ids = columns_list
        except ValueError as e:
            message = f"{self}: found problem with TimeVector IDs."
            raise RuntimeError(message) from e

        return self._content_ids

    def clear_cache(self) -> None:
        """Clear cached data."""
        self._data = None
        self._meta = None
        self._index = None
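
The horizontal layout accepts timestamps at several granularities. A hedged summary of how _to_iso_datetimes normalises them (the input strings are invented examples mirroring the branches above):

# "2030"                -> datetime.fromisocalendar(2030, 1, 1)  (first ISO day of that year)
# "2030-06"             -> 2030-06-01 00:00:00
# "2030-06-15"          -> 2030-06-15 00:00:00
# "2030-06-15 12"       -> 2030-06-15 12:00:00
# "2030-06-15 12:30"    -> 2030-06-15 12:30:00
# "2030-06-15 12:30:45" -> 2030-06-15 12:30:45
# Anything else raises RuntimeError, and the converted list is returned sorted.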
__init__(source: Path | str, require_whole_years: bool, relative_loc: Path | str | None = None, validate: bool = True) -> None

Initialize loader instance and connect it to an Excel file containing time vector data.

Parameters:

Name Type Description Default
source Path | str

Absolute Path to database or excel file.

required
require_whole_years bool

Flag for validating that the time vectors in the source contain data for complete years.

required
relative_loc Path | str | None

Path to excel file relative to source. Defaults to None.

None
validate bool

Flag to turn on validation of timevectors. NB! Loads all data into memory at once. Defaults to True.

True
Source code in framdata/loaders/time_vector_loaders.py
def __init__(self, source: Path | str, require_whole_years: bool, relative_loc: Path | str | None = None, validate: bool = True) -> None:
    """
    Initialize loader instance and connect it to an Excel file containing time vector data.

    Args:
        source (Path | str): Absolute Path to database or excel file.
        require_whole_years (bool): Flag for validating that the time vectors in the source contain data for complete years.
        relative_loc (Path | str | None, optional): Path to excel file relative to source. Defaults to None.
        validate (bool, optional): Flag to turn on validation of timevectors. NB! Loads all data into memory at once. Defaults to True.

    """
    super().__init__(source, require_whole_years, relative_loc)
    self._index: TimeIndex = None

    if validate:
        self.validate_vectors()
clear_cache() -> None

Clear cached data.

Source code in framdata/loaders/time_vector_loaders.py
def clear_cache(self) -> None:
    """Clear cached data."""
    self._data = None
    self._meta = None
    self._index = None
get_index(vector_id: str) -> ListTimeIndex

Get the TimeIndex describing the time dimension of the vectors in the file.

Parameters:

Name Type Description Default
vector_id str

Not used since all vectors in the NVE excel files have the same index.

required

Returns:

Name Type Description
TimeIndex ListTimeIndex

TimeIndex object describing the excel file's index.

Source code in framdata/loaders/time_vector_loaders.py
def get_index(self, vector_id: str) -> ListTimeIndex:
    """
    Get the TimeIndex describing the time dimension of the vectors in the file.

    Args:
        vector_id (str): Not used since all vectors in the NVE excel files have the same index.

    Returns:
        TimeIndex: TimeIndex object describing the excel file's index.

    """
    meta = self.get_metadata("")
    if self._index is None:
        self._index = self._create_index(
            self.get_values(TvMn.DATETIME_COL),
            is_52_week_years=meta[TvMn.IS_52_WEEK_YEARS],
            extrapolate_first_point=meta[TvMn.EXTRAPOLATE_FISRT_POINT],
            extrapolate_last_point=meta[TvMn.EXTRAPOLATE_LAST_POINT],
        )
    return self._index
get_metadata(vector_id: str) -> dict[str, bool | int | str | datetime | timedelta | tzinfo | None]

Read Excel file metadata.

Parameters:

Name Type Description Default
vector_id str

Not used.

required

Raises:

Type Description
KeyError

If an expected metadata key is missing.

Returns:

Type Description
dict[str, bool | int | str | datetime | timedelta | tzinfo | None]

dict[str, bool|int|str|datetime|timedelta|tzinfo|None]: Metadata dictionary.

Source code in framdata/loaders/time_vector_loaders.py
def get_metadata(self, vector_id: str) -> dict[str, bool | int | str | datetime | timedelta | tzinfo | None]:
    """
    Read Excel file metadata.

    Args:
        vector_id (str): Not used.

    Raises:
        KeyError: If an expected metadata key is missing.

    Returns:
        dict[str, bool|int|str|datetime|timedelta|tzinfo|None]: Metadata dictionary.

    """
    if self._meta is None:
        path = self.get_source()
        raw_meta = pd.read_excel(path, sheet_name=self._METADATA_SHEET, na_values=[""]).replace([np.nan], [None]).to_dict("records")[0]

        self._meta = self._process_meta(raw_meta)
    return self._meta
get_unit(vector_id: str) -> str

Get the unit of the given time vector.

Parameters:

Name Type Description Default
vector_id str

ID of a time vector. Not used since all time vectors in the NVE excel files have the same unit.

required

Returns:

Name Type Description
str str

Unit of the time vector.

Source code in framdata/loaders/time_vector_loaders.py
def get_unit(self, vector_id: str) -> str:
    """
    Get the unit of the given time vector.

    Args:
        vector_id (str): ID of a time vector. Not used since all time vectors in the NVE excel files have the same
                         unit.

    Returns:
        str: Unit of the time vector.

    """
    return self.get_metadata("")[TvMn.UNIT]
get_values(vector_id: str) -> NDArray

Get numpy array with all the values of a given vector in the Loader's excel file.

Parameters:

Name Type Description Default
vector_id str

Unique id of the vector in the file.

required

Returns:

Name Type Description
NDArray NDArray

Numpy array with values.

Source code in framdata/loaders/time_vector_loaders.py
def get_values(self, vector_id: str) -> NDArray:
    """
    Get numpy array with all the values of a given vector in the Loader's excel file.

    Args:
        vector_id (str): Unique id of the vector in the file.

    Returns:
        NDArray: Numpy array with values.

    """
    if self._data is None:
        self._data = pd.DataFrame()
    if vector_id not in self._data.columns:
        is_horizontal = self._is_horizontal_format()
        column_filter = [vector_id]
        usecols = None
        if not is_horizontal:
            usecols = column_filter

        values_df = pd.read_excel(self.get_source(), sheet_name=self._DATA_SHEET, usecols=usecols)

        if is_horizontal:  # Convert the table to large time series format
            values_df = self._process_horizontal_format(values_df)
            values_df = self._enforce_dtypes(values_df, is_horizontal)
            self._data = values_df
        else:
            values_df = self._enforce_dtypes(values_df, is_horizontal)
            self._data[vector_id] = values_df
    return self._data[vector_id].to_numpy()
NVEH5TimeVectorLoader

Bases: NVETimeVectorLoader

Class for loading time vector data from NVE HDF5 file sources.

Meant for large time vectors (e.g. hourly data over multiple years). Supports differing lengths and metadata of vectors stored in the file.

Specialized to the following format
  • index (h5py.Group, optional): Used to define indexes for vectors if index is supposed to only apply to that vector.
  • common_index (h5py.Dataset): Contains one numpy array for all vectors. This is a fallback index for vectors which have not defined their own index in the index group. Also used on purpose if many or all vectors have the same index.
  • metadata (h5py.Group): Used to connect a specific set of metadata to a particular vector.
  • common_metadata (h5py.Group): Contains one set of metadata fields for all vectors. Used in a similar way as common_index.
  • vectors (h5py.Group): Contains numpy arrays containing the vector values connected to a unique ID. The same ID is used to connect the vector to an index or metadata.
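
A hedged sketch of writing a file in this layout with h5py; the group and dataset names follow the description above and are assumed to match the H5Names constants, while the file name, vector ids, values, and the metadata field shown are invented:

import h5py
import numpy as np

hours_2030 = np.arange("2030-01-01", "2031-01-01", dtype="datetime64[h]")

with h5py.File("Power.Nodes.profiles.h5", "w") as h5f:
    vectors = h5f.create_group("vectors")
    vectors.create_dataset("NO1", data=np.random.rand(hours_2030.size))
    vectors.create_dataset("NO2", data=np.random.rand(hours_2030.size))

    # Fallback index shared by every vector without an entry in an "index" group.
    h5f.create_dataset("common_index", data=np.array([str(t) for t in hours_2030], dtype="S"))

    # Shared metadata; the field name "unit" is illustrative, real keys follow TvMn.
    h5f.create_group("common_metadata").create_dataset("unit", data="MW")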
Source code in framdata/loaders/time_vector_loaders.py
class NVEH5TimeVectorLoader(NVETimeVectorLoader):
    """
    Class for loading time vector data from NVE HDF5 file sources.

    Meant for large time vectors (e.g. hourly data over multiple years). Supports differing lengths and metadata of vectors stored in the file.

    Specialized to the following format:
        - index (h5py.Group, optional): Used to define indexes for vectors if index is supposed to only apply to that vector.
        - common_index (h5py.Dataset): Contains one numpy array for all vectors. This is a fallback index for vectors which have not defined their own index in
                                       the index group. Also used on purpose if many or all vectors have the same index.
        - metadata (h5py.Group): Used to connect a specific set of metadata to a particular vector.
        - common_metadata (h5py.Group): Contains one set of metadata fields for all vectors. Used in a similar way as common_index.
        - vectors (h5py.Group): Contains numpy arrays containing the vector values connected to a unique ID. The same ID is used to connect the vector to an
                                index or metadata.

    """

    _SUPPORTED_SUFFIXES: ClassVar[list] = [".h5", ".hdf5"]

    def __init__(self, source: Path | str, require_whole_years: bool, relative_loc: Path | str | None = None, validate: bool = True) -> None:
        """
        Initialize loader instance and connect it to an HDF5 file containing time vector data.

        Args:
            source (Path | str): Absolute Path to database or HDF5 file.
            require_whole_years (bool): Flag for validating that the time vectors in the source contain data for complete years.
            relative_loc (Path | str | None, optional): Path to HDF5 file relative to source. Defaults to None.
            validate (bool, optional): Whether to validate vectors after loading. NB! Loads all data into memory at once. Defaults to True.

        """
        super().__init__(source, require_whole_years, relative_loc)
        self._index: TimeIndex = None
        self._file_pointer = None

        if validate:
            self.validate_vectors()

    def get_values(self, vector_id: str) -> NDArray:
        """
        Get numpy array with all the values of a given vector in the Loader's HDF5 file.

        Args:
            vector_id (str): Unique id of the vector in the file.

        Returns:
            NDArray: Numpy array with values.

        """
        if self._data is None:
            self._data = dict()
        if vector_id not in self._data:
            with h5py.File(self.get_source(), mode="r") as h5f:
                self._data[vector_id] = self._read_vector_field(h5f, H5Names.VECTORS_GROUP, vector_id, field_type=h5py.Dataset, use_fallback=False)[()]
        return self._data[vector_id]

    def get_index(self, vector_id: str) -> TimeIndex:
        """
        Get the TimeIndex describing the time dimension of the vectors in the file.

        Args:
            vector_id (str): Not used since all vectors in the NVE HDF5 file have the same index.

        Returns:
            TimeIndex: TimeIndex object describing the HDF5 file's index.

        """
        if self._index is None:
            meta = self.get_metadata("")

            if TvMn.FREQUENCY not in meta or (TvMn.FREQUENCY in meta and meta[TvMn.FREQUENCY] is None):
                self._index = self._create_index(
                    datetimes=self._read_index(vector_id),
                    is_52_week_years=meta[TvMn.IS_52_WEEK_YEARS],
                    extrapolate_first_point=meta[TvMn.EXTRAPOLATE_FISRT_POINT],
                    extrapolate_last_point=meta[TvMn.EXTRAPOLATE_LAST_POINT],
                )
                return self._index
            index_array = self._read_index(vector_id) if meta[TvMn.START] is None or meta[TvMn.NUM_POINTS] is None else None
            start = meta[TvMn.START] if index_array is None else index_array[0].item()
            num_points = meta[TvMn.NUM_POINTS] if index_array is None else index_array.size

            self._index = FixedFrequencyTimeIndex(
                start,
                meta[TvMn.FREQUENCY],
                num_points,
                is_52_week_years=meta[TvMn.IS_52_WEEK_YEARS],
                extrapolate_first_point=meta[TvMn.EXTRAPOLATE_FISRT_POINT],
                extrapolate_last_point=meta[TvMn.EXTRAPOLATE_LAST_POINT],
            )

        return self._index

    def _read_index(self, vector_id: str) -> NDArray[np.datetime64]:
        with h5py.File(self.get_source(), mode="r") as h5f:
            decoded_index = np.char.decode(self._read_vector_field(h5f, H5Names.INDEX_GROUP, vector_id, h5py.Dataset)[()].astype(np.bytes_), encoding="utf-8")
            return decoded_index.astype(np.datetime64)

    def _read_vector_field(
        self,
        h5file: h5py.File,
        field_name: str,
        vector_id: str,
        field_type: type[h5py.Dataset | h5py.Group],
        use_fallback: bool = True,
    ) -> h5py.Dataset | h5py.Group:
        error = ""
        if field_name in h5file:  # check if group_name exists
            main_group = h5file[field_name]
            if not isinstance(main_group, h5py.Group):
                message = f"{self} expected '{field_name}' to be a {h5py.Group} in {h5file}. Got {type(main_group)}."
                raise TypeError(message)

            if vector_id in main_group:
                vector_field = main_group[vector_id]
                if not isinstance(vector_field, field_type):
                    message = f"{self} expected '{vector_id}' to be a {field_type} in {h5file}. Got {type(vector_field)}"
                    raise TypeError(message)
                return vector_field
            error = f"'{vector_id}' was not found in '{field_name}' group"
        else:
            error = f"'{field_name}' was not found in file"

        no_fallback_message = f"{self} expected '{vector_id}' in {h5py.Group} '{field_name}' "
        if not use_fallback:
            no_fallback_message += f"but {error}."
            raise KeyError(no_fallback_message)

        fallback_name = H5Names.COMMON_PREFIX + field_name
        if fallback_name in h5file:  # check if common_ + group_name exists
            fallback_field = h5file[fallback_name]
            if not isinstance(fallback_field, field_type):
                message = f"{self} expected '{fallback_field}' to be a {field_type} in {h5file}. Got {type(fallback_field)}."
                raise TypeError(message)
            return fallback_field

        message = (
            no_fallback_message
            + f"or a fallback {field_type} '{fallback_name}' in H5 file but "
            + f"{error},"
            + f" and fallback {field_type} '{fallback_name}' not found in file."
        )
        raise KeyError(message)

    def get_metadata(self, vector_id: str) -> dict[str, bool | int | str | datetime | timedelta | tzinfo | None]:
        """
        Retrieve and decode custom metadata from the HDF5 file.

        Args:
            vector_id (str): Not used

        Raises:
            KeyError: If any of the expected metadata keys is not found in file.

        Returns:
            dict: Dictionary with decoded metadata.

        """
        if self._meta is None:
            errors = set()
            meta = {}
            with h5py.File(self.get_source(), mode="r") as h5f:
                meta_group = self._read_vector_field(h5f, H5Names.METADATA_GROUP, vector_id, h5py.Group)
                for k, m in meta_group.items():
                    if isinstance(m, h5py.Dataset):
                        meta[k] = m[()]
                    else:
                        errors.add(f"Improper metadata format: Metadata key {k} exists but is a h5 group when it should be a h5 dataset.")
            self._report_errors(errors)
            self._meta = self._process_meta(meta)
        return self._meta

    def _get_ids(self) -> list[str]:
        with h5py.File(self.get_source(), mode="r") as h5f:
            if H5Names.VECTORS_GROUP in h5f:
                return list(h5f[H5Names.VECTORS_GROUP].keys())
            message = f"{self} required key '{H5Names.VECTORS_GROUP}' was not found in file."
            raise KeyError(message)

    def clear_cache(self) -> None:
        """Clear cached data."""
        self._data = None
        self._meta = None
        self._index = None
__init__(source: Path | str, require_whole_years: bool, relative_loc: Path | str | None = None, validate: bool = True) -> None

Initialize loader instance and connect it to an HDF5 file containing time vector data.

Parameters:

Name Type Description Default
source Path | str

Absolute Path to database or HDF5 file.

required
require_whole_years bool

Flag for validating that the time vectors in the source contain data for complete years.

required
relative_loc Path | str | None

Path to HDF5 file relative to source. Defaults to None.

None
validate bool

Whether to validate vectors after loading. NB! Loads all data into memory at once. Defaults to True.

True
Source code in framdata/loaders/time_vector_loaders.py
def __init__(self, source: Path | str, require_whole_years: bool, relative_loc: Path | str | None = None, validate: bool = True) -> None:
    """
    Initialize loader instance and connect it to an HDF5 file containing time vector data.

    Args:
        source (Path | str): Absolute Path to database or HDF5 file.
        require_whole_years (bool): Flag for validating that the time vectors in the source contain data for complete years.
        relative_loc (Path | str | None, optional): Path to HDF5 file relative to source. Defaults to None.
        validate (bool, optional): Whether to validate vectors after loading. NB! Loads all data into memory at once. Defaults to True.

    """
    super().__init__(source, require_whole_years, relative_loc)
    self._index: TimeIndex = None
    self._file_pointer = None

    if validate:
        self.validate_vectors()
clear_cache() -> None

Clear cached data.

Source code in framdata/loaders/time_vector_loaders.py
def clear_cache(self) -> None:
    """Clear cached data."""
    self._data = None
    self._meta = None
    self._index = None
get_index(vector_id: str) -> TimeIndex

Get the TimeIndex describing the time dimension of the vectors in the file.

Parameters:

Name Type Description Default
vector_id str

Not used since all vectors in the NVE HDF5 file have the same index.

required

Returns:

Name Type Description
TimeIndex TimeIndex

TimeIndex object describing the HDF5 file's index.

Source code in framdata/loaders/time_vector_loaders.py
def get_index(self, vector_id: str) -> TimeIndex:
    """
    Get the TimeIndex describing the time dimension of the vectors in the file.

    Args:
        vector_id (str): Not used since all vectors in the NVE HDF5 file have the same index.

    Returns:
        TimeIndex: TimeIndex object describing the HDF5 file's index.

    """
    if self._index is None:
        meta = self.get_metadata("")

        if TvMn.FREQUENCY not in meta or (TvMn.FREQUENCY in meta and meta[TvMn.FREQUENCY] is None):
            self._index = self._create_index(
                datetimes=self._read_index(vector_id),
                is_52_week_years=meta[TvMn.IS_52_WEEK_YEARS],
                extrapolate_first_point=meta[TvMn.EXTRAPOLATE_FISRT_POINT],
                extrapolate_last_point=meta[TvMn.EXTRAPOLATE_LAST_POINT],
            )
            return self._index
        index_array = self._read_index(vector_id) if meta[TvMn.START] is None or meta[TvMn.NUM_POINTS] is None else None
        start = meta[TvMn.START] if index_array is None else index_array[0].item()
        num_points = meta[TvMn.NUM_POINTS] if index_array is None else index_array.size

        self._index = FixedFrequencyTimeIndex(
            start,
            meta[TvMn.FREQUENCY],
            num_points,
            is_52_week_years=meta[TvMn.IS_52_WEEK_YEARS],
            extrapolate_first_point=meta[TvMn.EXTRAPOLATE_FISRT_POINT],
            extrapolate_last_point=meta[TvMn.EXTRAPOLATE_LAST_POINT],
        )

    return self._index
get_metadata(vector_id: str) -> dict[str, bool | int | str | datetime | timedelta | tzinfo | None]

Retrieve and decode custom metadata from the HDF5 file.

Parameters:

Name Type Description Default
vector_id str

Not used

required

Raises:

Type Description
KeyError

If any of the expected metadata keys is not found in file.

Returns:

Name Type Description
dict dict[str, bool | int | str | datetime | timedelta | tzinfo | None]

Dictionary with decoded metadata.

Source code in framdata/loaders/time_vector_loaders.py
def get_metadata(self, vector_id: str) -> dict[str, bool | int | str | datetime | timedelta | tzinfo | None]:
    """
    Retrieve and decode custom metadata from the HDF5 file.

    Args:
        vector_id (str): Not used

    Raises:
        KeyError: If any of the expected metadata keys is not found in file.

    Returns:
        dict: Dictionary with decoded metadata.

    """
    if self._meta is None:
        errors = set()
        meta = {}
        with h5py.File(self.get_source(), mode="r") as h5f:
            meta_group = self._read_vector_field(h5f, H5Names.METADATA_GROUP, vector_id, h5py.Group)
            for k, m in meta_group.items():
                if isinstance(m, h5py.Dataset):
                    meta[k] = m[()]
                else:
                    errors.add(f"Improper metadata format: Metadata key {k} exists but is an h5 group when it should be an h5 dataset.")
        self._report_errors(errors)
        self._meta = self._process_meta(meta)
    return self._meta
get_values(vector_id: str) -> NDArray

Get numpy array with all the values of a given vector in the Loader's HDF5 file.

Parameters:

    vector_id (str): Unique id of the vector in the file. [required]

Returns:

    NDArray: Numpy array with values.

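The per-vector caching pattern above reduces to the following h5py sketch; the file path, group name and vector id are placeholders (the real group name is H5Names.VECTORS_GROUP):

import h5py

path = "path/to/vectors.h5"  # placeholder path
vector_id = "NO1"            # placeholder vector id
cache = {}

if vector_id not in cache:
    with h5py.File(path, mode="r") as h5f:
        # Full read of a single dataset; later calls hit the cache instead of the file.
        cache[vector_id] = h5f["vectors"][vector_id][()]  # placeholder group name
values = cache[vector_id]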
Source code in framdata/loaders/time_vector_loaders.py
def get_values(self, vector_id: str) -> NDArray:
    """
    Get numpy array with all the values of a given vector in the Loader's HDF5 file.

    Args:
        vector_id (str): Unique id of the vector in the file.

    Returns:
        NDArray: Numpy array with values.

    """
    if self._data is None:
        self._data = dict()
    if vector_id not in self._data:
        with h5py.File(self.get_source(), mode="r") as h5f:
            self._data[vector_id] = self._read_vector_field(h5f, H5Names.VECTORS_GROUP, vector_id, field_type=h5py.Dataset, use_fallback=False)[()]
    return self._data[vector_id]
NVEParquetTimeVectorLoader

Bases: NVETimeVectorLoader

Class for loading time vector data from NVE parquet file sources.

Meant for large time vectors. All vectors in the file must have the same length and metadata. Supported format:

- 'Vertical', with one index column (DateTime) and the other columns containing vector values.

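A minimal usage sketch, assuming a parquet file from the database; the path and column id are hypothetical:

from framdata.loaders.time_vector_loaders import NVEParquetTimeVectorLoader

loader = NVEParquetTimeVectorLoader(
    source="path/to/profiles.parquet",  # hypothetical path
    require_whole_years=True,
    validate=False,  # skip up-front validation to avoid loading all vectors into memory
)
values = loader.get_values("NO1")  # hypothetical column id
index = loader.get_index("")       # vector_id is ignored; all columns share one index
loader.clear_cache()

Passing validate=False skips the up-front validation that would otherwise load all data into memory, as noted in the constructor docstring.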
Source code in framdata/loaders/time_vector_loaders.py
class NVEParquetTimeVectorLoader(NVETimeVectorLoader):
    """
    Class for loading time vector data from NVE parquet file sources.

    Meant for large time vectors. All vectors in the file must have the same length and metadata.
    Supported format:
        - 'Vertical', with one index column (DateTime) and the other columns containing vector values.

    """

    _SUPPORTED_SUFFIXES: ClassVar[list] = [".parquet"]

    def __init__(self, source: Path | str, require_whole_years: bool, relative_loc: Path | str | None = None, validate: bool = True) -> None:
        """
        Initialize loader instance and connect it to a Parquet file containing time vector data.

        Args:
            source (Path | str): Absolute Path to database or parquet file.
            require_whole_years (bool): Flag for validating that the time vectors in the source contain data for complete years.
            relative_loc (Path | str | None, optional): Path to parquet file relative to source. Defaults to None.
            validate (bool, optional): Flag to turn on validation of the time vectors. NB! Loads all data into memory at once. Defaults to True.

        """
        super().__init__(source, require_whole_years, relative_loc)
        self._index: TimeIndex = None
        if validate:
            self.validate_vectors()

    def get_values(self, vector_id: str) -> NDArray:
        """
        Get numpy array with all the values of a given vector in the Loader's parquet file.

        Args:
            vector_id (str): Unique id of the vector in the file.

        Returns:
            NDArray: Numpy array with values.

        """
        if self._data is None:
            self._data = dict()
        if vector_id not in self._data:
            table = pq.read_table(self.get_source(), columns=[vector_id])
            self._data[vector_id] = table[vector_id].to_numpy()
        return self._data[vector_id]

    def get_index(self, vector_id: str) -> TimeIndex:  # Could be more types of indexes?
        """
        Get the TimeIndex describing the time dimension of the vectors in the file.

        Args:
            vector_id (str): Not used since all vectors in the NVE parquet files have the same index.

        Returns:
            TimeIndex: TimeIndex object describing the parquet file's index.

        """
        if self._index is None:
            meta = self.get_metadata("")

            if TvMn.FREQUENCY not in meta or (TvMn.FREQUENCY in meta and meta[TvMn.FREQUENCY] is None):
                datetime_index = pd.DatetimeIndex(
                    pd.read_parquet(self.get_source(), columns=[TvMn.DATETIME_COL])[TvMn.DATETIME_COL],
                    tz=meta[TvMn.TIMEZONE],
                ).tolist()
                self._index = self._create_index(
                    datetimes=datetime_index,
                    is_52_week_years=meta[TvMn.IS_52_WEEK_YEARS],
                    extrapolate_first_point=meta[TvMn.EXTRAPOLATE_FISRT_POINT],
                    extrapolate_last_point=meta[TvMn.EXTRAPOLATE_LAST_POINT],
                )
                return self._index

            parquet_file = None
            if TvMn.START not in meta or (TvMn.START in meta and meta[TvMn.START] is None):
                parquet_file = pq.ParquetFile(self.get_source())
                start = pd.to_datetime(next(parquet_file.iter_batches(batch_size=1, columns=[TvMn.DATETIME_COL])))
            else:
                start = meta[TvMn.START]

            if TvMn.NUM_POINTS not in meta or (TvMn.NUM_POINTS in meta and meta[TvMn.NUM_POINTS] is None):
                if parquet_file is None:
                    parquet_file = pq.ParquetFile(self.get_source())
                num_points = parquet_file.metadata.num_rows
            else:
                num_points = meta[TvMn.NUM_POINTS]
            self._index = FixedFrequencyTimeIndex(
                start,
                meta[TvMn.FREQUENCY],
                num_points,
                is_52_week_years=meta[TvMn.IS_52_WEEK_YEARS],
                extrapolate_first_point=meta[TvMn.EXTRAPOLATE_FISRT_POINT],
                extrapolate_last_point=meta[TvMn.EXTRAPOLATE_LAST_POINT],
            )

        return self._index

    def get_metadata(self, vector_id: str) -> dict[str, bool | int | str | datetime | timedelta | tzinfo | None]:
        """
        Retrieve and decode custom metadata from the parquet file.

        Args:
            vector_id (str): Not used

        Raises:
            KeyError: If any of the expected metadata keys is not found in file.

        Returns:
            dict: Dictionary with decoded metadata.

        """
        if self._meta is None:
            path = self.get_source()
            raw_meta = pq.ParquetFile(path).schema_arrow.metadata

            self._meta = self._process_meta(raw_meta)
        return self._meta

    def _get_ids(self) -> list[str]:
        parquet_file = pq.ParquetFile(self.get_source())
        time_vector_ids: list[str] = parquet_file.schema_arrow.names
        time_vector_ids.remove(TvMn.DATETIME_COL)
        return time_vector_ids

    def clear_cache(self) -> None:
        """Clear cached data."""
        self._data = None
        self._meta = None
        self._index = None
__init__(source: Path | str, require_whole_years: bool, relative_loc: Path | str | None = None, validate: bool = True) -> None

Initialize loader instance and connect it to a Parquet file containing time vector data.

Parameters:

    source (Path | str): Absolute path to the database or parquet file. [required]
    require_whole_years (bool): Flag for validating that the time vectors in the source contain data for complete years. [required]
    relative_loc (Path | str | None, optional): Path to the parquet file relative to source. Defaults to None.
    validate (bool, optional): Flag to turn on validation of the time vectors. NB! Loads all data into memory at once. Defaults to True.
Source code in framdata/loaders/time_vector_loaders.py
def __init__(self, source: Path | str, require_whole_years: bool, relative_loc: Path | str | None = None, validate: bool = True) -> None:
    """
    Initialize loader instance and connect it to a Parquet file containing time vector data.

    Args:
        source (Path | str): Absolute Path to database or parquet file.
        require_whole_years (bool): Flag for validating that the time vectors in the source contain data for complete years.
        relative_loc (Path | str | None, optional): Path to parquet file relative to source. Defaults to None.
        validate (bool, optional): Flag to turn on validation of the time vectors. NB! Loads all data into memory at once. Defaults to True.

    """
    super().__init__(source, require_whole_years, relative_loc)
    self._index: TimeIndex = None
    if validate:
        self.validate_vectors()
clear_cache() -> None

Clear cached data.

Source code in framdata/loaders/time_vector_loaders.py
def clear_cache(self) -> None:
    """Clear cached data."""
    self._data = None
    self._meta = None
    self._index = None
get_index(vector_id: str) -> TimeIndex

Get the TimeIndex describing the time dimension of the vectors in the file.

Parameters:

    vector_id (str): Not used since all vectors in the NVE parquet files have the same index. [required]

Returns:

    TimeIndex: TimeIndex object describing the parquet file's index.

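When the frequency is known but the start or number of points is missing from the metadata, the loader only needs the first timestamp and the row count. A pyarrow sketch of those two reads; the path and the 'DateTime' column name are placeholders (the real column name is TvMn.DATETIME_COL):

import pandas as pd
import pyarrow.parquet as pq

path = "path/to/profiles.parquet"  # placeholder path
parquet_file = pq.ParquetFile(path)

num_points = parquet_file.metadata.num_rows  # row count from file metadata, no data read
first_batch = next(parquet_file.iter_batches(batch_size=1, columns=["DateTime"]))
start = pd.to_datetime(first_batch.column("DateTime")[0].as_py())  # first timestamp only

print(start, num_points)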
Source code in framdata/loaders/time_vector_loaders.py
def get_index(self, vector_id: str) -> TimeIndex:  # Could be more types of indexes?
    """
    Get the TimeIndex describing the time dimension of the vectors in the file.

    Args:
        vector_id (str): Not used since all vectors in the NVE parquet files have the same index.

    Returns:
        TimeIndex: TimeIndex object describing the parquet file's index.

    """
    if self._index is None:
        meta = self.get_metadata("")

        if TvMn.FREQUENCY not in meta or (TvMn.FREQUENCY in meta and meta[TvMn.FREQUENCY] is None):
            datetime_index = pd.DatetimeIndex(
                pd.read_parquet(self.get_source(), columns=[TvMn.DATETIME_COL])[TvMn.DATETIME_COL],
                tz=meta[TvMn.TIMEZONE],
            ).tolist()
            self._index = self._create_index(
                datetimes=datetime_index,
                is_52_week_years=meta[TvMn.IS_52_WEEK_YEARS],
                extrapolate_first_point=meta[TvMn.EXTRAPOLATE_FISRT_POINT],
                extrapolate_last_point=meta[TvMn.EXTRAPOLATE_LAST_POINT],
            )
            return self._index

        parquet_file = None
        if TvMn.START not in meta or (TvMn.START in meta and meta[TvMn.START] is None):
            parquet_file = pq.ParquetFile(self.get_source())
            start = pd.to_datetime(next(parquet_file.iter_batches(batch_size=1, columns=[TvMn.DATETIME_COL])))
        else:
            start = meta[TvMn.START]

        if TvMn.NUM_POINTS not in meta or (TvMn.NUM_POINTS in meta and meta[TvMn.NUM_POINTS] is None):
            if parquet_file is None:
                parquet_file = pq.ParquetFile(self.get_source())
            num_points = parquet_file.metadata.num_rows
        else:
            num_points = meta[TvMn.NUM_POINTS]
        self._index = FixedFrequencyTimeIndex(
            start,
            meta[TvMn.FREQUENCY],
            num_points,
            is_52_week_years=meta[TvMn.IS_52_WEEK_YEARS],
            extrapolate_first_point=meta[TvMn.EXTRAPOLATE_FISRT_POINT],
            extrapolate_last_point=meta[TvMn.EXTRAPOLATE_LAST_POINT],
        )

    return self._index
get_metadata(vector_id: str) -> dict[str, bool | int | str | datetime | timedelta | tzinfo | None]

Retrieve and decode custom metadata from the parquet file.

Parameters:

    vector_id (str): Not used. [required]

Raises:

    KeyError: If any of the expected metadata keys is not found in file.

Returns:

    dict[str, bool | int | str | datetime | timedelta | tzinfo | None]: Dictionary with decoded metadata.

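The raw schema metadata comes back as a bytes-to-bytes mapping; decoding it (which _process_meta handles in the loader) can be sketched like this, with a placeholder path:

import pyarrow.parquet as pq

path = "path/to/profiles.parquet"  # placeholder path
raw_meta = pq.ParquetFile(path).schema_arrow.metadata  # dict[bytes, bytes] or None

decoded = {key.decode(): value.decode() for key, value in (raw_meta or {}).items()}
print(decoded)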
Source code in framdata/loaders/time_vector_loaders.py
def get_metadata(self, vector_id: str) -> dict[str, bool | int | str | datetime | timedelta | tzinfo | None]:
    """
    Retrieve and decode custom metadata from the parquet file.

    Args:
        vector_id (str): Not used

    Raises:
        KeyError: If any of the expected metadata keys is not found in file.

    Returns:
        dict: Dictionary with decoded metadata.

    """
    if self._meta is None:
        path = self.get_source()
        raw_meta = pq.ParquetFile(path).schema_arrow.metadata

        self._meta = self._process_meta(raw_meta)
    return self._meta
get_values(vector_id: str) -> NDArray

Get numpy array with all the values of a given vector in the Loader's parquet file.

Parameters:

    vector_id (str): Unique id of the vector in the file. [required]

Returns:

    NDArray: Numpy array with values.

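Reading one column at a time keeps memory use proportional to the requested vector. A pyarrow sketch of that read, with a placeholder path and column id:

import pyarrow.parquet as pq

path = "path/to/profiles.parquet"  # placeholder path
vector_id = "NO1"                  # placeholder column id

table = pq.read_table(path, columns=[vector_id])  # reads only the requested column
values = table[vector_id].to_numpy()              # ChunkedArray -> numpy array
print(values.shape)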
Source code in framdata/loaders/time_vector_loaders.py
def get_values(self, vector_id: str) -> NDArray:
    """
    Get numpy array with all the values of a given vector in the Loader's parquet file.

    Args:
        vector_id (str): Unique id of the vector in the file.

    Returns:
        NDArray: Numpy array with values.

    """
    if self._data is None:
        self._data = dict()
    if vector_id not in self._data:
        table = pq.read_table(self.get_source(), columns=[vector_id])
        self._data[vector_id] = table[vector_id].to_numpy()
    return self._data[vector_id]
NVEYamlTimeVectoroader

Bases: NVETimeVectorLoader

Class for loading time vector data from NVE YAML file sources.

Meant for very sparse time vector data, where the vectors have varying lengths and indexes. Currently all vectors must have the same metadata within each file. Supported format:

- Metadata: field containing a dictionary with metadata for all vectors.
- Other fields are vector IDs with lists for the x and y axes.

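A sketch of the kind of YAML layout described above, parsed with yaml.safe_load. The field names ('Metadata', 'x', 'y', 'attribute', 'index', 'values') and the vector id are illustrative; the real names come from YamlNames:

import yaml

text = """
Metadata:
  x:
    attribute: values
  y:
    attribute: index
NO1.fuel_price:
  index: [2025-01-01, 2030-01-01]
  values: [21.5, 25.0]
"""
data = yaml.safe_load(text)
print(list(data.keys()))                 # ['Metadata', 'NO1.fuel_price']
print(data["NO1.fuel_price"]["values"])  # [21.5, 25.0]
# Note: yaml.safe_load parses the bare dates into datetime.date objects,
# which is why the loader converts date -> datetime in _date_to_datetime.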
Source code in framdata/loaders/time_vector_loaders.py
class NVEYamlTimeVectoroader(NVETimeVectorLoader):
    """
    Class for loading time vector data from NVE YAML file sources.

    Meant for very sparse time vector data, where the vectors have varying lengths and indexes. Currently all vectors must have the same metadata within each
    file.
    Supported format:
        - Metadata: field containing dictionary with metadata for all vectors.
        - Other fields are vector IDs with lists for x and y axes.

    """

    _SUPPORTED_SUFFIXES: ClassVar[list] = [".yaml", ".yml"]

    def __init__(self, source: Path | str, require_whole_years: bool, relative_loc: Path | str | None = None, validate: bool = True) -> None:
        """
        Initialize loader instance and connect it to a YAML file containing time vector data.

        Args:
            source (Path | str): Absolute path to the database or YAML file.
            require_whole_years (bool): Flag for validating that the time vectors in the source contain data for complete years.
            relative_loc (Path | str | None, optional): Path to the YAML file relative to source. Defaults to None.
            validate (bool, optional): Flag to turn on validation of the time vectors. NB! Loads all data into memory at once. Defaults to True.

        """
        super().__init__(source, require_whole_years, relative_loc)
        self._content_ids: list[str] = None

        self._values_label: str = None
        self._index_label: str = None

        if validate:
            self.validate_vectors()

    def get_values(self, vector_id: str) -> NDArray:
        """
        Get values of vector.

        Args:
            vector_id (str): Unique id of the curve in the Loader source.

        Returns:
            NDArray: Numpy array with values of vector.

        """
        if self._data is None:
            self._parse_file()
        values_list = self._data[vector_id][self._values_label]
        if len(values_list) == 0:
            message = f"Time vector {vector_id} in {self} contains no points."
            raise ValueError(message)
        return np.asarray(values_list)

    def get_index(self, vector_id: str) -> TimeIndex:
        """
        Get index of vector.

        Args:
            vector_id (str): Unique id of the curve in the Loader source.

        Returns:
            TimeIndex: TimeIndex describing the vector's time dimension.

        """
        meta = self.get_metadata(vector_id)  # also parses data
        try:
            datetime_list = [self._date_to_datetime(index_val) for index_val in self._data[vector_id][self._index_label]]
        except ValueError as e:
            message = f"{self} got non-date or non-datetime values in the index field of vector {vector_id}."
            raise ValueError(message) from e

        if len(datetime_list) == 0:
            message = f"Index of {vector_id} in {self} contains no points."
            raise ValueError(message)

        if (len(datetime_list) == 1 or self.get_values(vector_id).size == 1) and meta[TvMn.EXTRAPOLATE_FISRT_POINT] and meta[TvMn.EXTRAPOLATE_LAST_POINT]:
            # Even though _create_index can now handle ConstantTimeIndexes,
            # we need to consider that YAML time vectors can have the extra end date for its final period stored in its index.
            # That would lead to _create_time_index not creating a constant one when it should.
            # We may remove this feature in the future.
            return ConstantTimeIndex()

        args = (
            datetime_list,
            meta[TvMn.IS_52_WEEK_YEARS],
            meta[TvMn.EXTRAPOLATE_FISRT_POINT],
            meta[TvMn.EXTRAPOLATE_LAST_POINT],
        )

        if len(datetime_list) == len(self.get_values(vector_id)) + 1:
            return ListTimeIndex(*args)
        # create index with added end datetime
        return self._create_index(*args)

    def get_metadata(self, vector_id: str) -> dict[str, bool | int | str | datetime | timedelta | tzinfo | None]:
        """
        Read YAML file metadata.

        Args:
            vector_id (str): Not used.

        Raises:
            KeyError: If an expected metadata key is missing.

        Returns:
            dict[str, bool|int|str|datetime|timedelta|tzinfo|None]: Metadata dictionary.

        """
        if self._meta is None:
            raw_meta = self._data[YamlNames.metadata_field][YamlNames.x_field]

            self._meta = self._process_meta(raw_meta)
        return self._meta

    def _get_ids(self) -> list[str]:
        if self._content_ids is None:
            if self._data is None:
                self._parse_file()
            ids_list = list(self._data.keys())
            ids_list.remove(YamlNames.metadata_field)
            self._content_ids = ids_list
        return self._content_ids

    def _parse_file(self) -> None:
        with self.get_source().open(encoding=YamlNames.encoding) as f:
            d = yaml.safe_load(f)
            self._x_meta = d[YamlNames.metadata_field][YamlNames.x_field]
            self._y_meta = d[YamlNames.metadata_field][YamlNames.y_field]

            self._values_label = self._x_meta[YamlNames.attribute]
            self._index_label = self._y_meta[YamlNames.attribute]

            self._data = d

    def _date_to_datetime(self, value: date | datetime) -> datetime:
        if isinstance(value, date):
            value = datetime(value.year, value.month, value.day)
        elif not isinstance(value, datetime):
            message = "Value must be date or datetime."
            raise ValueError(message)
        return value

    def clear_cache(self) -> None:
        """Clear cached data."""
        self._data = None
        self._meta = None

        self._content_ids = None

        self._values_label = None
        self._index_label = None
__init__(source: Path | str, require_whole_years: bool, relative_loc: Path | str | None = None, validate: bool = True) -> None

Initialize loader instance and connect it to a YAML file containing time vector data.

Parameters:

    source (Path | str): Absolute path to the database or YAML file. [required]
    require_whole_years (bool): Flag for validating that the time vectors in the source contain data for complete years. [required]
    relative_loc (Path | str | None, optional): Path to the YAML file relative to source. Defaults to None.
    validate (bool, optional): Flag to turn on validation of the time vectors. NB! Loads all data into memory at once. Defaults to True.
Source code in framdata/loaders/time_vector_loaders.py
def __init__(self, source: Path | str, require_whole_years: bool, relative_loc: Path | str | None = None, validate: bool = True) -> None:
    """
    Initialize loader instance and connect it to a YAML file containing time vector data.

    Args:
        source (Path | str): Absolute path to the database or YAML file.
        require_whole_years (bool): Flag for validating that the time vectors in the source contain data for complete years.
        relative_loc (Path | str | None, optional): Path to the YAML file relative to source. Defaults to None.
        validate (bool, optional): Flag to turn on validation of the time vectors. NB! Loads all data into memory at once. Defaults to True.

    """
    super().__init__(source, require_whole_years, relative_loc)
    self._content_ids: list[str] = None

    self._values_label: str = None
    self._index_label: str = None

    if validate:
        self.validate_vectors()
clear_cache() -> None

Clear cached data.

Source code in framdata/loaders/time_vector_loaders.py
def clear_cache(self) -> None:
    """Clear cached data."""
    self._data = None
    self._meta = None

    self._content_ids = None

    self._values_label = None
    self._index_label = None
get_index(vector_id: str) -> TimeIndex

Get index of vector.

Parameters:

    vector_id (str): Unique id of the curve in the Loader source. [required]

Returns:

    TimeIndex: TimeIndex describing the vector's time dimension.

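A sketch of the branching described above, with made-up lists; the extrapolation flags stand in for the TvMn metadata values:

values = [21.5, 25.0]
index = ["2025-01-01", "2030-01-01", "2035-01-01"]  # one extra end date closing the final period
extrapolate_first = extrapolate_last = True

if (len(index) == 1 or len(values) == 1) and extrapolate_first and extrapolate_last:
    kind = "ConstantTimeIndex"
elif len(index) == len(values) + 1:
    kind = "ListTimeIndex (end date of the final period already present)"
else:
    kind = "index built by _create_index, which appends an end datetime"
print(kind)  # -> ListTimeIndex (...)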
Source code in framdata/loaders/time_vector_loaders.py
def get_index(self, vector_id: str) -> TimeIndex:
    """
    Get index of vector.

    Args:
        vector_id (str): Unique id of the curve in the Loader source.

    Returns:
        TimeIndex: TimeIndex describing the vector's time dimension.

    """
    meta = self.get_metadata(vector_id)  # also parses data
    try:
        datetime_list = [self._date_to_datetime(index_val) for index_val in self._data[vector_id][self._index_label]]
    except ValueError as e:
        message = f"{self} got non-date or non-datetime values in the index field of vector {vector_id}."
        raise ValueError(message) from e

    if len(datetime_list) == 0:
        message = f"Index of {vector_id} in {self} contains no points."
        raise ValueError(message)

    if (len(datetime_list) == 1 or self.get_values(vector_id).size == 1) and meta[TvMn.EXTRAPOLATE_FISRT_POINT] and meta[TvMn.EXTRAPOLATE_LAST_POINT]:
        # Even though _create_index can now handle ConstantTimeIndexes,
        # we need to consider that YAML time vectors can have the extra end date for its final period stored in its index.
        # That would lead to _create_time_index not creating a constant one when it should.
        # We may remove this feature in the future.
        return ConstantTimeIndex()

    args = (
        datetime_list,
        meta[TvMn.IS_52_WEEK_YEARS],
        meta[TvMn.EXTRAPOLATE_FISRT_POINT],
        meta[TvMn.EXTRAPOLATE_LAST_POINT],
    )

    if len(datetime_list) == len(self.get_values(vector_id)) + 1:
        return ListTimeIndex(*args)
    # create index with added end datetime
    return self._create_index(*args)
get_metadata(vector_id: str) -> dict[str, bool | int | str | datetime | timedelta | tzinfo | None]

Read YAML file metadata.

Parameters:

    vector_id (str): Not used. [required]

Raises:

    KeyError: If an expected metadata key is missing.

Returns:

    dict[str, bool | int | str | datetime | timedelta | tzinfo | None]: Metadata dictionary.

Source code in framdata/loaders/time_vector_loaders.py
def get_metadata(self, vector_id: str) -> dict[str, bool | int | str | datetime | timedelta | tzinfo | None]:
    """
    Read YAML file metadata.

    Args:
        vector_id (str): Not used.

    Raises:
        KeyError: If an expected metadata key is missing.

    Returns:
        dict[str, bool|int|str|datetime|timedelta|tzinfo|None]: Metadata dictionary.

    """
    if self._meta is None:
        raw_meta = self._data[YamlNames.metadata_field][YamlNames.x_field]

        self._meta = self._process_meta(raw_meta)
    return self._meta
get_values(vector_id: str) -> NDArray

Get values of vector.

Parameters:

    vector_id (str): Unique id of the curve in the Loader source. [required]

Returns:

    NDArray: Numpy array with values of vector.

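The read itself reduces to a dictionary lookup plus an empty-vector guard; a sketch with made-up parsed data and an illustrative values label:

import numpy as np

data = {"NO1.fuel_price": {"index": ["2025-01-01"], "values": [21.5]}}  # made-up parsed YAML
values_list = data["NO1.fuel_price"]["values"]
if len(values_list) == 0:
    raise ValueError("Time vector NO1.fuel_price contains no points.")
values = np.asarray(values_list)
print(values.dtype, values.shape)  # float64 (1,)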
Source code in framdata/loaders/time_vector_loaders.py
def get_values(self, vector_id: str) -> NDArray:
    """
    Get values of vector.

    Args:
        vector_id (str): Unique id of the curve in the Loader source.

    Returns:
        NDArray: Numpy array with values of vector.

    """
    if self._data is None:
        self._parse_file()
    values_list = self._data[vector_id][self._values_label]
    if len(values_list) == 0:
        message = f"Time vector {vector_id} in {self} contains no points."
        raise ValueError(message)
    return np.asarray(values_list)