Source code for mth5.groups.estimate_dataset

# -*- coding: utf-8 -*-
"""
Created on Thu Mar 10 09:02:16 2022

@author: jpeacock
"""

# =============================================================================
# Imports
# =============================================================================
import weakref

import h5py
import numpy as np
import xarray as xr
from loguru import logger

from mt_metadata.transfer_functions.tf import StatisticalEstimate

from mth5.utils.exceptions import MTH5Error
from mth5.helpers import to_numpy_type

# =============================================================================


class EstimateDataset:
    """
    Holds a statistical estimate.

    This will hold multi-dimensional statistical estimates for transfer
    functions.

    :param dataset: hdf5 dataset
    :type dataset: h5py.Dataset
    :param dataset_metadata: data set metadata see
     :class:`mt_metadata.transfer_functions.tf.StatisticalEstimate`,
     defaults to None
    :type dataset_metadata:
     :class:`mt_metadata.transfer_functions.tf.StatisticalEstimate`, optional
    :param write_metadata: True to write metadata, defaults to True
    :type write_metadata: Boolean, optional
    :param **kwargs: accepted for signature compatibility, currently unused
    :raises MTH5Error: When ``dataset`` is not an :class:`h5py.Dataset`, or
     ``dataset_metadata`` is not the same type as the metadata container

    """

    def __init__(self, dataset, dataset_metadata=None, write_metadata=True, **kwargs):
        # Everything below dereferences ``self.hdf5_dataset``; fail early
        # with a clear message rather than an AttributeError further down.
        if dataset is None or not isinstance(dataset, h5py.Dataset):
            msg = f"dataset must be an h5py.Dataset not {type(dataset)}"
            logger.error(msg)
            raise MTH5Error(msg)

        # the weak reference is dereferenced immediately, so the attribute
        # holds the dataset object itself.
        self.hdf5_dataset = weakref.ref(dataset)()
        self.logger = logger

        # set metadata to the appropriate class.
        self.metadata = StatisticalEstimate()

        if not hasattr(self.metadata, "mth5_type"):
            self._add_base_attributes()
            self.metadata.hdf5_reference = self.hdf5_dataset.ref
            self.metadata.mth5_type = self._class_name

        # if the input data set already has filled attributes, namely if the
        # data already exists, then read them in without writing back
        if "mth5_type" in self.hdf5_dataset.attrs:
            self.metadata.from_dict(
                {self.hdf5_dataset.attrs["mth5_type"]: self.hdf5_dataset.attrs}
            )

        # if metadata is input, make sure that it is the same class type and
        # write to the hdf5 dataset
        if dataset_metadata is not None:
            if not isinstance(dataset_metadata, type(self.metadata)):
                msg = (
                    f"metadata must be type metadata.{self._class_name} not "
                    f"{type(dataset_metadata)}"
                )
                self.logger.error(msg)
                raise MTH5Error(msg)

            # load from dict because of the extra attributes for MTH5
            self.metadata.from_dict(dataset_metadata.to_dict())
            self.metadata.hdf5_reference = self.hdf5_dataset.ref
            self.metadata.mth5_type = self._class_name

            # write out metadata to make sure that it is in the file.
            # NOTE(review): nesting under the ``dataset_metadata`` branch was
            # reconstructed from a flattened listing -- confirm.
            if write_metadata:
                self.write_metadata()

        # if the attrs don't have the proper metadata keys yet write them
        if "mth5_type" not in self.hdf5_dataset.attrs:
            self.write_metadata()

    def _add_base_attributes(self):
        """
        Register the two MTH5 bookkeeping attributes on the metadata object.
        """
        # add 2 attributes that will help with querying
        # 1) the metadata class name
        self.metadata.add_base_attribute(
            "mth5_type",
            self._class_name,
            {
                "type": str,
                "required": True,
                "style": "free form",
                "description": "type of group",
                "units": None,
                "options": [],
                "alias": [],
                "example": "group_name",
                "default": None,
            },
        )

        # 2) the HDF5 reference that can be used instead of paths
        self.metadata.add_base_attribute(
            "hdf5_reference",
            self.hdf5_dataset.ref,
            {
                "type": "h5py_reference",
                "required": True,
                "style": "free form",
                "description": "hdf5 internal reference",
                "units": None,
                "options": [],
                "alias": [],
                "example": "<HDF5 Group Reference>",
                "default": None,
            },
        )

    def __str__(self):
        return self.metadata.to_json()

    def __repr__(self):
        return self.__str__()

    @property
    def _class_name(self):
        """Class name with the ``Dataset`` suffix removed."""
        return self.__class__.__name__.split("Dataset")[0]

    def _as_ndarray(self, values):
        """
        Return ``values`` as a :class:`numpy.ndarray`.

        Arrays are returned untouched; anything else goes through
        :func:`numpy.array`.

        :param values: array or array-like input
        :return: the input as a numpy array
        :rtype: :class:`numpy.ndarray`
        :raises TypeError: if the input cannot be converted

        """
        if isinstance(values, np.ndarray):
            return values
        try:
            return np.array(values)
        except (ValueError, TypeError) as error:
            msg = f"{error} Input must be a numpy array not {type(values)}"
            self.logger.exception(msg)
            raise TypeError(msg) from error

    def read_metadata(self):
        """
        Read metadata from the HDF5 file into the metadata container, that
        way it can be validated.

        """
        self.metadata.from_dict({self._class_name: self.hdf5_dataset.attrs})

    def write_metadata(self):
        """
        Write metadata from the metadata container to the HDF5 attrs
        dictionary.

        """
        meta_dict = self.metadata.to_dict()[self.metadata._class_name.lower()]
        for key, value in meta_dict.items():
            # convert each value with ``to_numpy_type`` before storing it as
            # an HDF5 attribute
            self.hdf5_dataset.attrs.create(key, to_numpy_type(value))

    def replace_dataset(self, new_data_array):
        """
        Replace the entire dataset with a new one, nothing left behind.

        The dataset is resized first when the shapes differ.

        :param new_data_array: new data array
        :type new_data_array: :class:`numpy.ndarray`
        :raises TypeError: if the input cannot be converted to a numpy array

        """
        new_data_array = self._as_ndarray(new_data_array)

        if new_data_array.shape != self.hdf5_dataset.shape:
            self.hdf5_dataset.resize(new_data_array.shape)
        self.hdf5_dataset[...] = new_data_array

    def to_xarray(self, period):
        """
        Build a DataArray from the stored estimate.

        :param period: coordinate values for the ``period`` dimension
        :return: an xarray DataArray with dimensions
         ``("period", "output", "input")``; the output and input coordinates
         and the attrs come from the metadata container.
        :rtype: :class:`xarray.DataArray`

        .. note:: metadata will not be validated if changed in an xarray.

        loads the full dataset into memory

        """
        return xr.DataArray(
            data=self.hdf5_dataset[()],
            dims=["period", "output", "input"],
            name=self.metadata.name,
            coords=[
                ("period", period),
                ("output", self.metadata.output_channels),
                ("input", self.metadata.input_channels),
            ],
            attrs=self.metadata.to_dict(single=True),
        )

    def to_numpy(self):
        """
        :return: the full contents of the dataset
        :rtype: :class:`numpy.ndarray`

        loads into RAM

        """
        return self.hdf5_dataset[()]

    def from_numpy(self, new_estimate):
        """
        Overwrite the dataset with a numpy array of the same dtype.

        The dataset is resized first when the shapes differ.

        :param new_estimate: new estimate values
        :type new_estimate: :class:`numpy.ndarray`
        :raises TypeError: if the input cannot be converted to a numpy array
         or its dtype differs from the dataset dtype

        """
        new_estimate = self._as_ndarray(new_estimate)

        if new_estimate.dtype != self.hdf5_dataset.dtype:
            msg = (
                f"Input array must be type {self.hdf5_dataset.dtype} "
                f"not {new_estimate.dtype}"
            )
            self.logger.error(msg)
            raise TypeError(msg)

        if new_estimate.shape != self.hdf5_dataset.shape:
            self.hdf5_dataset.resize(new_estimate.shape)
        self.hdf5_dataset[...] = new_estimate

    def from_xarray(self, data):
        """
        Fill the metadata and the dataset from an xarray DataArray.

        The ``output`` and ``input`` coordinates, the array name and the
        dtype name are copied into the metadata container and written to the
        HDF5 attrs before the values are stored with :meth:`from_numpy`.

        :param data: estimate with ``output`` and ``input`` coordinates
        :type data: :class:`xarray.DataArray`
        :raises TypeError: if the array dtype differs from the dataset dtype

        .. note:: metadata will not be validated if changed in an xarray.

        """
        self.metadata.output_channels = data.coords["output"].values.tolist()
        self.metadata.input_channels = data.coords["input"].values.tolist()
        self.metadata.name = data.name
        self.metadata.data_type = data.dtype.name

        self.write_metadata()
        self.from_numpy(data.to_numpy())