Source code for mth5.groups.fc_dataset

# -*- coding: utf-8 -*-
"""
Created on Thu Mar 10 09:02:16 2022

@author: jpeacock
"""

# =============================================================================
# Imports
# =============================================================================
import weakref

import h5py
import numpy as np
import xarray as xr
from loguru import logger

from mth5.utils.exceptions import MTH5Error
from mth5.helpers import to_numpy_type
from mth5.timeseries.ts_helpers import make_dt_coordinates

from mt_metadata.transfer_functions.processing.fourier_coefficients import (
    Channel,
)


# =============================================================================


[docs]class FCChannelDataset: """ This will hold multi-dimensional set of Fourier Coefficients Columns - time - frequency [ integer as harmonic index or float ] - fc (complex) - weight_channel (maybe) - weight_band (maybe) - weight_time (maybe) Attributes: - name - start time - end time - acquistion_sample_rate - decimated_sample rate - window_sample_rate (delta_t within the window) - units - [optional] weights or masking - frequency method (integer * window length / delta_t of window) :param dataset: hdf5 dataset :type dataset: h5py.Dataset :param dataset_metadata: data set metadata see :class:`mt_metadata.transfer_functions.tf.StatisticalEstimate`, defaults to None :type dataset_metadata: :class:`mt_metadata.transfer_functions.tf.StatisticalEstimate`, optional :param write_metadata: True to write metadata, defaults to True :type write_metadata: Boolean, optional :param **kwargs: DESCRIPTION :type **kwargs: TYPE :raises MTH5Error: When an estimate is not present, or metadata name does not match the given name """ def __init__( self, dataset, dataset_metadata=None, write_metadata=True, **kwargs ): if dataset is not None and isinstance(dataset, (h5py.Dataset)): self.hdf5_dataset = weakref.ref(dataset)() self.logger = logger # set metadata to the appropriate class. Standards is not a # Base object so should be skipped. If the class name is not # defined yet set to Base class. self.metadata = Channel() if not hasattr(self.metadata, "mth5_type"): self._add_base_attributes() self.metadata.hdf5_reference = self.hdf5_dataset.ref self.metadata.mth5_type = self._class_name # if the input data set already has filled attributes, namely if the # channel data already exists then read them in with our writing back if "mth5_type" in list(self.hdf5_dataset.attrs.keys()): self.metadata.from_dict( {self.hdf5_dataset.attrs["mth5_type"]: self.hdf5_dataset.attrs} ) # if metadata is input, make sure that its the same class type amd write # to the hdf5 dataset if dataset_metadata is not None: if not isinstance(dataset_metadata, type(self.metadata)): msg = ( f"metadata must be type metadata.{self._class_name} not " "{type(dataset_metadata)}" ) self.logger.error(msg) raise MTH5Error(msg) # load from dict because of the extra attributes for MTH5 self.metadata.from_dict(dataset_metadata.to_dict()) self.metadata.hdf5_reference = self.hdf5_dataset.ref self.metadata.mth5_type = self._class_name # write out metadata to make sure that its in the file. if write_metadata: self.write_metadata() # if the attrs don't have the proper metadata keys yet write them if not "mth5_type" in list(self.hdf5_dataset.attrs.keys()): self.write_metadata() def _add_base_attributes(self): # add 2 attributes that will help with querying # 1) the metadata class name self.metadata.add_base_attribute( "mth5_type", self._class_name, { "type": str, "required": True, "style": "free form", "description": "type of group", "units": None, "options": [], "alias": [], "example": "group_name", "default": None, }, ) # 2) the HDF5 reference that can be used instead of paths self.metadata.add_base_attribute( "hdf5_reference", self.hdf5_dataset.ref, { "type": "h5py_reference", "required": True, "style": "free form", "description": "hdf5 internal reference", "units": None, "options": [], "alias": [], "example": "<HDF5 Group Reference>", "default": None, }, ) def __str__(self): return self.metadata.to_json() def __repr__(self): return self.__str__() @property def _class_name(self): return self.__class__.__name__.split("Dataset")[0]
[docs] def read_metadata(self): """ Read metadata from the HDF5 file into the metadata container, that way it can be validated. """ self.metadata.from_dict({self._class_name: self.hdf5_dataset.attrs})
[docs] def write_metadata(self): """ Write metadata from the metadata container to the HDF5 attrs dictionary. """ meta_dict = self.metadata.to_dict()[self.metadata._class_name.lower()] for key, value in meta_dict.items(): value = to_numpy_type(value) self.hdf5_dataset.attrs.create(key, value)
@property def n_windows(self): """number of time windows""" return self.hdf5_dataset.shape[0] @property def time(self): """ Time array that includes the start of each time window :return: DESCRIPTION :rtype: TYPE """ return make_dt_coordinates( self.metadata.time_period.start, 1.0 / self.metadata.sample_rate_window_step, self.n_windows, ) @property def n_frequencies(self): """number of frequencies (window size)""" return self.hdf5_dataset.shape[1] @property def frequency(self): """ frequency array dictated by window size and sample rate :return: DESCRIPTION :rtype: TYPE """ return np.linspace( 0, self.metadata.sample_rate_decimation_level / 2, self.n_frequencies, )
[docs] def replace_dataset(self, new_data_array): """ replace the entire dataset with a new one, nothing left behind :param new_data_array: new data array :type new_data_array: :class:`numpy.ndarray` """ if not isinstance(new_data_array, np.ndarray): try: new_data_array = np.array(new_data_array) except (ValueError, TypeError) as error: msg = f"{error} Input must be a numpy array not {type(new_data_array)}" self.logger.exception(msg) raise TypeError(msg) if new_data_array.shape != self.hdf5_dataset.shape: self.hdf5_dataset.resize(new_data_array.shape) self.hdf5_dataset[...] = new_data_array
[docs] def to_xarray(self): """ :return: an xarray DataArray with appropriate metadata and the appropriate coordinates. :rtype: :class:`xarray.DataArray` .. note:: that metadta will not be validated if changed in an xarray. loads from memory """ return xr.DataArray( data=self.hdf5_dataset[()], dims=["time", "frequency"], name=self.metadata.component, coords=[ ("time", self.time), ("frequency", self.frequency), ], attrs=self.metadata.to_dict(single=True), )
[docs] def to_numpy(self): """ :return: a numpy structured array with :rtype: :class:`numpy.ndarray` loads into RAM """ return self.hdf5_dataset[()]
[docs] def from_numpy(self, new_estimate): """ :return: a numpy structured array :rtype: :class:`numpy.ndarray` .. note:: data is a builtin to numpy and cannot be used as a name loads into RAM """ if not isinstance(new_estimate, np.ndarray): try: new_estimate = np.array(new_estimate) except (ValueError, TypeError) as error: msg = f"{error} Input must be a numpy array not {type(new_estimate)}" self.logger.exception(msg) raise TypeError(msg) if new_estimate.dtype != self.hdf5_dataset.dtype: msg = ( f"Input array must be type {new_estimate.dtype} not " "{self.hdf5_dataset.dtype}" ) self.logger.error(msg) raise TypeError(msg) if new_estimate.shape != self.hdf5_dataset.shape: self.hdf5_dataset.resize(new_estimate.shape) self.hdf5_dataset[...] = new_estimate
[docs] def from_xarray(self, data): """ :return: an xarray DataArray with appropriate metadata and the appropriate coordinates base on the metadata. :rtype: :class:`xarray.DataArray` .. note:: that metadta will not be validated if changed in an xarray. loads from memory """ self.metadata.time_period.start = data.time[0].values self.metadata.time_period.end = data.time[-1].values self.metadata.sample_rate_decimation_level = ( data.coords["frequency"].values.max() * 2 ) self.metadata.sample_rate_window_step = np.median( np.diff(data.coords["time"].values) ) / np.timedelta64(1, "s") self.metadata.component = data.name try: self.metadata.units = data.units except AttributeError: self.logger.debug("Could not find 'units' in xarray") self.write_metadata() self.from_numpy(data.to_numpy())