# -*- coding: utf-8 -*-
"""
Created on Wed Dec 23 17:05:33 2020
:copyright:
Jared Peacock (jpeacock@usgs.gov)
:license: MIT
"""
# =============================================================================
# Imports
# =============================================================================
from __future__ import annotations
import inspect
from typing import Any, Optional
import numpy as np
from mt_metadata import timeseries
from mt_metadata.base import BaseDict
from mt_metadata.timeseries import filters
from mt_metadata.utils.summarize import summarize_standards
from mt_metadata.utils.validators import validate_attribute
from mth5 import STANDARDS_DTYPE
from mth5.groups.base import BaseGroup
from mth5.tables import MTH5Table
from mth5.utils.exceptions import MTH5TableError
# Name -> class lookup tables built from every class that ``inspect`` finds in
# the ``mt_metadata.timeseries`` package and its ``filters`` sub-module.
ts_classes = dict(inspect.getmembers(timeseries, inspect.isclass))
flt_classes = dict(inspect.getmembers(filters, inspect.isclass))
# =============================================================================
# Summarize standards
# =============================================================================
# =============================================================================
# Standards Group
# =============================================================================
class StandardsGroup(BaseGroup):
    """
    Container for metadata standards documentation stored in the HDF5 file.

    Stores the metadata standards in a summary table so that users can
    understand the metadata directly from the file without requiring
    external documentation.

    The standards live in a dataset named ``summary`` inside this group and
    use ``STANDARDS_DTYPE`` as their row layout.

    Attributes
    ----------
    summary_table : MTH5Table
        The standards summary table with metadata definitions.

    Notes
    -----
    By default the standards of the following ``mt_metadata`` modules are
    summarized: ``common``, ``timeseries``, ``timeseries.filters``,
    ``transfer_functions.tf``, ``features``, ``features.weights``,
    ``processing``, ``processing.fourier_coefficients`` and
    ``processing.aurora``.

    Examples
    --------
    >>> standards = StandardsGroup(group)
    >>> standards.initialize_group()
    >>> summary = standards.summary_table
    >>> print(summary.dtype.names)

    """

    def __init__(self, group: Any, **kwargs: Any) -> None:
        """
        Initialize StandardsGroup.

        Parameters
        ----------
        group : h5py.Group
            HDF5 group to manage standards data.
        **kwargs : Any
            Additional keyword arguments passed to BaseGroup.

        """
        super().__init__(group, **kwargs)

        # Settings used when the summary dataset is first created
        self._defaults_summary_attrs = {
            "name": "summary",
            "max_shape": (1000,),
            "dtype": STANDARDS_DTYPE,
        }

        # Modules summarized when no explicit module list is requested
        self._modules = [
            "common",
            "timeseries",
            "timeseries.filters",
            "transfer_functions.tf",
            "features",
            "features.weights",
            "processing",
            "processing.fourier_coefficients",
            "processing.aurora",
        ]

    @property
    def summary_table(self) -> MTH5Table:
        """Standards summary dataset wrapped in an :class:`MTH5Table`."""
        return self._get_summary_table()

    def _get_summary_table(self) -> MTH5Table:
        """
        Get the standards summary table from HDF5.

        Returns
        -------
        MTH5Table
            The MTH5Table object wrapping the ``summary`` dataset of this
            group, using ``STANDARDS_DTYPE``.

        """
        return MTH5Table(self.hdf5_group["summary"], STANDARDS_DTYPE)

    def summary_table_from_dict(self, summary_dict: dict[str, Any]) -> None:
        """
        Populate summary table from a dictionary of metadata standards.

        Each ``key: value`` pair of ``summary_dict`` becomes one row of the
        summary table. The key fills the first column; the remaining columns
        are looked up by name in the value dictionary.

        Parameters
        ----------
        summary_dict : dict[str, Any]
            Flattened dictionary of all metadata standards. Keys are
            attribute names, values are dictionaries holding one entry for
            every column of the summary table except the first.

        Raises
        ------
        KeyError
            If a value dictionary is missing one of the table columns.

        Notes
        -----
        Values are normalized before they are written:

        - Lists are converted to comma-separated strings (an empty list
          becomes an empty string)
        - None values become empty strings

        An empty ``summary_dict`` adds no rows.

        TODO
        ----
        Adapt method to accept pandas.DataFrame as alternative input.

        Examples
        --------
        >>> standards = StandardsGroup(group)
        >>> standards.summary_table_from_dict(metadata)

        """
        # Build the table wrapper once instead of on every property access
        table = self.summary_table
        row_dtype = table.dtype
        # The first column holds the attribute name, i.e. the dictionary key
        value_columns = row_dtype.names[1:]

        # Stays 0 when nothing is added so the log call below is always valid
        index = 0
        for key, v_dict in summary_dict.items():
            row_values = [key]
            for column in value_columns:
                value = v_dict[column]
                if isinstance(value, list):
                    value = ",".join(f"{item}" for item in value)
                elif value is None:
                    value = ""
                row_values.append(value)

            row = np.array([tuple(row_values)], row_dtype)
            index = table.add_row(row)

        self.logger.debug(f"Added {index} rows to Standards Group")

    def get_standards_summary(self, modules: Optional[list[str]] = None) -> np.ndarray:
        """
        Get standards for specified metadata modules.

        Calls ``summarize_standards`` once per module and concatenates the
        resulting arrays.

        Parameters
        ----------
        modules : list[str], optional
            List of module names to include (e.g., 'timeseries').
            If None, uses default modules: common, timeseries,
            timeseries.filters, transfer_functions.tf, features,
            features.weights, processing, processing.fourier_coefficients,
            processing.aurora.
            Default is None.

        Returns
        -------
        np.ndarray
            Concatenation of the per-module arrays, each requested with
            ``dtype=STANDARDS_DTYPE``.

        Raises
        ------
        ValueError
            Raised by ``np.concatenate`` if ``modules`` is an empty list.

        Examples
        --------
        >>> standards = StandardsGroup(group)
        >>> ts_standards = standards.get_standards_summary(['timeseries'])

        Get all default modules:

        >>> all_standards = standards.get_standards_summary()

        """
        if modules is None:
            modules = self._modules

        summaries = [
            summarize_standards(module, output_type="array", dtype=STANDARDS_DTYPE)
            for module in modules
        ]
        return np.concatenate(summaries)

    def summary_table_from_array(self, array: np.ndarray) -> None:
        """
        Populate summary table from a numpy structured array.

        Parameters
        ----------
        array : np.ndarray
            Structured numpy array with dtype matching STANDARDS_DTYPE.
            Each element represents one metadata attribute definition.

        Notes
        -----
        Iterates through all elements of the array and adds them
        sequentially to the summary table using add_row(). An empty array
        adds no rows.

        Examples
        --------
        >>> standards = StandardsGroup(group)
        >>> standards_array = standards.get_standards_summary()
        >>> standards.summary_table_from_array(standards_array)

        """
        summary_table = self._get_summary_table()

        # Stays 0 when nothing is added so the log call below is always valid
        index = 0
        # np.nditer refuses zero-sized operands by default, so skip it then
        if array.size > 0:
            # np.nditer hands out each element as a zero-dimensional array
            # rather than a numpy scalar.
            # NOTE(review): presumably add_row expects an ndarray - confirm.
            for row in np.nditer(array):
                index = summary_table.add_row(row)

        self.logger.debug(f"Added {index} rows to Standards Group")

    def initialize_group(self) -> None:
        """
        Initialize the standards group and create the summary table.

        Creates the summary table dataset in the HDF5 file and populates it
        with metadata standards from all default modules.

        Notes
        -----
        Initialization process:

        1. Creates an empty, resizable HDF5 dataset for the summary table
        2. Passes ``dataset_options`` to the dataset creation only when a
           compression is configured
        3. Sets HDF5 attributes: type, last_updated, reference
        4. Populates table with standards from all default modules
        5. Calls ``write_metadata()``

        The summary table uses STANDARDS_DTYPE and supports up to 1000 rows.

        Examples
        --------
        >>> standards = StandardsGroup(group)
        >>> standards.initialize_group()
        >>> summary_table = standards.summary_table

        """
        defaults = self._defaults_summary_attrs

        # Dataset options are only forwarded when a compression is set
        if self.dataset_options["compression"] is None:
            creation_options = {}
        else:
            creation_options = self.dataset_options

        summary_dataset = self.hdf5_group.create_dataset(
            defaults["name"],
            (0,),
            maxshape=defaults["max_shape"],
            dtype=defaults["dtype"],
            **creation_options,
        )

        # NOTE(review): "date_time" is stored as a literal placeholder, not a
        # timestamp - confirm whether a real date is intended here.
        summary_dataset.attrs.update(
            {
                "type": "summary table",
                "last_updated": "date_time",
                "reference": summary_dataset.ref,
            }
        )

        self.logger.debug(
            f"Created {defaults['name']} table with "
            f"max_shape = {defaults['max_shape']}, "
            f"dtype={defaults['dtype']}"
        )
        # Join the options first; adjacent string literals would otherwise
        # merge and turn the prefix into the join separator.
        used_options = "; ".join(
            f"{k} = {v}" for k, v in self.dataset_options.items()
        )
        self.logger.debug(f"used options: {used_options}")

        self.summary_table_from_array(self.get_standards_summary())
        self.write_metadata()