# Source code for mth5.groups.survey
# -*- coding: utf-8 -*-
from __future__ import annotations
"""Survey-level HDF5 helpers for MTH5."""
from typing import Any
import h5py
# =============================================================================
# Imports
# =============================================================================
import numpy as np
import pandas as pd
from mt_metadata.timeseries import Survey
from mth5.groups import (
BaseGroup,
FiltersGroup,
MasterStationGroup,
ReportsGroup,
StandardsGroup,
)
from mth5.helpers import to_numpy_type, validate_name
from mth5.utils.exceptions import MTH5Error
# =============================================================================
# Survey Group
# =============================================================================
[docs]
class MasterSurveyGroup(BaseGroup):
    """Container for every survey stored under ``Experiment/Surveys``.

    Offers methods to create, look up, and delete individual surveys, and a
    ``channel_summary`` table covering the channels found below them.

    Examples
    --------
    >>> from mth5 import mth5
    >>> m5 = mth5.MTH5()
    >>> _ = m5.open_mth5("/tmp/example.mth5", mode="a")
    >>> surveys = m5.surveys_group
    >>> _ = surveys.add_survey("survey_01")
    >>> surveys.channel_summary.head() # doctest: +SKIP
    """

    def __init__(self, group: h5py.Group, **kwargs: Any) -> None:
        """Wrap ``group``; every keyword argument is handed to ``BaseGroup``."""
        super().__init__(group, **kwargs)
@property
[docs]
def channel_summary(self) -> pd.DataFrame:
    """Return a DataFrame summarizing all channels across surveys.

    Walks every survey, station, run, and channel below this group and
    records one row per channel.

    Returns
    -------
    pandas.DataFrame
        One row per channel with the columns ``survey``, ``station``,
        ``run``, ``latitude``, ``longitude``, ``elevation``, ``component``,
        ``start``, ``end``, ``n_samples``, ``sample_rate``,
        ``measurement_type``, ``azimuth``, ``tilt``, ``units``, and
        ``hdf5_reference``. The columns are present even when no channel
        exists, in which case the frame simply has zero rows.

    Examples
    --------
    >>> summary = surveys.channel_summary
    >>> set(summary.columns) >= {"survey", "station", "run", "component"}
    True
    """
    # The record layout is the same for every channel, so it is built once
    # instead of once per channel.
    # NOTE(review): the fixed-width string fields truncate longer values
    # (e.g. a survey id longer than 10 characters) -- confirm this is intended.
    summary_dtype = np.dtype(
        [
            ("survey", "U10"),
            ("station", "U10"),
            ("run", "U11"),
            ("latitude", float),
            ("longitude", float),
            ("elevation", float),
            ("component", "U20"),
            ("start", "datetime64[ns]"),
            ("end", "datetime64[ns]"),
            ("n_samples", int),
            ("sample_rate", float),
            ("measurement_type", "U12"),
            ("azimuth", float),
            ("tilt", float),
            ("units", "U25"),
            ("hdf5_reference", h5py.ref_dtype),
        ]
    )

    rows = []
    for survey in self.groups_list:
        survey_group = self.get_survey(survey)
        stations_group = survey_group.stations_group
        for station in stations_group.groups_list:
            station_group = stations_group.get_station(station)
            for run in station_group.groups_list:
                run_group = station_group.get_run(run)
                for ch in run_group.groups_list:
                    ch_dataset = run_group.get_channel(ch)
                    station_metadata = station_group.metadata
                    ch_metadata = ch_dataset.metadata
                    rows.append(
                        (
                            survey_group.metadata.id,
                            station_metadata.id,
                            run_group.metadata.id,
                            station_metadata.location.latitude,
                            station_metadata.location.longitude,
                            station_metadata.location.elevation,
                            ch_metadata.component,
                            ch_metadata.time_period.start,
                            ch_metadata.time_period.end,
                            ch_dataset.hdf5_dataset.size,
                            ch_metadata.sample_rate,
                            ch_metadata.type,
                            ch_metadata.measurement_azimuth,
                            ch_metadata.measurement_tilt,
                            ch_metadata.units,
                            ch_dataset.hdf5_dataset.ref,
                        )
                    )

    # Passing the dtype explicitly keeps the named fields (and therefore the
    # DataFrame columns) even when ``rows`` is empty.
    return pd.DataFrame(np.array(rows, dtype=summary_dtype))
[docs]
def add_survey(
    self, survey_name: str, survey_metadata: Survey | None = None
) -> "SurveyGroup":
    """Add or fetch a survey at ``/Experiment/Surveys/<name>``.

    Parameters
    ----------
    survey_name : str
        Survey identifier; validated with ``validate_name``.
    survey_metadata : Survey, optional
        Metadata container used to seed the survey attributes. When omitted,
        a ``Survey`` whose id is the validated name is created.

    Returns
    -------
    SurveyGroup
        Wrapper for the created or existing survey.

    Raises
    ------
    ValueError
        If ``survey_name`` is empty.
    MTH5Error
        If the provided metadata id conflicts with the group name. The
        group created for this call is removed again before raising.

    Examples
    --------
    >>> survey = surveys.add_survey("survey_01")
    >>> survey.metadata.id
    'survey_01'
    """
    if not survey_name:
        raise ValueError("survey name is None, do not know what to name it")
    survey_name = validate_name(survey_name)

    # Guard only the creation call: a ValueError here is treated as "the
    # group already exists". Errors raised later while building or
    # initializing the wrapper must not be mistaken for that case.
    try:
        survey_group = self.hdf5_group.create_group(survey_name)
    except ValueError:
        msg = f"survey {survey_name} already exists, returning existing group."
        self.logger.info(msg)
        return self.get_survey(survey_name)
    self.logger.debug(f"Created group {survey_group.name}")

    if survey_metadata is None:
        survey_metadata = Survey(id=survey_name)
    elif validate_name(survey_metadata.id) != survey_name:
        # Do not leave an empty, uninitialized group behind; otherwise the
        # next call with this name would silently return it.
        del self.hdf5_group[survey_name]
        msg = (
            f"survey group name {survey_name} must be same as "
            f"survey id {survey_metadata.id.replace(' ', '_')}"
        )
        self.logger.error(msg)
        raise MTH5Error(msg)

    survey_obj = SurveyGroup(
        survey_group,
        survey_metadata=survey_metadata,
        **self.dataset_options,
    )
    survey_obj.initialize_group()
    return survey_obj
[docs]
def get_survey(self, survey_name: str) -> "SurveyGroup":
    """Look up a survey that is already stored in this group.

    Parameters
    ----------
    survey_name : str
        Name of the survey; passed through ``validate_name`` before lookup.

    Returns
    -------
    SurveyGroup
        Wrapper around the matching HDF5 group, built with this group's
        dataset options.

    Raises
    ------
    MTH5Error
        If a ``KeyError`` occurs while fetching or wrapping the group.

    Examples
    --------
    >>> existing = surveys.get_survey("survey_01")
    >>> existing.metadata.id
    'survey_01'
    """
    survey_name = validate_name(survey_name)
    try:
        survey_obj = SurveyGroup(self.hdf5_group[survey_name], **self.dataset_options)
    except KeyError:
        msg = f"{survey_name} does not exist, check survey_list for existing names"
        self.logger.exception(msg)
        raise MTH5Error(msg)
    else:
        return survey_obj
[docs]
def remove_survey(self, survey_name: str) -> None:
    """Delete a survey reference from the file.

    Parameters
    ----------
    survey_name : str
        Existing survey name; validated with ``validate_name``.

    Raises
    ------
    MTH5Error
        If no survey with that name exists.

    Notes
    -----
    HDF5 deletion removes the reference only; storage is not reclaimed.

    Examples
    --------
    >>> surveys.remove_survey("survey_01")
    """
    survey_name = validate_name(survey_name)
    try:
        del self.hdf5_group[survey_name]
    except KeyError as key_error:
        msg = f"{survey_name} does not exist, check survey_list for existing names"
        self.logger.exception(msg)
        # Chain the lookup failure so the root cause stays in the traceback.
        raise MTH5Error(msg) from key_error

    # The original message lost a space at a string-concatenation boundary
    # ("HDF5file size"); it is spelled out in full here.
    self.logger.info(
        "Deleting a survey does not reduce the HDF5 "
        "file size, it simply removes the reference. If "
        "file size reduction is your goal, simply copy "
        "what you want into another file."
    )
[docs]
class SurveyGroup(BaseGroup):
    """Wrapper around one survey stored at ``Experiment/Surveys/<id>``.

    Manages the survey's metadata, exposes its child groups (stations,
    reports, filters, standards), and offers helpers to keep the metadata in
    sync with them.

    Examples
    --------
    >>> survey = surveys.add_survey("survey_01")
    >>> survey.metadata.id
    'survey_01'
    """

    def __init__(
        self,
        group: h5py.Group,
        survey_metadata: Survey | None = None,
        **kwargs: Any,
    ) -> None:
        """Wrap ``group``, passing ``survey_metadata`` on as the group metadata."""
        super().__init__(group, group_metadata=survey_metadata, **kwargs)
        # Subgroups that ``initialize_group`` creates, in creation order.
        self._default_subgroup_names = ["Stations", "Reports", "Filters", "Standards"]
[docs]
def initialize_group(self, **kwargs: Any) -> None:
    """Build the default subgroups, then persist the survey metadata.

    Parameters
    ----------
    **kwargs
        Attribute name/value pairs that are set on this instance after the
        subgroups exist and before the metadata is written.

    Examples
    --------
    >>> survey.initialize_group()
    """
    # The subgroups come first because the metadata reads from them.
    for subgroup_name in self._default_subgroup_names:
        self.hdf5_group.create_group(f"{subgroup_name}")
        accessor_name = f"{subgroup_name.lower()}_group"
        getattr(self, accessor_name).initialize_group()

    for attr_name, attr_value in kwargs.items():
        setattr(self, attr_name, attr_value)

    self.write_metadata()
@BaseGroup.metadata.getter
[docs]
def metadata(self) -> Survey:
    """Return the survey metadata, topped up with stations and filters.

    On first access the metadata is read via ``read_metadata``. Afterwards,
    any station group and any filter that the metadata object does not know
    yet is added to it.

    Returns
    -------
    Survey
        The (possibly extended) metadata object held by this group.
    """
    if not self._has_read_metadata:
        self.read_metadata()
        self._has_read_metadata = True

    # Stations: only walk the groups when the name lists disagree.
    try:
        if self.stations_group.groups_list != self._metadata.station_names:
            for station_key in self.stations_group.groups_list:
                try:
                    station = self.stations_group.get_station(station_key)
                    already_known = (
                        station.metadata.id in self._metadata.stations.keys()
                    )
                    # Groups such as Features, FCs, or TransferFunction are
                    # not stations and are left out.
                    if (
                        already_known
                        or station.metadata.mth5_type.lower() != "station"
                    ):
                        continue
                    self._metadata.add_station(station.metadata)
                except MTH5Error:
                    self.logger.warning(f"Could not find station {station_key}")
    except KeyError:
        self.logger.debug(
            "Stations Group does not exists yet. Metadata contains no station information"
        )

    # Filters: same idea, keyed by filter name.
    try:
        filters_group = self.filters_group
        stored_names = list(filters_group.filter_dict.keys())
        if stored_names != list(self._metadata.filters.keys()):
            for filter_key in self.filters_group.filter_dict.keys():
                try:
                    if filter_key in self._metadata.filters.keys():
                        continue
                    self._metadata.filters[filter_key] = (
                        filters_group.to_filter_object(filter_key)
                    )
                except MTH5Error:
                    self.logger.warning(f"Could not find filter {filter_key}")
    except KeyError:
        self.logger.debug(
            "Filters Group does not exists yet. Metadata contains no filter information"
        )

    return self._metadata
[docs]
def write_metadata(self) -> None:
    """Write HDF5 attributes from the survey metadata object.

    Each entry of ``self._metadata.to_dict(single=True)`` is converted with
    ``to_numpy_type`` and stored as an attribute of the HDF5 group. When
    the error text indicates a file without write intent, a warning is
    logged and nothing is raised.

    Raises
    ------
    KeyError, ValueError
        Re-raised unchanged when the error is not one of the recognized
        read-only messages.
    """
    try:
        for key, value in self._metadata.to_dict(single=True).items():
            value = to_numpy_type(value)
            self.hdf5_group.attrs.create(key, value)
            # Log only after the attribute has actually been written.
            self.logger.debug(f"wrote metadata {key} = {value}")
        self._has_read_metadata = True
    except KeyError as key_error:
        if "no write intent" not in str(key_error):
            # Bare ``raise`` keeps the original exception and traceback
            # instead of nesting it inside a new KeyError.
            raise
        self.logger.warning("File is in read-only mode, cannot write metadata.")
    except ValueError as value_error:
        if "Unable to synchronously create group" not in str(value_error):
            raise
        self.logger.warning("File is in read-only mode, cannot write metadata.")
@property
[docs]
def stations_group(self) -> MasterStationGroup:
    """Convenience accessor for ``/Survey/Stations`` group."""
    # Forward the dataset options just like the filters, reports, and
    # standards accessors of this class do, so the stations wrapper is
    # built with the same settings as its siblings.
    return MasterStationGroup(self.hdf5_group["Stations"], **self.dataset_options)
@property
[docs]
def filters_group(self) -> FiltersGroup:
    """Wrap the survey's ``Filters`` subgroup in a ``FiltersGroup``.

    The wrapper is built with this group's dataset options.
    """
    wrapper_cls = FiltersGroup
    filters_hdf5 = self.hdf5_group["Filters"]
    return wrapper_cls(filters_hdf5, **self.dataset_options)
@property
[docs]
def reports_group(self) -> ReportsGroup:
    """Wrap the survey's ``Reports`` subgroup in a ``ReportsGroup``.

    The wrapper is built with this group's dataset options.
    """
    wrapper_cls = ReportsGroup
    reports_hdf5 = self.hdf5_group["Reports"]
    return wrapper_cls(reports_hdf5, **self.dataset_options)
@property
[docs]
def standards_group(self) -> StandardsGroup:
    """Wrap the survey's ``Standards`` subgroup in a ``StandardsGroup``.

    The wrapper is built with this group's dataset options.
    """
    wrapper_cls = StandardsGroup
    standards_hdf5 = self.hdf5_group["Standards"]
    return wrapper_cls(standards_hdf5, **self.dataset_options)
[docs]
def update_survey_metadata(self, survey_dict: dict[str, Any] | None = None) -> None:
    """Retired entry point; use :py:meth:`update_metadata` instead.

    Parameters
    ----------
    survey_dict : dict, optional
        Accepted for signature compatibility only; it is never read.

    Raises
    ------
    DeprecationWarning
        On every call, pointing callers to ``update_metadata``.

    Examples
    --------
    >>> survey.update_survey_metadata() # doctest: +ELLIPSIS
    Traceback (most recent call last):
    ...
    DeprecationWarning: 'update_survey_metadata' has been deprecated use 'update_metadata()'
    """
    notice = "'update_survey_metadata' has been deprecated use 'update_metadata()'"
    raise DeprecationWarning(notice)
[docs]
def update_metadata(self, survey_dict: dict[str, Any] | None = None) -> None:
    """Refresh the survey metadata from the station summary and write it.

    Parameters
    ----------
    survey_dict : dict, optional
        Extra metadata values merged into the survey metadata (entries that
        are ``None`` are skipped) before the summary is evaluated.

    Notes
    -----
    With at least one row in the station summary, the survey start/end
    dates and the northwest/southeast corners are recomputed from it. The
    metadata is written to HDF5 in every case.

    Examples
    --------
    >>> _ = survey.update_metadata()
    >>> survey.metadata.time_period.start_date # doctest: +SKIP
    '2020-01-01'
    """
    summary = self.stations_group.station_summary.copy()
    self.logger.debug("Updating survey metadata from stations summary table")

    if survey_dict:
        self.metadata.from_dict(survey_dict, skip_none=True)

    # An empty summary carries nothing to derive dates or bounds from.
    if len(summary) > 0:
        # Keep only the date part of the ISO timestamp.
        first_start = summary.start.min().isoformat()
        self._metadata.time_period.start_date = first_start.split("T")[0]
        last_end = summary.end.max().isoformat()
        self._metadata.time_period.end_date = last_end.split("T")[0]

        # Bounding box: north-west is (max lat, min lon), south-east is
        # (min lat, max lon).
        self._metadata.northwest_corner.latitude = summary.latitude.max()
        self._metadata.northwest_corner.longitude = summary.longitude.min()
        self._metadata.southeast_corner.latitude = summary.latitude.min()
        self._metadata.southeast_corner.longitude = summary.longitude.max()

    # NOTE(review): an earlier comment said stations and runs have to be
    # removed from the metadata before writing; nothing is removed here.
    self.write_metadata()