Source code for mth5.groups.survey

# -*- coding: utf-8 -*-
from __future__ import annotations


"""Survey-level HDF5 helpers for MTH5."""

from typing import Any

import h5py

# =============================================================================
# Imports
# =============================================================================
import numpy as np
import pandas as pd
from mt_metadata.timeseries import Survey

from mth5.groups import (
    BaseGroup,
    FiltersGroup,
    MasterStationGroup,
    ReportsGroup,
    StandardsGroup,
)
from mth5.helpers import to_numpy_type, validate_name
from mth5.utils.exceptions import MTH5Error


# =============================================================================
# Survey Group
# =============================================================================

[docs]
class MasterSurveyGroup(BaseGroup):
    """Collection helper for surveys under ``Experiment/Surveys``.

    Provides helpers to add, fetch, or remove surveys and to summarize all
    channels in the experiment.

    Examples
    --------
    >>> from mth5 import mth5
    >>> m5 = mth5.MTH5()
    >>> _ = m5.open_mth5("/tmp/example.mth5", mode="a")
    >>> surveys = m5.surveys_group
    >>> _ = surveys.add_survey("survey_01")
    >>> surveys.channel_summary.head()  # doctest: +SKIP
    """

    def __init__(self, group: h5py.Group, **kwargs: Any) -> None:
        super().__init__(group, **kwargs)

    @property

[docs]
    def channel_summary(self) -> pd.DataFrame:
        """Return a DataFrame summarizing all channels across surveys.

        Returns
        -------
        pandas.DataFrame
            Columns include survey, station, run, location, component,
            start/end, sample info, orientation, units, and HDF5 reference.

        Examples
        --------
        >>> summary = surveys.channel_summary
        >>> set(summary.columns) >= {"survey", "station", "run", "component"}
        True
        """
        ch_list = []
        for survey in self.groups_list:
            survey_group = self.get_survey(survey)
            for station in survey_group.stations_group.groups_list:
                station_group = survey_group.stations_group.get_station(station)
                for run in station_group.groups_list:
                    run_group = station_group.get_run(run)
                    for ch in run_group.groups_list:
                        ch_dataset = run_group.get_channel(ch)
                        entry = np.array(
                            [
                                (
                                    survey_group.metadata.id,
                                    station_group.metadata.id,
                                    run_group.metadata.id,
                                    station_group.metadata.location.latitude,
                                    station_group.metadata.location.longitude,
                                    station_group.metadata.location.elevation,
                                    ch_dataset.metadata.component,
                                    ch_dataset.metadata.time_period.start,
                                    ch_dataset.metadata.time_period.end,
                                    ch_dataset.hdf5_dataset.size,
                                    ch_dataset.metadata.sample_rate,
                                    ch_dataset.metadata.type,
                                    ch_dataset.metadata.measurement_azimuth,
                                    ch_dataset.metadata.measurement_tilt,
                                    ch_dataset.metadata.units,
                                    ch_dataset.hdf5_dataset.ref,
                                )
                            ],
                            dtype=np.dtype(
                                [
                                    ("survey", "U10"),
                                    ("station", "U10"),
                                    ("run", "U11"),
                                    ("latitude", float),
                                    ("longitude", float),
                                    ("elevation", float),
                                    ("component", "U20"),
                                    ("start", "datetime64[ns]"),
                                    ("end", "datetime64[ns]"),
                                    ("n_samples", int),
                                    ("sample_rate", float),
                                    ("measurement_type", "U12"),
                                    ("azimuth", float),
                                    ("tilt", float),
                                    ("units", "U25"),
                                    ("hdf5_reference", h5py.ref_dtype),
                                ]
                            ),
                        )
                        ch_list.append(entry)
        ch_list = np.array(ch_list)
        return pd.DataFrame(ch_list.flatten())



[docs]
    def add_survey(
        self, survey_name: str, survey_metadata: Survey | None = None
    ) -> "SurveyGroup":
        """Add or fetch a survey at ``/Experiment/Surveys/<name>``.

        Parameters
        ----------
        survey_name : str
            Survey identifier; validated with ``validate_name``.
        survey_metadata : Survey, optional
            Metadata container used to seed the survey attributes.

        Returns
        -------
        SurveyGroup
            Wrapper for the created or existing survey.

        Raises
        ------
        ValueError
            If ``survey_name`` is empty.
        MTH5Error
            If the provided metadata id conflicts with the group name.

        Examples
        --------
        >>> survey = surveys.add_survey("survey_01")
        >>> survey.metadata.id
        'survey_01'
        """
        if not survey_name:
            raise ValueError("survey name is None, do not know what to name it")
        survey_name = validate_name(survey_name)
        try:
            survey_group = self.hdf5_group.create_group(survey_name)
            self.logger.debug(f"Created group {survey_group.name}")

            if survey_metadata is None:
                survey_metadata = Survey(id=survey_name)
            else:
                if validate_name(survey_metadata.id) != survey_name:
                    msg = (
                        f"survey group name {survey_name} must be same as "
                        f"survey id {survey_metadata.id.replace(' ', '_')}"
                    )
                    self.logger.error(msg)
                    raise MTH5Error(msg)
            survey_obj = SurveyGroup(
                survey_group,
                survey_metadata=survey_metadata,
                **self.dataset_options,
            )
            survey_obj.initialize_group()
        except ValueError:
            msg = f"survey {survey_name} already exists, returning existing group."
            self.logger.info(msg)
            survey_obj = self.get_survey(survey_name)
        return survey_obj



[docs]
    def get_survey(self, survey_name: str) -> "SurveyGroup":
        """Return an existing survey by name.

        Parameters
        ----------
        survey_name : str
            Existing survey name.

        Returns
        -------
        SurveyGroup
            Wrapper for the requested survey.

        Raises
        ------
        MTH5Error
            If the survey does not exist.

        Examples
        --------
        >>> existing = surveys.get_survey("survey_01")
        >>> existing.metadata.id
        'survey_01'
        """

        survey_name = validate_name(survey_name)

        try:
            return SurveyGroup(self.hdf5_group[survey_name], **self.dataset_options)
        except KeyError:
            msg = (
                f"{survey_name} does not exist, "
                + "check survey_list for existing names"
            )
            self.logger.exception(msg)
            raise MTH5Error(msg)



[docs]
    def remove_survey(self, survey_name: str) -> None:
        """Delete a survey reference from the file.

        Parameters
        ----------
        survey_name : str
            Existing survey name.

        Notes
        -----
        HDF5 deletion removes the reference only; storage is not reclaimed.

        Examples
        --------
        >>> surveys.remove_survey("survey_01")
        """

        survey_name = validate_name(survey_name)

        try:
            del self.hdf5_group[survey_name]
            self.logger.info(
                "Deleting a survey does not reduce the HDF5"
                "file size it simply remove the reference. If "
                "file size reduction is your goal, simply copy"
                " what you want into another file."
            )
        except KeyError:
            msg = f"{survey_name} does not exist, check survey_list for existing names"
            self.logger.exception(msg)
            raise MTH5Error(msg)





[docs]
class SurveyGroup(BaseGroup):
    """Wrapper for a single survey at ``Experiment/Surveys/<id>``.

    Handles survey-level metadata, child groups (stations, reports, filters,
    standards), and synchronization utilities.

    Examples
    --------
    >>> survey = surveys.add_survey("survey_01")
    >>> survey.metadata.id
    'survey_01'
    """

    def __init__(
        self,
        group: h5py.Group,
        survey_metadata: Survey | None = None,
        **kwargs: Any,
    ) -> None:
        super().__init__(group, group_metadata=survey_metadata, **kwargs)

        self._default_subgroup_names = [
            "Stations",
            "Reports",
            "Filters",
            "Standards",
        ]


[docs]
    def initialize_group(self, **kwargs: Any) -> None:
        """Create default subgroups and write survey metadata.

        Parameters
        ----------
        **kwargs
            Additional attributes to set on the instance before initialization.

        Examples
        --------
        >>> survey.initialize_group()
        """
        # need to make groups first because metadata pulls from them.
        for group_name in self._default_subgroup_names:
            self.hdf5_group.create_group(f"{group_name}")
            m5_grp = getattr(self, f"{group_name.lower()}_group")
            m5_grp.initialize_group()

        for key, value in kwargs.items():
            setattr(self, key, value)
        self.write_metadata()


    @BaseGroup.metadata.getter

[docs]
    def metadata(self) -> Survey:
        """Survey metadata enriched with station and filter information."""

        if not self._has_read_metadata:
            self.read_metadata()
            self._has_read_metadata = True

        try:
            if self.stations_group.groups_list != self._metadata.station_names:
                for key in self.stations_group.groups_list:
                    try:
                        key_group = self.stations_group.get_station(key)
                        if key_group.metadata.id in self._metadata.stations.keys():
                            continue
                        # skip non-station groups like Features, FCs, TransferFunction
                        elif key_group.metadata.mth5_type.lower() not in ["station"]:
                            continue
                        self._metadata.add_station(key_group.metadata)
                    except MTH5Error:
                        self.logger.warning(f"Could not find station {key}")
        except KeyError:
            self.logger.debug(
                "Stations Group does not exists yet. Metadata contains no station information"
            )

        try:
            filters_group = self.filters_group
            if list(filters_group.filter_dict.keys()) != list(
                self._metadata.filters.keys()
            ):
                for key in self.filters_group.filter_dict.keys():
                    try:
                        if key in self._metadata.filters.keys():
                            continue
                        filter_obj = filters_group.to_filter_object(key)
                        self._metadata.filters[key] = filter_obj
                    except MTH5Error:
                        self.logger.warning(f"Could not find filter {key}")

        except KeyError:
            self.logger.debug(
                "Filters Group does not exists yet. Metadata contains no filter information"
            )
        return self._metadata



[docs]
    def write_metadata(self) -> None:
        """Write HDF5 attributes from the survey metadata object."""

        try:
            for key, value in self._metadata.to_dict(single=True).items():
                value = to_numpy_type(value)
                self.logger.debug(f"wrote metadata {key} = {value}")
                self.hdf5_group.attrs.create(key, value)
            self._has_read_metadata = True
        except KeyError as key_error:
            if "no write intent" in str(key_error):
                self.logger.warning("File is in read-only mode, cannot write metadata.")
            else:
                raise KeyError(key_error)
        except ValueError as value_error:
            if "Unable to synchronously create group" in str(value_error):
                self.logger.warning("File is in read-only mode, cannot write metadata.")
            else:
                raise ValueError(value_error)


    @property

[docs]
    def stations_group(self) -> MasterStationGroup:
        return MasterStationGroup(self.hdf5_group["Stations"])


    @property

[docs]
    def filters_group(self) -> FiltersGroup:
        """Convenience accessor for ``/Survey/Filters`` group."""
        return FiltersGroup(self.hdf5_group["Filters"], **self.dataset_options)


    @property

[docs]
    def reports_group(self) -> ReportsGroup:
        """Convenience accessor for ``/Survey/Reports`` group."""
        return ReportsGroup(self.hdf5_group["Reports"], **self.dataset_options)


    @property

[docs]
    def standards_group(self) -> StandardsGroup:
        """Convenience accessor for ``/Survey/Standards`` group."""
        return StandardsGroup(self.hdf5_group["Standards"], **self.dataset_options)



[docs]
    def update_survey_metadata(self, survey_dict: dict[str, Any] | None = None) -> None:
        """Deprecated alias for :py:meth:`update_metadata`.

        Raises
        ------
        DeprecationWarning
            Always raised to direct callers to ``update_metadata``.

        Examples
        --------
        >>> survey.update_survey_metadata()  # doctest: +ELLIPSIS
        Traceback (most recent call last):
        ...
        DeprecationWarning: 'update_survey_metadata' has been deprecated use 'update_metadata()'
        """

        raise DeprecationWarning(
            "'update_survey_metadata' has been deprecated use 'update_metadata()'"
        )



[docs]
    def update_metadata(self, survey_dict: dict[str, Any] | None = None) -> None:
        """Synchronize survey metadata from station summaries.

        Parameters
        ----------
        survey_dict : dict, optional
            Additional metadata values to merge before synchronization.

        Notes
        -----
        Updates survey start/end dates and bounding box from station summaries,
        then writes metadata to HDF5.

        Examples
        --------
        >>> _ = survey.update_metadata()
        >>> survey.metadata.time_period.start_date  # doctest: +SKIP
        '2020-01-01'
        """

        station_summary = self.stations_group.station_summary.copy()
        self.logger.debug("Updating survey metadata from stations summary table")

        if survey_dict:
            self.metadata.from_dict(survey_dict, skip_none=True)

        if not len(station_summary):  # if station info is empty df, skip parsing
            self.write_metadata()
            return

        self._metadata.time_period.start_date = (
            station_summary.start.min().isoformat().split("T")[0]
        )
        self._metadata.time_period.end_date = (
            station_summary.end.max().isoformat().split("T")[0]
        )
        self._metadata.northwest_corner.latitude = station_summary.latitude.max()
        self._metadata.northwest_corner.longitude = station_summary.longitude.min()
        self._metadata.southeast_corner.latitude = station_summary.latitude.min()
        self._metadata.southeast_corner.longitude = station_summary.longitude.max()

        # metadata by default comes with stations and runs, need to remove those
        # before writing the metadata.
        self.write_metadata()