Source code for mth5.io.nims.nims_collection

# -*- coding: utf-8 -*-
"""
NIMS Collection
===============

Collection of NIMS ``.bin`` files combined into runs

Created on Wed Aug 31 10:32:44 2022

@author: jpeacock
"""

# =============================================================================
# Imports
# =============================================================================
import pandas as pd

from mth5.io.collection import Collection
from mth5.io.nims import NIMS

# =============================================================================


class NIMSCollection(Collection):
    """
    Collection of NIMS files into runs.

    Files with the extension ``bin`` are gathered through the parent
    :class:`Collection` helpers and summarized, one row per file, by
    :meth:`to_dataframe`.

    .. code-block:: python

        >>> from mth5.io.nims.nims_collection import NIMSCollection
        >>> nc = NIMSCollection(r"/path/to/single/nims/station")
        >>> nc.station_id = "mt001"
        >>> nc.survey_id = "test_survey"
        >>> run_dict = nc.get_runs(1)


    """

    def __init__(self, file_path=None, **kwargs):
        """
        Initialize the parent collection, then set NIMS specific defaults.

        :param file_path: forwarded unchanged to the parent ``Collection``,
         defaults to None
        :type file_path: string or Path, optional
        :param kwargs: forwarded unchanged to the parent ``Collection``

        """
        super().__init__(file_path=file_path, **kwargs)
        # extension handed to ``get_files`` in ``to_dataframe``
        self.file_ext = "bin"

        # assigned after the parent initializer, so this is the value in
        # effect until the caller sets ``survey_id`` explicitly
        self.survey_id = "mt"

    def to_dataframe(
        self, sample_rates=[1], run_name_zeros=2, calibration_path=None
    ):
        """
        Create a data frame with one row per NIMS file in the collection.

        The header of every file returned by ``get_files(self.file_ext)`` is
        read and turned into one entry.  The entries are then passed through
        the parent helpers ``_set_df_dtypes`` and ``_sort_df``.

        .. note:: This assumes the given directory contains a single station

        :param sample_rates: not used by this implementation (the sample
         rate of each row is read from the file header), defaults to [1].
         The list default is never mutated.
        :type sample_rates: int or list, optional
        :param run_name_zeros: forwarded to ``_sort_df``, defaults to 2
        :type run_name_zeros: int, optional
        :param calibration_path: not used by this implementation,
         ``calibration_fn`` is always None, defaults to None
        :type calibration_path: string or Path, optional
        :return: Dataframe with information of each NIMS file in the given
         directory.
        :rtype: :class:`pandas.DataFrame`

        :Example:

            >>> from mth5.io.nims.nims_collection import NIMSCollection
            >>> nc = NIMSCollection("/path/to/single/nims/station")
            >>> nims_df = nc.to_dataframe()

        """

        entries = []
        for fn in self.get_files(self.file_ext):
            nims_obj = NIMS(fn)
            # only the header is read here, not the time series
            nims_obj.read_header()

            # key order is kept stable because it defines the column order
            # of the resulting data frame
            entries.append(
                {
                    "survey": self.survey_id,
                    "station": nims_obj.station,
                    "run": nims_obj.run_id,
                    "start": nims_obj.start_time.isoformat(),
                    "end": nims_obj.end_time.isoformat(),
                    "channel_id": 1,
                    # every file is described with the same channel list
                    "component": ",".join(
                        ["hx", "hy", "hz", "ex", "ey", "temperature"]
                    ),
                    "fn": fn,
                    "sample_rate": nims_obj.sample_rate,
                    "file_size": nims_obj.file_size,
                    "n_samples": nims_obj.n_samples,
                    # placeholder, rewritten by ``assign_run_names``
                    "sequence_number": 0,
                    "instrument_id": "NIMS",
                    "calibration_fn": None,
                }
            )

        # make pandas dataframe and set data types
        return self._sort_df(
            self._set_df_dtypes(pd.DataFrame(entries)), run_name_zeros
        )

    def assign_run_names(self, df, zeros=2):
        """
        Assign run names and sequence numbers, one row per file.

        Rows are processed station by station in order of ``start``.  Every
        row gets its 1-based position as ``sequence_number``.  Rows without
        a run id are named ``sr{sample_rate}_{run_number:0{zeros}}``; rows
        that already have a run id keep it.

        :param df: Dataframe with at least the columns ``station``,
         ``start``, ``run``, ``sample_rate`` and ``sequence_number``
        :type df: :class:`pandas.DataFrame`
        :param zeros: zero padded width of the run number in the run name,
         defaults to 2
        :type zeros: int, optional
        :return: the same Dataframe, modified in place, with run names
        :rtype: :class:`pandas.DataFrame`

        """

        for station in df.station.unique():
            station_rows = df[df.station == station].sort_values("start")
            for count, row in enumerate(station_rows.itertuples(), start=1):
                # a missing run id can be None or a NaN/NA placeholder
                # depending on how the frame was built, treat all as unset
                if pd.isna(row.run):
                    df.loc[
                        row.Index, "run"
                    ] = f"sr{row.sample_rate}_{count:0{zeros}}"
                df.loc[row.Index, "sequence_number"] = count

        return df