Source code for mth5.io.lemi.lemi_collection

# -*- coding: utf-8 -*-
"""
LEMI 424 Collection
====================

Collection of TXT files combined into runs

Created on Wed Aug 31 10:32:44 2022

@author: jpeacock
"""

# =============================================================================
# Imports
# =============================================================================
import pandas as pd

from mth5.io.collection import Collection
from mth5.io.lemi import LEMI424

# =============================================================================


[docs]class LEMICollection(Collection): """ Collection of LEMI 424 files into runs based on start and end times. Will assign the run name as 'sr1_{index:0{zeros}}' --> 'sr1_0001' for `zeros` = 4. :param file_path: full path to single station LEMI424 directory :type file_path: string or :class`pathlib.Path` :param file_ext: extension of LEMI424 files, default is 'txt' :type file_ext: string :param station_id: station id :type station_id: string :param survey_id: survey id :type survey_id: string .. note:: This class assumes that the given file path contains a single LEMI station. If you want to do multiple stations merge the returned data frames. .. note:: LEMI data comes with little metadata about the station or survey, therefore you should assign `station_id` and `survey_id`. .. code-block:: python >>> from mth5.io.lemi import LEMICollection >>> lc = LEMICollection(r"/path/to/single/lemi/station") >>> lc.station_id = "mt001" >>> lc.survey_id = "test_survey" >>> run_dict = lc.get_runs(1) """ def __init__(self, file_path=None, **kwargs): super().__init__(file_path=file_path, **kwargs) self.file_ext = "txt" self.station_id = "mt001" self.survey_id = "mt"
[docs] def to_dataframe( self, sample_rates=[1], run_name_zeros=4, calibration_path=None ): """ Create a data frame of each TXT file in a given directory. .. note:: This assumes the given directory contains a single station :param sample_rates: sample rate to get, will always be 1 for LEMI data defaults to [1] :type sample_rates: int or list, optional :param run_name_zeros: number of zeros to assing to the run name, defaults to 4 :type run_name_zeros: int, optional :param calibration_path: path to calibration files, defaults to None :type calibration_path: string or Path, optional :return: Dataframe with information of each TXT file in the given directory. :rtype: :class:`pandas.DataFrame` :Example: >>> from mth5.io.lemi import LEMICollection >>> lc = LEMICollection("/path/to/single/lemi/station") >>> lemi_df = lc.to_dataframe() """ entries = [] for fn in self.get_files(self.file_ext): lemi_obj = LEMI424(fn) n_samples = int(lemi_obj.n_samples) lemi_obj.read_metadata() entry = {} entry["survey"] = self.survey_id entry["station"] = self.station_id entry["run"] = None entry["start"] = lemi_obj.start.isoformat() entry["end"] = lemi_obj.end.isoformat() entry["channel_id"] = 1 entry["component"] = ",".join( lemi_obj.run_metadata.channels_recorded_all ) entry["fn"] = fn entry["sample_rate"] = lemi_obj.sample_rate entry["file_size"] = lemi_obj.file_size entry["n_samples"] = n_samples entry["sequence_number"] = 0 entry["instrument_id"] = "LEMI424" entry["calibration_fn"] = None entries.append(entry) # make pandas dataframe and set data types df = self._sort_df( self._set_df_dtypes(pd.DataFrame(entries)), run_name_zeros ) return df
[docs] def assign_run_names(self, df, zeros=4): """ Assign run names based on start and end times, checks if a file has the same start time as the last end time. Run names are assigned as sr{sample_rate}_{run_number:0{zeros}}. :param df: Dataframe with the appropriate columns :type df: :class:`pandas.DataFrame` :param zeros: number of zeros in run name, defaults to 4 :type zeros: int, optional :return: Dataframe with run names :rtype: :class:`pandas.DataFrame` """ count = 1 for row in df.itertuples(): if row.Index == 0: df.loc[row.Index, "run"] = f"sr1_{count:0{zeros}}" previous_end = row.end else: if ( row.start - previous_end ).total_seconds() / row.sample_rate == row.sample_rate: df.loc[row.Index, "run"] = f"sr1_{count:0{zeros}}" else: count += 1 df.loc[row.Index, "run"] = f"sr1_{count:0{zeros}}" previous_end = row.end return df