Source code for mth5.io.usgs_ascii.usgs_ascii_collection

# -*- coding: utf-8 -*-
"""
USGS ASCII Collection
=====================

Collection of USGS ASCII (.asc) files combined into runs

Created on Wed Aug 31 10:32:44 2022

@author: jpeacock
"""

# =============================================================================
# Imports
# =============================================================================
import pandas as pd

from mth5.io.collection import Collection
from mth5.io.usgs_ascii import USGSascii

# =============================================================================


class USGSasciiCollection(Collection):
    """
    Collection of USGS ASCII files.

    Files with the extension ``asc`` found under ``file_path`` are
    summarized, one row per file, into a :class:`pandas.DataFrame`.

    .. code-block:: python

        >>> from mth5.io.usgs_ascii import USGSasciiCollection
        >>> lc = USGSasciiCollection(r"/path/to/ascii/files")
        >>> run_dict = lc.get_runs(1)

    """

    def __init__(self, file_path=None, **kwargs):
        super().__init__(file_path=file_path, **kwargs)
        # extension handed to ``get_files`` when building the dataframe
        self.file_ext = "asc"

    def to_dataframe(
        self, sample_rates=None, run_name_zeros=4, calibration_path=None
    ):
        """
        Create a data frame with one row for each ASCII file found by
        ``self.get_files(self.file_ext)``.

        Only the metadata of each file is read (``read_metadata``).

        .. note:: If a run name is already present it will not be
         overwritten

        :param sample_rates: not used by this method; the sample rate is
         taken from each file.  Presumably kept for signature parity with
         other collections.  Defaults to None
        :type sample_rates: int or list, optional
        :param run_name_zeros: number of zeros to assign to the run name,
         passed on to ``_sort_df``, defaults to 4
        :type run_name_zeros: int, optional
        :param calibration_path: not used by this method
         (``calibration_fn`` is always None), defaults to None
        :type calibration_path: string or Path, optional
        :return: Dataframe with information of each ASCII file in the
         given directory.
        :rtype: :class:`pandas.DataFrame`

        :Example:

            >>> from mth5.io.usgs_ascii import USGSasciiCollection
            >>> lc = USGSasciiCollection("/path/to/ascii/files")
            >>> ascii_df = lc.to_dataframe()

        """

        entries = []
        for fn in self.get_files(self.file_ext):
            asc_obj = USGSascii(fn)
            asc_obj.read_metadata()

            # key order defines the column order of the dataframe
            entries.append(
                {
                    "survey": asc_obj.survey_metadata.id,
                    "station": asc_obj.station_metadata.id,
                    "run": asc_obj.run_metadata.id,
                    "start": asc_obj.start,
                    "end": asc_obj.end,
                    "channel_id": 1,
                    "component": ",".join(
                        asc_obj.run_metadata.channels_recorded_all
                    ),
                    "fn": fn,
                    "sample_rate": asc_obj.sample_rate,
                    "file_size": asc_obj.file_size,
                    "n_samples": int(asc_obj.n_samples),
                    "sequence_number": 0,
                    "instrument_id": asc_obj.run_metadata.data_logger.id,
                    "calibration_fn": None,
                }
            )

        # make pandas dataframe and set data types
        df = self._sort_df(
            self._set_df_dtypes(pd.DataFrame(entries)), run_name_zeros
        )

        return df

    def assign_run_names(self, df, zeros=4):
        """
        Assign run names and sequence numbers station by station.

        For each station the rows are visited in order of ``start`` and
        numbered from 1.  A run name of the form
        ``sr{sample_rate}_{run_number:0{zeros}}`` is written only if the
        row has no run name yet (None or a pandas missing value); the
        sequence number is always written.  The dataframe is modified in
        place.

        :param df: Dataframe with the columns ``station``, ``start``,
         ``run``, ``sample_rate`` and ``sequence_number``
        :type df: :class:`pandas.DataFrame`
        :param zeros: number of zeros in run name, defaults to 4
        :type zeros: int, optional
        :return: Dataframe with run names
        :rtype: :class:`pandas.DataFrame`

        """

        for station in df.station.unique():
            station_rows = df[df.station == station].sort_values("start")
            for count, row in enumerate(station_rows.itertuples(), start=1):
                # pd.isna also catches NaN/NaT/NA, which is what a missing
                # run name becomes after dtype coercion or a CSV round trip
                if pd.isna(row.run):
                    df.loc[
                        row.Index, "run"
                    ] = f"sr{row.sample_rate}_{count:0{zeros}}"
                df.loc[row.Index, "sequence_number"] = count

        return df