Source code for mth5.io.conversion

"""
Convert MTH5 to other formats

- MTH5 -> miniSEED + StationXML
"""

from __future__ import annotations

import datetime

# ==================================================================================
# Imports
# ==================================================================================
import re
from pathlib import Path
from typing import Any

from loguru import logger
from mt_metadata.timeseries.stationxml import XMLInventoryMTExperiment
from obspy import read
from obspy.core import UTCDateTime

from mth5.mth5 import MTH5


# ==================================================================================


[docs] class MTH5ToMiniSEEDStationXML: """ Convert MTH5 files to miniSEED and StationXML formats. This class provides functionality to convert magnetotelluric data stored in MTH5 format to industry-standard miniSEED time series files and StationXML metadata files for data exchange and archival purposes. Parameters ---------- mth5_path : str, Path, or None, default None Path to the input MTH5 file to be converted save_path : str, Path, or None, default None Directory path where output files will be saved. If None, uses the parent directory of mth5_path network_code : str, default "ZU" Two-character FDSN network code for the output files use_runs_with_data_only : bool, default True If True, only process runs that contain actual time series data **kwargs : dict Additional keyword arguments to set as instance attributes Attributes ---------- mth5_path : Path or None Path to the MTH5 input file save_path : Path Directory where output files are saved network_code : str FDSN network code for output files use_runs_with_data_only : bool Flag to process only runs with data encoding : str or None Encoding format for miniSEED files Examples -------- >>> converter = MTH5ToMiniSEEDStationXML( ... mth5_path="/path/to/data.h5", ... network_code="MT", ... save_path="/path/to/output" ... ) >>> xml_file, mseed_files = converter.convert_mth5_to_ms_stationxml() """ def __init__( self, mth5_path: str | Path | None = None, save_path: str | Path | None = None, network_code: str = "ZU", use_runs_with_data_only: bool = True, **kwargs: Any, ) -> None: """ Initialize MTH5 to miniSEED/StationXML converter. Parameters ---------- mth5_path : str, Path, or None, default None Path to the input MTH5 file to be converted save_path : str, Path, or None, default None Directory path where output files will be saved. If None, uses the parent directory of mth5_path network_code : str, default "ZU" Two-character FDSN network code for the output files use_runs_with_data_only : bool, default True If True, only process runs that contain actual time series data **kwargs : dict Additional keyword arguments to set as instance attributes """ self._network_code_pattern = r"^[a-zA-Z0-9]{2}$" self.mth5_path = mth5_path self.save_path = save_path self.network_code = network_code
[docs] self.use_runs_with_data_only = use_runs_with_data_only
[docs] self.encoding = None
for key, value in kwargs.items(): setattr(self, key, value) @property
[docs] def mth5_path(self) -> Path | None: """ Path to the MTH5 input file. Returns ------- Path or None Path to the MTH5 file to be converted, or None if not set. """ return self._mth5_path
@mth5_path.setter def mth5_path(self, value: str | Path | None) -> None: """ Set the MTH5 file path with validation. Parameters ---------- value : str, Path, or None Path to the MTH5 file. Must exist if not None. Raises ------ TypeError If value cannot be converted to a Path object. FileNotFoundError If the specified file does not exist. """ if value is None: self._mth5_path = None return try: value = Path(value) except Exception as error: raise TypeError(f"Could not convert value to Path: {error}") if not value.exists(): raise FileExistsError(f"Could not find {value}") self._mth5_path = value @property
[docs] def save_path(self) -> Path: """ Directory path where output files will be saved. Returns ------- Path Directory path for saving miniSEED and StationXML files. """ return self._save_path
@save_path.setter def save_path(self, value: str | Path | None) -> None: """ Set the save directory path with automatic creation. Parameters ---------- value : str, Path, or None Directory path where files will be saved. If None, uses the parent directory of mth5_path or current working directory. Notes ----- Creates the directory if it doesn't exist. """ """Set the save path, if None set to parent directory of mth5_path""" if value is None: if self._mth5_path is None: self._save_path = Path().cwd() else: self._save_path = self._mth5_path.parent else: self._save_path = Path(value) if not self._save_path.exists(): self._save_path.mkdir(exists_ok=True) @property
[docs] def network_code(self) -> str: """ Two-character FDSN network code. Returns ------- str Alphanumeric string of exactly 2 characters as required by FDSN DMC. """ return self._network_code
@network_code.setter def network_code(self, value: str) -> None: """ Set the FDSN network code with validation. Parameters ---------- value : str Two-character alphanumeric network code. Raises ------ ValueError If value is None or doesn't match the required 2-character pattern. Notes ----- Request temporary codes from https://www.fdsn.org/networks/request/temp/ """ if value is None: raise ValueError( "Must input a network code. " "Request a temporary code from https://www.fdsn.org/networks/request/temp/" ) if not re.match(self._network_code_pattern, value): raise ValueError( f"{value} is not a valid network code. It must be 2 alphanumeric characters" ) self._network_code = value @classmethod
[docs] def convert_mth5_to_ms_stationxml( cls, mth5_path: str | Path, save_path: str | Path | None = None, network_code: str = "ZU", use_runs_with_data_only: bool = True, **kwargs: Any, ) -> tuple[Path, list[Path]]: """ Convert an MTH5 file to miniSEED and StationXML formats. Class method that provides a convenient interface to convert MTH5 data to standard seismological formats for data exchange and archival. Parameters ---------- mth5_path : str or Path Path to the input MTH5 file to be converted save_path : str, Path, or None, default None Directory where output files will be saved. If None, uses the parent directory of mth5_path network_code : str, default "ZU" Two-character FDSN network code for output files use_runs_with_data_only : bool, default True If True, only process runs containing actual time series data **kwargs : dict Additional keyword arguments passed to converter initialization Returns ------- tuple[Path, list[Path]] Tuple containing: - Path to the generated StationXML file - List of paths to generated miniSEED files (one per day per channel) Examples -------- >>> xml_file, mseed_files = MTH5ToMiniSEEDStationXML.convert_mth5_to_ms_stationxml( ... "/path/to/data.h5", ... network_code="MT", ... save_path="/output/directory" ... ) >>> print(f"Created {len(mseed_files)} miniSEED files and {xml_file}") """ converter = cls( mth5_path=mth5_path, save_path=save_path, network_code=network_code, use_runs_with_data_only=use_runs_with_data_only, **kwargs, ) with MTH5() as m: m.open_mth5(converter.mth5_path) experiment = m.to_experiment(has_data=converter.use_runs_with_data_only) stream_list = [] for row in m.run_summary.itertuples(): if row.has_data: run_ts = m.from_reference(row.run_hdf5_reference).to_runts() if converter.encoding is None: encoding = get_encoding(run_ts) else: encoding = converter.encoding stream = run_ts.to_obspy_stream( network_code=converter.network_code, encoding=encoding ) # write to miniseed files stream_list += converter.split_ms_to_days( stream, converter.save_path, encoding ) # write StationXML experiment.surveys[0].fdsn.network = converter.network_code translator = XMLInventoryMTExperiment() xml_fn = converter.save_path.joinpath(f"{converter.mth5_path.stem}.xml") stationxml = translator.mt_to_xml( experiment, stationxml_fn=xml_fn, ) logger.info(f"Wrote StationXML to {xml_fn}") return xml_fn, stream_list
[docs] def split_ms_to_days(self, streams, save_path: Path, encoding: str) -> list[Path]: """ Split miniSEED traces into daily files. Splits continuous time series traces into separate files for each day to conform with standard seismological data archiving practices. Parameters ---------- streams : obspy.Stream Stream object containing traces to be split by day save_path : Path Directory where daily miniSEED files will be saved encoding : str Data encoding format for miniSEED files (e.g., 'INT32', 'FLOAT64') Returns ------- list[Path] List of paths to the generated daily miniSEED files Notes ----- Files are named using the pattern: {network}_{station}_{location}_{channel}_{YYYY_MM_DDTHH_MM_SS}.mseed """ fn_list = [] for tr in streams: start_time = tr.stats.starttime end_time = tr.stats.endtime # Split the trace by day current_time = start_time while current_time < end_time: next_day = UTCDateTime(current_time.date + datetime.timedelta(days=1)) if next_day > end_time: next_day = end_time # Slice the trace for the current day tr_day = tr.slice(current_time, next_day) # Generate the output file name output_file = save_path.joinpath( f"{tr.stats.network}_{tr.stats.station}_{tr.stats.location}_{tr.stats.channel}_{current_time.isoformat().replace('-', '_').replace(':', '_')}.mseed" ) logger.info(f"Wrote miniseed file to: {output_file}") fn_list.append(output_file) # Write the sliced trace to a new MiniSEED file tr_day.write(output_file, format="MSEED", reclen=256, encoding=encoding) # Move to the next day current_time = next_day return fn_list
[docs] def get_encoding(run_ts) -> str: """ Determine consistent data encoding for miniSEED files across channels. Analyzes data types across all channels in a run and selects a median encoding to ensure compatibility in miniSEED file generation. Parameters ---------- run_ts : RunTS Run time series object containing multiple channels of data Returns ------- str String identifier for miniSEED encoding format (e.g., 'INT32', 'FLOAT64') Notes ----- Uses median data type to handle mixed precision datasets. Automatically converts INT64 to INT32 for miniSEED compatibility since some readers don't support 64-bit integers. Examples -------- >>> encoding = get_encoding(run_timeseries) >>> print(f"Selected encoding: {encoding}") """ dtypes = [run_ts.dataset[ch].data.dtype.name for ch in run_ts.channels] encoding = sorted(dtypes)[int(len(dtypes) / 2)].upper() if encoding in ["INT64"]: encoding = "INT32" logger.warning("Casting INT64 to INT32") return encoding
[docs] def split_miniseed_by_day(input_file: str | Path) -> list[Path]: """ Split an existing miniSEED file into daily files. Utility function to split a multi-day miniSEED file into separate files for each calendar day, following standard seismological archiving practices. Parameters ---------- input_file : str or Path Path to the input miniSEED file to be split Returns ------- list[Path] List of paths to the generated daily miniSEED files Notes ----- Output files are named using the pattern: {network}.{station}.{location}.{channel}.{YYYY-MM-DD}.mseed Files are saved in the same directory as the input file. Examples -------- >>> daily_files = split_miniseed_by_day("/path/to/continuous.mseed") >>> print(f"Created {len(daily_files)} daily files") """ save_path = Path(input_file).parent # Read the MiniSEED file st = read(input_file) tr_list = [] # Iterate over each trace in the stream for tr in st: start_time = tr.stats.starttime end_time = tr.stats.endtime # Split the trace by day current_time = start_time while current_time < end_time: next_day = UTCDateTime(current_time.date + datetime.timedelta(days=1)) if next_day > end_time: next_day = end_time # Slice the trace for the current day tr_day = tr.slice(current_time, next_day) # Generate the output file name output_file = save_path.joinpath( f"{tr.stats.network}.{tr.stats.station}.{tr.stats.location}.{tr.stats.channel}.{current_time.date}.mseed" ) # Write the sliced trace to a new MiniSEED file tr_day.write(output_file, format="MSEED") tr_list.append(output_file) # Move to the next day current_time = next_day return tr_list