# -*- coding: utf-8 -*-
"""
Universal reader for magnetotelluric time series data files.
This module provides a plugin-like system for reading various MT data formats
and returning appropriate :class:`mth5.timeseries` objects. The reader
automatically detects file types and dispatches to the correct parser.
Plugin Structure
----------------
If you are writing your own reader, implement the following structure:
* Class object that reads the given file format
* A reader function named read_{file_type} (e.g., read_nims)
* Return value should be a :class:`mth5.timeseries.MTTS` or
:class:`mth5.timeseries.RunTS` object plus extra metadata as a
dictionary with keys formatted as {level.attribute}
Example Implementation
----------------------
.. code-block:: python
class NewFile:
def __init__(self, fn):
self.fn = fn
def read_header(self):
return header_information
def read_newfile(self):
ex, ey, hx, hy, hz = read_in_channels_as_MTTS
return RunTS([ex, ey, hx, hy, hz])
def read_newfile(fn):
new_file_obj = NewFile(fn)
run_obj = new_file_obj.read_newfile()
return run_obj, extra_metadata
Then add your reader to the readers dictionary for automatic detection.
See Also
--------
Existing readers in `mth5.io` for implementation guidance.
Created on Wed Aug 26 10:32:45 2020
:author: Jared Peacock
:license: MIT
"""
# =============================================================================
# Imports
# =============================================================================
from __future__ import annotations
from pathlib import Path
from typing import Any, Callable
from loguru import logger
from mth5.io import lemi, metronix, miniseed, nims, phoenix, usgs_ascii, zen
# =============================================================================
# Reader registry for MT data formats
# =============================================================================
[docs]
readers: dict[str, dict[str, Any]] = {
"zen": {"file_types": ["z3d"], "reader": zen.read_z3d},
"nims": {"file_types": ["bin", "bnn"], "reader": nims.read_nims},
"usgs_ascii": {
"file_types": ["asc", "zip"],
"reader": usgs_ascii.read_ascii,
},
"miniseed": {
"file_types": ["miniseed", "ms", "mseed"],
"reader": miniseed.read_miniseed,
},
"lemi424": {
"file_types": ["txt"],
"reader": lemi.read_lemi424,
},
"phoenix": {
"file_types": ["bin", "td_30", "td_150", "td_24k"],
"reader": phoenix.read_phoenix,
},
"metronix": {
"file_types": ["atss"],
"reader": metronix.read_atss,
},
}
[docs]
def get_reader(extension: str) -> tuple[str, Callable]:
"""
Get the appropriate reader function for a file extension.
Searches the reader registry to find the correct parser function
for the given file extension. Handles ambiguous extensions by
issuing warnings when multiple readers might apply.
Parameters
----------
extension : str
File extension (without the dot) to find a reader for
Returns
-------
tuple[str, Callable]
Tuple containing:
- Reader name (str): Identifier for the reader type
- Reader function (Callable): Function to parse files of this type
Raises
------
ValueError
If no reader is found for the given file extension
Examples
--------
>>> reader_name, reader_func = get_reader("z3d")
>>> print(reader_name) # "zen"
>>> data = reader_func("/path/to/file.z3d")
Notes
-----
Some extensions like "bin" are ambiguous and could match multiple
readers (NIMS or Phoenix). A warning is issued in such cases.
"""
if extension in ["bin"]:
logger.warning("Suggest inputing file type, bin could be nims or phoenix")
for key, vdict in readers.items():
if extension.lower() in vdict["file_types"]:
return key, vdict["reader"]
msg = f"Could not find a reader for file type {extension}"
logger.error(msg)
raise ValueError(msg)
[docs]
def read_file(
fn: str | Path | list[str | Path], file_type: str | None = None, **kwargs: Any
) -> Any:
"""
Universal reader for magnetotelluric time series data files.
Automatically detects the file type based on extension and dispatches
to the appropriate reader function. Supports both single files and
lists of files for multi-file formats.
Parameters
----------
fn : str, Path, or list of str/Path
Full path(s) to data file(s) to be read. For multi-file formats,
pass a list of file paths.
file_type : str, optional
Specific reader type to use if file extension is ambiguous.
Must be one of the keys in the readers registry, by default None
**kwargs : dict
Additional keyword arguments passed to the specific reader function.
Supported arguments depend on the file format and reader.
Returns
-------
MTTS or RunTS
Time series object containing the data:
- :class:`mth5.timeseries.MTTS` for single channel data
- :class:`mth5.timeseries.RunTS` for multi-channel run data
Raises
------
IOError
If any specified file does not exist
KeyError
If the specified file_type is not supported
ValueError
If no reader can be found for the file extension
Examples
--------
Read a single Z3D file (auto-detected)
>>> data = read_file("/path/to/station_001.z3d")
>>> print(type(data)) # <class 'mth5.timeseries.ChannelTS'>
Read with explicit file type for ambiguous extensions
>>> data = read_file("/path/to/data.bin", file_type="nims")
>>> print(data.n_channels)
Read multiple files for a multi-file format
>>> files = ["/path/to/file1.asc", "/path/to/file2.asc"]
>>> run_data = read_file(files, sample_rate=1.0)
Notes
-----
Supported file types and extensions:
- zen: .z3d (Zonge Z3D files)
- nims: .bin, .bnn (USGS NIMS files)
- usgs_ascii: .asc, .zip (USGS ASCII format)
- miniseed: .miniseed, .ms, .mseed (miniSEED format)
- lemi424: .txt (LEMI-424 format)
- phoenix: .bin, .td_30, .td_150, .td_24k (Phoenix formats)
- metronix: .atss (Metronix ADU format)
For ambiguous extensions like .bin, specify file_type explicitly.
"""
if isinstance(fn, (list, tuple)):
fn = [Path(ff) for ff in fn]
if not fn[0].exists():
msg = f"Could not find file {fn}. Check path."
logger.error(msg)
raise IOError(msg)
file_ext = fn[0].suffix[1:]
else:
fn = Path(fn)
if not fn.exists():
msg = f"Could not find file {fn}. Check path."
logger.error(msg)
raise IOError(msg)
file_ext = fn.suffix[1:]
if file_type is not None:
try:
file_reader = readers[file_type]["reader"]
except KeyError:
msg = (
f"Reader for the file type {file_type} does not currently exist. "
f"Current readers {list(readers.keys())}"
)
logger.error(msg)
raise KeyError(msg)
else:
file_type, file_reader = get_reader(file_ext)
return file_reader(fn, **kwargs)