Source code for mth5.timeseries.ts_helpers

# -*- coding: utf-8 -*-
"""
Time series helper functions.

Provides utilities for time series processing including decimation planning,
significant figure counting, and datetime coordinate generation.

Notes
-----
Created on Wed Mar 29 14:27:22 2023

Author: jpeacock
"""

from __future__ import annotations

# =============================================================================
# Imports
# =============================================================================
import numpy as np
import pandas as pd
from loguru import logger
from mt_metadata.common.mttime import MTime


# =============================================================================


[docs] def get_decimation_sample_rates( old_sample_rate: float, new_sample_rate: float, max_decimation: int ) -> list[float]: """ Compute decimation steps from old to new sample rate. Generates a list of intermediate sample rates to decimate from `old_sample_rate` to `new_sample_rate` without exceeding `max_decimation` at any single step. Uses geometric series to distribute decimation evenly. Parameters ---------- old_sample_rate : float Original sample rate in samples per second. new_sample_rate : float Target sample rate in samples per second. max_decimation : int Maximum allowed decimation factor per step. Returns ------- list[float] List of intermediate sample rates for multi-stage decimation. If total decimation <= max_decimation, returns [new_sample_rate]. Examples -------- Single-step decimation (factor <= max):: >>> get_decimation_sample_rates(100, 10, 13) [10] Multi-step decimation (factor > max):: >>> get_decimation_sample_rates(1000, 10, 13) [77, 10] >>> # Decimates 1000 -> 77 -> 10 Three-step decimation:: >>> get_decimation_sample_rates(10000, 10, 13) [770, 60, 10] """ if (old_sample_rate / new_sample_rate) > max_decimation: # get the number of entries in a geometric series n_levels = int( np.ceil(np.log(old_sample_rate / new_sample_rate) / np.log(max_decimation)) ) # make a geometric series sr_list = [ int(np.ceil(old_sample_rate / max_decimation ** (ii))) for ii in range(1, n_levels) ] + [new_sample_rate] return sr_list return [new_sample_rate]
def _count_decimal_sig_figs(digits: float | int | str) -> int: """ Count decimal significant figures. Returns the number of significant figures in the fractional part of a number. Trailing zeros are ignored. Parameters ---------- digits : float | int | str Number or string representation of a number. Returns ------- int Number of significant decimal places (excluding trailing zeros). Examples -------- Count decimal places:: >>> _count_decimal_sig_figs(1.234) 3 >>> _count_decimal_sig_figs(0.00100) 3 >>> _count_decimal_sig_figs("1.500") 1 >>> _count_decimal_sig_figs(42) 0 """ _, _, fractional = str(digits).partition(".") return len(fractional.rstrip("0"))
[docs] def make_dt_coordinates( start_time: str | MTime | None, sample_rate: float | None, n_samples: int, end_time: str | MTime | None = None, ) -> pd.DatetimeIndex: """ Generate datetime coordinates for time series data. Creates a pandas DatetimeIndex with proper rounding based on sample rate precision. Handles edge cases like invalid sample rates and missing start times with sensible defaults. Parameters ---------- start_time : str | MTime | None Start time in ISO format or MTime object. If None, defaults to '1980-01-01T00:00:00'. sample_rate : float | None Sample rate in samples per second. If None or 0, defaults to 1 and issues a warning. n_samples : int Number of samples in the time series. Must be >= 1. end_time : str | MTime | None, default None End time in ISO format or MTime object. If None, computed from start_time, sample_rate, and n_samples. Returns ------- pandas.DatetimeIndex DatetimeIndex with `n_samples` timestamps, rounded to appropriate precision (ms, us, or ns) based on significant figures. Raises ------ ValueError If `n_samples` < 1. Warnings -------- UserWarning If sample_rate is invalid (None or 0) or start_time is None. Notes ----- - End time is calculated as: start_time + (n_samples - 1) / sample_rate - Rounding precision determined by significant figures in start time and sample period (1 / sample_rate): * < 3 sig figs: millisecond rounding * 3-5 sig figs: microsecond rounding * 6-8 sig figs: nanosecond rounding * >= 9 sig figs: no rounding Examples -------- Create 100 Hz time index for 1000 samples:: >>> dt = make_dt_coordinates('2023-01-01T00:00:00', 100.0, 1000) >>> len(dt) 1000 >>> dt[0] Timestamp('2023-01-01 00:00:00') >>> dt[-1] Timestamp('2023-01-01 00:00:09.990000') With explicit end time:: >>> dt = make_dt_coordinates( ... '2023-01-01T00:00:00', ... 10.0, ... 100, ... end_time='2023-01-01T00:00:09.9' ... ) Handle invalid sample rate (uses default of 1 Hz):: >>> dt = make_dt_coordinates('2023-01-01T00:00:00', None, 10) # Warning: Need to input a valid sample rate... """ if sample_rate in [0, None]: msg = ( f"Need to input a valid sample rate. Not {sample_rate}, " "returning a time index assuming a sample rate of 1" ) logger.warning(msg) sample_rate = 1.0 if start_time is None: msg = ( f"Need to input a start time. Not {start_time}, " "returning a time index with start time of " "1980-01-01T00:00:00" ) logger.warning(msg) start_time = "1980-01-01T00:00:00" if n_samples < 1: msg = f"Need to input a valid n_samples. Not {n_samples}" logger.error(msg) raise ValueError(msg) if not isinstance(start_time, MTime): start_time = MTime(time_stamp=start_time) # At this point, sample_rate is guaranteed to be float (not None) # and start_time is guaranteed to be MTime assert sample_rate is not None # Type narrowing for static analysis # there is something screwy that happens when your sample rate is not a # nice value that can easily fit into the 60 base. For instance if you # have a sample rate of 24000 the dt_freq will be '41667N', but that is # not quite right since the rounding clips some samples and your # end time will be incorrect (short).\n # FIX: therefore estimate the end time based on the decimal sample rate. # need to account for the fact that the start time is the first sample # need n_samples - 1 if end_time is None: end_time = start_time + (n_samples - 1) / sample_rate else: if not isinstance(end_time, MTime): end_time = MTime(time_stamp=end_time) # dt_freq = "{0:.0f}N".format(1.0e9 / (sample_rate)) dt_index = pd.date_range( start=start_time.iso_no_tz, end=end_time.iso_no_tz, periods=int(round(n_samples)), ) ## need to enforce some rounding errors otherwise an expected time step ## will have a rounding error, messes things up when reindexing. start_sig_figs = _count_decimal_sig_figs(str(start_time)) sr_sig_figs = _count_decimal_sig_figs(1 / sample_rate) if start_sig_figs > sr_sig_figs: test_sf = start_sig_figs else: test_sf = sr_sig_figs if test_sf < 3: dt_index = dt_index.round(freq="ms") elif test_sf >= 3 and test_sf < 6: dt_index = dt_index.round(freq="us") elif test_sf >= 6 and test_sf < 9: dt_index = dt_index.round(freq="ns") else: pass return dt_index