Source code for mth5.timeseries.ts_filters

#!/usr/bin/env python

"""
time series filters

"""

# =================================================================
import numpy as np
from scipy import signal

from matplotlib import pyplot as plt
from matplotlib.lines import Line2D
from mth5.utils.mth5_logger import setup_logger

logger = setup_logger(__file__)
# =================================================================


[docs]def butter_bandpass(lowcut, highcut, fs, order=5):
    """
    Butterworth bandpass filter using scipy.signal

    :param lowcut: low cut frequency in Hz
    :type lowcut: float
    :param highcut: high cut frequency in Hz
    :type highcut: float
    :param fs: Sample rate
    :type fs: float
    :param order: Butterworth order, defaults to 5
    :type order: int, optional
    :return: SOS scipy.signal format
    :rtype: scipy.signal.SOS?

    """
    nyq = 0.5 * fs

    if lowcut is not None:
        low = lowcut / nyq
    if highcut is not None:
        high = highcut / nyq
    if lowcut and highcut:
        sos = signal.butter(
            order, [low, high], analog=False, btype="band", output="sos"
        )
    elif highcut is None:
        sos = signal.butter(order, low, analog=False, btype="low", output="sos")
    elif lowcut is None:
        sos = signal.butter(
            order, high, analog=False, btype="high", output="sos"
        )
    return sos


[docs]def butter_bandpass_filter(data, lowcut, highcut, fs, order=5):
    """

    :param data: 1D time series data
    :type data: np.ndarray
    :param lowcut: low cut frequency in Hz
    :type lowcut: float
    :param highcut: high cut frequency in Hz
    :type highcut: float
    :param fs: Sample rate
    :type fs: float
    :param order: Butterworth order, defaults to 5
    :type order: int, optional
    :return: filtered data
    :rtype: np.ndarray

    """
    sos = butter_bandpass(lowcut, highcut, fs, order=order)
    y = signal.sosfiltfilt(sos, data)
    return y


[docs]def low_pass(data, low_pass_freq, cutoff_freq, sampling_rate):
    """

    :param data: 1D time series data
    :type data: np.ndarray
    :param low_pass_freq: low pass frequency in Hz
    :type low_pass_freq: float
    :param cutoff_freq: cut off frequency in Hz
    :type cutoff_freq: float
    :param sampling_rate: Sample rate in samples per second
    :type sampling_rate: float
    :return: lowpass filtered data
    :rtype: np.ndarray

    """
    nyq = 0.5 * sampling_rate
    filt_order, wn = signal.buttord(
        low_pass_freq / nyq, cutoff_freq / nyq, 3, 40
    )

    b, a = signal.butter(filt_order, wn, btype="low")
    data_filtered = signal.filtfilt(b, a, data)

    return data_filtered


[docs]def zero_pad(input_array, power=2, pad_fill=0):
    """

    :param input_array: 1D array
    :type input_array: np.ndarray
    :param power: base power to used to pad to, defaults to 2 which is optimal
    for the FFT
    :type power: int, optional
    :param pad_fill: fill value for padded values, defaults to 0
    :type pad_fill: float, optional
    :return: zero padded array
    :rtype: np.ndarray

    """

    len_array = input_array.shape[0]
    if power == 2:
        npow = int(np.ceil(np.log2(len_array)))
    if power == 10:
        npow = int(np.ceil(np.log10(len_array)))
    if npow > 32:
        logger.warning(
            "Exceeding memory allocation inherent in your computer 2**32. "
            "Limiting the zero pad to 2**32"
        )
    pad_array = np.zeros(power**npow)
    if pad_fill != 0:
        pad_array[:] = pad_fill
    pad_array[0:len_array] = input_array

    return pad_array


[docs]class RemoveInstrumentResponse:
    """
    Remove instrument response from the given channel response filter

    The order of operations is important (if applied):

        1) detrend
        2) zero mean
        3) zero pad
        4) time window
        5) frequency window
        6) remove response
        7) undo time window
        8) bandpass

    :param ts: time series data to remove response from
    :type ts: np.ndarray((N,) , dtype=float)
    :param time_array: time index that corresponds to the time series
    :type time_array: np.ndarray((N,) , dtype=np.datetime[ns])
    :param sample_interval: seconds per sample (time interval between samples)
    :type sample_interval: float
    :param channel_response_filter: Channel response filter with all filters
    included to convert from counts to physical units
    :type channel_response_filter: `class`:mt_metadata.timeseries.filters.ChannelResponseFilter`

    **kwargs**

    :param plot: to plot the calibration process [ False | True ]
    :type plot: boolean, default True
    :param detrend: Remove linar trend of the time series
    :type detrend: boolean, default True
    :param zero_mean: Remove the mean of the time series
    :type zero_mean: boolean, default True
    :param zero_pad: pad the time series to the next power of 2 for efficiency
    :type zero_pad: boolean, default True
    :param t_window: Time domain windown name see `scipy.signal.windows` for options
    :type t_window: string, default None
    :param t_window_params: Time domain window parameters, parameters can be
    found in `scipy.signal.windows`
    :type t_window_params: dictionary
    :param f_window: Frequency domain windown name see `scipy.signal.windows` for options
    :type f_window: string, defualt None
    :param f_window_params: Frequency window parameters, parameters can be
    found in `scipy.signal.windows`
    :type f_window_params: dictionary
    :param bandpass: bandpass freequency and order {"low":, "high":, "order":,}
    :type bandpass: dictionary


    """

    def __init__(
        self,
        ts,
        time_array,
        sample_interval,
        channel_response_filter,
        **kwargs,
    ):
        self.logger = setup_logger(f"{__name__}.{self.__class__.__name__}")
        self.ts = ts
        self.time_array = time_array
        self.sample_interval = sample_interval
        self.channel_response_filter = channel_response_filter
        self.plot = False
        self.detrend = True
        self.zero_mean = True
        self.zero_pad = True
        self.t_window = None
        self.t_window_params = {}
        self.f_window = None
        self.f_window_params = {}
        self.bandpass = {}
        self.fig = None
        self.nrows = None
        self.subplot_dict = {}

        for key, value in kwargs.items():
            setattr(self, key, value)

    def _subplots(self, x, y, color, num, label):
        """
        helper function to make subplots for if plotting is desired

        :param x: x array
        :type x: np.ndarray
        :param y: y array
        :type y: np.ndarray
        :param color: color of the line
        :type color: tuple
        :param num: subplot number
        :type num: integer
        :param label: legend label
        :type label: string

        """
        ax_t = self.fig.get_axes()[0]
        ax_f = self.fig.get_axes()[1]
        ax = self.fig.add_subplot(self.nrows, 2, num, sharex=ax_t)
        ax2 = self.fig.add_subplot(self.nrows, 2, num + 1, sharex=ax_f)

        # plot time series
        line = Line2D([0], [0], color=color)
        ax.plot(x, y, color=color)
        f = np.fft.rfftfreq(x.size, d=self.sample_interval)

        # plot spectra
        data = np.fft.rfft(y)
        ax2.loglog(f, abs(data), color=color)
        ax.legend(
            [line],
            [label],
            loc="upper left",
            borderaxespad=0.01,
            borderpad=0.1,
            markerscale=0.02,
        ).set_zorder(1000)

[docs]    @staticmethod
    def get_window(window, window_params, size):
        """
        Get window from scipy.signal

        :param window: name of the window
        :type window: string
        :param window_params: dictionary of window parameters
        :type window_params: dictionary
        :param size: number of points in the window
        :type size: integer
        :return: window function
        :rtype: class:`scipy.signal`

        """
        return getattr(signal.windows, window)(size, **window_params)

[docs]    def apply_detrend(self, ts):
        """
        Detrend time series using scipy.detrend('linear')

        :param ts: input time series
        :type ts: np.ndarray
        :return: detrended time series
        :rtype: np.ndarray

        """
        ts = signal.detrend(np.nan_to_num(ts), type="linear")

        if self.plot:
            self._subplots(
                self.time_array,
                ts,
                (0.45, 0.1, 0.5),
                self.subplot_dict["detrend"],
                "Detrended",
            )
        return ts

[docs]    def apply_zero_mean(self, ts):
        """
        Remove the mean from the time series

        :param ts: input time series
        :type ts: np.ndarray
        :return: zero mean time series
        :rtype: np.ndarray

        """
        ts = ts - ts.mean()

        if self.plot:
            self._subplots(
                self.time_array,
                ts,
                (0.55, 0.1, 0.4),
                self.subplot_dict["zero_mean"],
                "Zero Mean",
            )
        return ts

[docs]    def apply_zero_pad(self, ts):
        """
        zero pad to power of 2, at the end of the time series to make the
        FFT more efficient

        :param ts: input time series
        :type ts: np.ndarray
        :return: zero padded time series
        :rtype: np.ndarray

        """

        pad_ts = zero_pad(ts)

        if self.plot:
            self._subplots(
                self.time_array,
                pad_ts[0 : self.time_array.size],
                (0.7, 0.1, 0.25),
                self.subplot_dict["pad"],
                "Zero Pad",
            )
        return pad_ts

[docs]    def apply_t_window(self, ts):
        """
        Apply a window in the time domain. Get the available windows from
        `scipy.signal.windows`

        :param ts: input time series
        :type ts: np.ndarray
        :return: windowed time series
        :rtype: np.ndarray

        """
        w = self.get_window(self.t_window, self.t_window_params, self.ts.size)
        ts = ts * w

        if self.plot:
            self._subplots(
                self.time_array,
                ts,
                (0.7, 0.1, 0.25),
                self.subplot_dict["t_window"],
                f"t_window {self.t_window.capitalize()}",
            )
        return ts

[docs]    def apply_f_window(self, data):
        """
        Apply a frequency domain window. Get the available windows from
        `scipy.signal.windows`

        Need to create a window twice the size of the input because we are
        only taking the rfft which gives just half the spectra
        and then take only half the window

        :param data: input spectra
        :type data: np.ndarray
        :return: windowed spectra
        :rtype: np.ndarray

        """

        w = self.get_window(self.f_window, self.f_window_params, 2 * data.size)[
            data.size :
        ]
        data = data * w
        if self.plot:
            f = np.fft.rfftfreq(2 * data.size, d=self.sample_interval)[1:]
            ax_t = self.fig.get_axes()[0]
            ax_f = self.fig.get_axes()[1]
            ax = self.fig.add_subplot(
                self.nrows, 2, self.subplot_dict["f_window"], sharex=ax_t
            )
            ax2 = self.fig.add_subplot(
                self.nrows, 2, self.subplot_dict["f_window"] + 1, sharex=ax_f
            )

            ax.plot(
                self.time_array,
                abs(np.fft.irfft(data)[0 : self.time_array.size]),
                color=(0.85, 0.1, 0.15),
            )
            ax2.loglog(f, abs(data), color=(0.85, 0.1, 0.15))
            axt2 = ax2.twinx()
            axt2.semilogx(f, w, color=(0.5, 0.5, 0.5), zorder=0)
            line = Line2D([0], [0], color=(0.85, 0.1, 0.15))
            ax.legend(
                [line],
                [f"Freq Window {self.f_window.capitalize()}"],
                loc="upper left",
                borderaxespad=0.01,
                borderpad=0.1,
            )
        return data

[docs]    def apply_bandpass(self, ts):
        """
        apply a bandpass filter to the calibrated data


        :param ts: calibrated time series
        :type ts: np.ndarray
        :return: bandpassed time series
        :rtype: np.ndarray

        """
        ts = butter_bandpass_filter(
            ts,
            self.bandpass["low"],
            self.bandpass["high"],
            self.sample_interval,
            order=self.bandpass["order"],
        )

        if self.plot:
            self._subplots(
                self.time_array,
                ts,
                (0.875, 0.1, 0.1),
                self.subplot_dict["bandpass"],
                "Band Pass",
            )
        return ts

    def _get_subplot_count(self):
        """
        helper function to get subplot information

        :return: dictionary of subplot information
        :rtype: dictionary

        """
        order = [
            "detrend",
            "zero_mean",
            "t_window",
            "pad",
            "f_window",
            "bandpass",
        ]
        pdict = {
            "pad": self.zero_pad,
            "zero_mean": self.zero_mean,
            "detrend": self.detrend,
            "t_window": self.t_window,
            "f_window": self.f_window,
            "bandpass": self.bandpass,
        }
        subplot_dict = {}
        count = 3
        nrows = 2
        for key in order:
            value = pdict[key]
            if value:
                subplot_dict[key] = count
                count += 2
                nrows += 1
        self.nrows = nrows

        return subplot_dict

[docs]    def remove_instrument_response(self, operation="divide"):
        """
        Remove instrument response following the recipe provided

        :return: calibrated time series
        :rtype: np.ndarray

        """
        ts = np.copy(self.ts)
        f = np.fft.rfftfreq(ts.size, d=self.sample_interval)
        step = 1
        if self.plot:
            self.subplot_dict = self._get_subplot_count()
            self.fig = plt.figure(figsize=[10, 12])
            self.fig.clf()
            ax = self.fig.add_subplot(self.nrows, 2, 1)
            ax2 = self.fig.add_subplot(self.nrows, 2, 2)
            (l1,) = ax.plot(self.time_array, ts, color="k", lw=2)
            ax.set_xlim((self.time_array[0], self.time_array[-1]))
            (l2,) = ax2.loglog(f, abs(np.fft.rfft(ts)), "k", lw=2)
            ax2.set_xlim((f[0], f[-1]))
            ax.legend(
                [l1],
                ["Original"],
                loc="upper left",
                borderaxespad=0.01,
                borderpad=0.1,
            )
        # detrend
        if self.detrend:
            ts = self.apply_detrend(ts)
            self.logger.debug(f"Step {step}: Applying Linear Detrend")
            step += 1
        # zero mean
        if self.zero_mean:
            ts = self.apply_zero_mean(ts)
            self.logger.debug(f"Step {step}: Removing Mean")
            step += 1
        # filter in time domain
        if self.t_window is not None:
            ts = self.apply_t_window(ts)
            self.logger.debug(
                f"Step {step}: Applying {self.t_window} Time Window"
            )
            step += 1
            if self.plot:
                wax = self.fig.get_axes()[
                    self.subplot_dict["t_window"] - 1
                ].twinx()
                (tw,) = wax.plot(
                    self.time_array,
                    self.get_window(
                        self.t_window, self.t_window_params, ts.size
                    ),
                    color=(0.75, 0.75, 0.75),
                    zorder=0,
                )
        if self.zero_pad:
            # pad the time series to a power of 2, this may be overkill, especially for long time series
            ts = self.apply_zero_pad(ts)
            self.logger.debug(f"Step {step}: Applying Zero Padding")
            step += 1
        # get the real frequencies of the FFT
        f = np.fft.rfftfreq(ts.size, d=self.sample_interval)

        if self.channel_response_filter.filters_list is []:
            raise ValueError(
                "There are no filters in channel_response to remove"
            )
        # compute the complex response given the frequency range of the FFT
        # the complex response assumes frequencies are in reverse order and flip them on input
        # so we need to flip the complex reponse so it aligns with the fft.
        cr = self.channel_response_filter.complex_response(f)[::-1]
        # remove the DC term at frequency == 0
        cr[-1] = abs(cr[-2]) + 0.0j

        data = np.fft.rfft(ts)
        # remove DC term
        data[-1] = abs(data[-1]) + 0.0j

        # if a window is requested then create it here and mulitply by the data
        # the windows are designed to be symmetrical about frequency = 0
        # here we are taking only the real part of the FFT so we cut the window in half
        if self.f_window is not None:
            data = self.apply_f_window(data)
            self.logger.debug(
                f"Step {step}: Applying {self.f_window} Frequency Window"
            )
            step += 1

        if operation == "divide":
            # calibrate the time series, compute real part of fft, divide out
            # channel response, inverse fft
            calibrated_ts = np.fft.irfft(data / cr)[0 : self.ts.size]
            self.logger.debug(
                f"Step {step}: Removing Calibration by {operation}"
            )
            step += 1

        elif operation == "multiply":
            # calibrate the time series, compute real part of fft, multiply out
            # channel response, inverse fft
            calibrated_ts = np.fft.irfft(data * cr)[0 : self.ts.size]
            self.logger.debug(
                f"Step {step}: Removing Calibration  by {operation}"
            )
            step += 1

        # If a time window was applied, need to un-apply it to reconstruct the signal.
        if self.t_window is not None:
            w = self.get_window(
                self.t_window, self.t_window_params, calibrated_ts.size
            )
            calibrated_ts = calibrated_ts / w
            self.logger.debug(f"Step {step}: Un-applying Time Window")
            step += 1
        if self.bandpass:
            calibrated_ts = self.apply_bandpass(calibrated_ts)
            self.logger.debug(f"Step {step}: Applying Bandpass Filter")
            step += 1
        if self.plot:
            self._subplots(
                self.time_array[0 : calibrated_ts.size],
                calibrated_ts,
                (1, 0.1, 0.1),
                self.nrows * 2 - 1,
                "Calibrated",
            )
            self.fig.get_axes()[-2].set_ylabel(
                self.channel_response_filter.units_in
            )
            if self.t_window is not None:
                wax = self.fig.get_axes()[-2].twinx()
                (tw,) = wax.plot(
                    self.time_array, 1.0 / w, color=(0.5, 0.5, 0.5), zorder=0
                )
            self.fig.tight_layout()
            plt.show()
        return calibrated_ts


[docs]def adaptive_notch_filter(
    bx,
    df=100,
    notches=[50, 100],
    notchradius=0.5,
    freqrad=0.9,
    rp=0.1,
    dbstop_limit=5.0,
):
    """

    :param bx: time series to filter
    :type bx: np.ndarray
    :param df: sample rate in samples per second, defaults to 100
    :type df: float, optional
    :param notches: list of frequencies to locate notches at in Hz,
     defaults to [50, 100]
    :type notches: list, optional
    :param notchradius: notch radius, defaults to 0.5
    :type notchradius: float, optional
    :param freqrad: radius to search for a peak at the notch frequency,
     defaults to 0.9
    :type freqrad: float, optional
    :param rp: ripple of Chebyshev type 1 filter, lower numbers means less
     ripples, defaults to 0.1
    :type rp: float, optional
    :param dbstop_limit: limits the difference between the peak at the
     notch and surrounding spectra.  Any difference
     above dbstop_limit will be filtered, anything
     less will not, defaults to 5.0
    :type dbstop_limit: float, optional
    :return: notch filtered data
    :rtype: np.ndarray
    :return: list of notch frequencies
    :rtype: list


    Example
    ---------

        >>> import RemovePeriodicNoise_Kate as rmp
        >>> # make a variable for the file to load in
        >>> fn = r"/home/MT/mt01_20130101_000000.BX"
        >>> # load in file, if the time series is not an ascii file
        >>> # might need to add keywords to np.loadtxt or use another
        >>> # method to read in the file
        >>> bx = np.loadtxt(fn)
        >>> # create a list of frequencies to filter out
        >>> freq_notches = [50, 150, 200]
        >>> # filter data
        >>> bx_filt, filt_lst = rmp.adaptiveNotchFilter(bx, df=100.
        >>> ...                                         notches=freq_notches)
        >>> #save the filtered data into a file
        >>> np.savetxt(r"/home/MT/Filtered/mt01_20130101_000000.BX", bx_filt)

    Notes:

        Most of the time the default parameters work well, the only thing
        you need to change is the notches and perhaps the radius.  I would
        test it out with a few time series to find the optimum parameters.
        Then make a loop over all you time series data. Something like

        >>> import os
        >>> dirpath = r"/home/MT"
        >>> #make a director to save filtered time series
        >>> save_path = r"/home/MT/Filtered"
        >>> if not os.path.exists(save_path):
        >>>     os.mkdir(save_path)
        >>> for fn in os.listdir(dirpath):
        >>>     bx = np.loadtxt(os.path.join(dirpath, fn)
        >>>     bx_filt, filt_lst = rmp.adaptiveNotchFilter(bx, df=100.
        >>>     ...                                         notches=freq_notches)
        >>>     np.savetxt(os.path.join(save_path, fn), bx_filt)

    """

    bx = np.array(bx)

    if type(notches) is list:
        notches = np.array(notches)
    elif type(notches) in [float, int]:
        notches = np.array([notches], dtype=np.float)
    df = float(df)  # make sure df is a float
    dt = 1.0 / df  # sampling rate

    # transform data into frequency domain to find notches
    BX = np.fft.fft(zero_pad(bx))
    n = len(BX)  # length of array
    dfn = df / n  # frequency step
    dfnn = int(freqrad / dfn)  # radius of frequency search
    fn = notchradius  # filter radius
    freq = np.fft.fftfreq(n, dt)

    filtlst = []
    for notch in notches:
        if notch > freq.max():
            break
        else:
            fspot = int(round(notch / dfn))
            nspot = np.where(
                abs(BX)
                == max(abs(BX[max([fspot - dfnn, 0]) : min([fspot + dfnn, n])]))
            )[0][0]

            med_bx = np.median(
                abs(
                    BX[
                        max([nspot - dfnn * 10, 0]) : min(
                            [nspot + dfnn * 10, n]
                        )
                    ]
                )
                ** 2
            )

            # calculate difference between peak and surrounding spectra in dB
            dbstop = 10 * np.log10(abs(BX[nspot]) ** 2 / med_bx)
            if np.nan_to_num(dbstop) == 0.0 or dbstop < dbstop_limit:
                filtlst.append("No need to filter \n")
                pass
            else:
                filtlst.append([freq[nspot], dbstop])
                ws = 2 * np.array([freq[nspot] - fn, freq[nspot] + fn]) / df
                wp = (
                    2
                    * np.array([freq[nspot] - 2 * fn, freq[nspot] + 2 * fn])
                    / df
                )
                ford, wn = signal.cheb1ord(wp, ws, 1, dbstop)
                b, a = signal.cheby1(1, 0.5, wn, btype="bandstop")
                bx = signal.filtfilt(b, a, bx)
    return bx, filtlst