diff --git a/src/wela/butterworth_filter.py b/src/wela/butterworth_filter.py deleted file mode 100644 index 3c8a903546283a9062e3139f9b84578e03a36bf7..0000000000000000000000000000000000000000 --- a/src/wela/butterworth_filter.py +++ /dev/null @@ -1,38 +0,0 @@ -import numpy as np -from scipy import signal -import pandas as pd - - -def butterworth_filter(data, params=None): - """Apply Butterworth filter to time series arranged in rows.""" - # second-order-sections output - # by default, using a digital filter - if params is None: - params = { - "order": 2, - "critical_freqs": (1 / 350), - "filter_type": "highpass", - "sampling_freq": 1 / 3, - } - sos = signal.butter( - N=params["order"], - Wn=params["critical_freqs"], - btype=params["filter_type"], - fs=params["sampling_freq"], - output="sos", - ) - # fill NaNs between data points - data = pd.DataFrame(data).interpolate(axis=1).to_numpy() - # subtract time series by mean - data_norm = data - np.nanmean(data, axis=1).reshape(data.shape[0], 1) - if np.any(np.isnan(data_norm)): - # filter one at a time because of NaNs at different times - filtered_timeseries = np.nan * np.ones(data_norm.shape) - for i in range(data_norm.shape[0]): - s_data = data_norm[i, :] - filtered = signal.sosfiltfilt(sos, s_data[~np.isnan(s_data)]) - filtered_timeseries[i, ~np.isnan(s_data)] = filtered - else: - # filter all at once - filtered_timeseries = signal.sosfiltfilt(sos, data_norm, axis=1) - return filtered_timeseries diff --git a/src/wela/fft_filtering.py b/src/wela/fft_filtering.py new file mode 100644 index 0000000000000000000000000000000000000000..7469f01ce13a62df9fcdf842486cf79d5e09b73a --- /dev/null +++ b/src/wela/fft_filtering.py @@ -0,0 +1,140 @@ +import numpy as np +import pandas as pd +from scipy import signal +from scipy.integrate import simpson + + +def classic_periodogram(data, sampling_interval, oversampling_factor=1): + """ + Estimate the power spectral density using a periodogram. + + This is based on a fast Fourier transform. + + The power spectrum is normalised so that the area under the power + spectrum is constant across different time series, thus allowing users + to easily compare spectra across time series. See: + * Scargle (1982). Astrophysical Journal 263 p. 835-853 + * Glynn et al (2006). Bioinformatics 22(3) 310-316 + + Parameters + --------- + data: 2D array + Time series of measurement values as rows. + sampling_interval: float + Sampling interval of measurement values. + oversampling_factor: float + Indicates how many times a signal is sampled over the Nyquist rate. + For example, if the oversampling factor is 2, the signal is sampled + at twice the Nyquist rate. + + Returns + ------- + freqs: 1D array + Array of sample frequencies. + power: 2D array + Power spectral density or power spectrum of the time series. + + Example + ------- + >>> rng = np.random.default_rng() + >>> t = np.arange(0, 10, 0.1) + >>> ys = np.stack([np.sin(2*np.pi*t) + 3*np.sin(4*np.pi*t) for _ in range(10)]) + >>> data = ys + rng.standard_normal(ys.shape)*0.3 + >>> f, p = classic_periodogram(data, 0.1) + >>> plt.plot(f, p[3,:]) + """ + # interpolate any NaN between data values + data = pd.DataFrame(data).interpolate(axis=1).to_numpy() + # number of time points + nt = data.shape[1] + # parameters + params = { + "fs": 1 / (oversampling_factor * sampling_interval), + "nfft": nt * oversampling_factor, + "detrend": "constant", + "return_onesided": True, + "scaling": "spectrum", + } + # find periodogram + if np.any(np.isnan(data)): + power = np.empty((data.shape[0], int(nt / 2) + 1)) + for i in range(data.shape[0]): + freqs, power[i, :] = signal.periodogram( + data[i, ~np.isnan(data[i, :])], **params + ) + else: + freqs, power = signal.periodogram(data, **params) + # required to correct unit + freqs = oversampling_factor * freqs + # normalisation (Scargle, 1982; Glynn et al., 2006) + power = power * (0.5 * nt) + # normalisation by variance to compare different time series (Scargle, 1982) + power = power / np.nanvar(data, axis=1).reshape((data.shape[0], 1)) + return freqs, power + + +def snr(fft_freqs, fft_power, cutoff_freq): + """ + Get signal-to-noise ratio from a Fourier spectrum. + + Get signal-to-noise ratio from a Fourier spectrum. Frequencies lower than + the cut-off are considered signal; frequencies above the cut-off are + considered noise. The signal-to-noise ratio is defined as the area under + the Fourier spectrum to the left of the cut-off divided by the area under + the Fourier spectrum to the right of the cut-off. + + Parameters + ---------- + fft_freqs: 1D array + The frequencies of the Fourier spectra. + fft_power: 2D array + The periodogram for time series as rows. + cutoff_freq : float + Cut-off frequency to divide signal from noise. + """ + snr = np.empty(fft_power.shape[0]) + cutoff_index = np.argmin(np.abs(fft_freqs - cutoff_freq)) + for i in range(fft_power.shape[0]): + area_all = simpson(y=fft_power[i, :], x=fft_freqs) + area_signal = simpson( + y=fft_power[i, 0:cutoff_index], + x=fft_freqs[0:cutoff_index], + ) + area_noise = area_all - area_signal + snr[i] = area_signal / area_noise + return snr + + +def butterworth_filter(data, params=None): + """Apply Butterworth filter to time series arranged in rows.""" + # second-order-sections output + # by default, using a digital filter + if params is None: + params = { + "order": 2, + "critical_freqs": (1 / 350), + "filter_type": "highpass", + "sampling_freq": 1 / 3, + } + sos = signal.butter( + N=params["order"], + Wn=params["critical_freqs"], + btype=params["filter_type"], + fs=params["sampling_freq"], + output="sos", + ) + # fill NaNs between data points + data = pd.DataFrame(data).interpolate(axis=1).to_numpy() + # subtract time series by mean + data_norm = data - np.nanmean(data, axis=1).reshape(data.shape[0], 1) + if np.any(np.isnan(data_norm)): + # filter one at a time because of NaNs at different times + filtered_timeseries = np.nan * np.ones(data_norm.shape) + for i in range(data_norm.shape[0]): + s_data = data_norm[i, :] + filtered = signal.sosfiltfilt(sos, s_data[~np.isnan(s_data)]) + filtered_timeseries[i, ~np.isnan(s_data)] = filtered + else: + # filter all at once + filtered_timeseries = signal.sosfiltfilt(sos, data_norm, axis=1) + return filtered_timeseries diff --git a/src/wela/plotting.py b/src/wela/plotting.py index 99189d942c026a50ad8f4c4597c221fce4f7628f..8e7c2d182474144f84d9e37e2f21efffc1a1760c 100644 --- a/src/wela/plotting.py +++ b/src/wela/plotting.py @@ -4,7 +4,6 @@ import matplotlib.cm import matplotlib.pylab as plt import numpy as np import numpy.matlib -from wela.butterworth_filter import butterworth_filter def kymograph( @@ -538,7 +537,7 @@ def bud_to_bud_plot( return_signal=False, df=None, title=None, - filter=False, + filter_func=None, ): """ Plot the median and percentiles of a signal between consecutive buddings. @@ -570,13 +569,15 @@ def bud_to_bud_plot( If passed, use this data rather than dataloader's. title: str, optional Title for plot. - filter: boolean - If True, apply a butterworth filter to each time series. + filter_func: fn + Filter to apply to each time series. Example ------- >>> from wela.plotting import bud_to_bud_plot + >>> from wela.fft_filtering import butterworth_filter >>> bud_to_bud_plot(8.4, "bud_growth_rate", dl) + >>> bud_to_bud_plot(4, "flavin", dl, group="fy4", filter_func=butterworth_filter) """ if df is None: t, signal_data = dl.get_time_series(signal, group=group) @@ -584,8 +585,8 @@ def bud_to_bud_plot( else: t, signal_data = dl.get_time_series(signal, group=group, df=df) t, buddings = dl.get_time_series("buddings", group=group, df=df) - if filter: - signal_data = butterworth_filter(signal_data) + if filter_func is not None: + signal_data = filter_func(signal_data) if np.max(t) > 48: # convert to hours t = t / 60