From 4e35dcf3563370e060f3ada8d682be3b8c2d829a Mon Sep 17 00:00:00 2001
From: Arin Wongprommoon <arin.wongprommoon@ed.ac.uk>
Date: Tue, 11 Jul 2023 16:38:55 +0100
Subject: [PATCH] refactor: non process-parameter version of fft process

WHY IS THIS CHANGE NEEDED?:
- send code to ritesh
---
 processes/fft_function.py | 113 ++++++++++++++++++++++++++++++++++++++
 1 file changed, 113 insertions(+)
 create mode 100644 processes/fft_function.py

diff --git a/processes/fft_function.py b/processes/fft_function.py
new file mode 100644
index 0000000..650d402
--- /dev/null
+++ b/processes/fft_function.py
@@ -0,0 +1,113 @@
+from collections import namedtuple
+
+import numpy as np
+import pandas as pd
+from scipy.signal import periodogram
+
+def classical_periodogram(
+    self,
+    timeseries,
+    sampling_period,
+    oversampling_factor,
+):
+    """
+    Estimates the power spectral density using a periodogram.
+
+    This is based on a fast Fourier transform.
+
+    The power spectrum is normalised so that the area under the power
+    spectrum is constant across different time series, thus allowing users
+    to easily compare spectra across time series.  See:
+    * Scargle (1982). Astrophysical Journal 263 p. 835-853
+    * Glynn et al (2006). Bioinformatics 22(3) 310-316
+
+    Parameters
+    ---------
+    timeseries: array_like
+        Time series of measurement values.
+
+    sampling_period: float
+        Sampling period of measurement values, in unit time.
+
+    oversampling_factor: float
+        Oversampling factor, which indicates how many times a signal is
+        sampled over the Nyquist rate.  For example, if the oversampling
+        factor is 2, the signal is sampled at 2 times the Nyquist rate.
+
+    Returns
+    -------
+    freqs: ndarray
+        Array of sample frequencies, unit time-1.
+
+    power: ndarray
+        Power spectral density or power spectrum of the time series,
+        arbitrary units.
+    """
+    freqs, power = periodogram(
+        timeseries,
+        fs=1 / (oversampling_factor * sampling_period),
+        nfft=len(timeseries) * oversampling_factor,
+        detrend="constant",
+        return_onesided=True,
+        scaling="spectrum",
+    )
+    # Required to correct units; units will be expressed in min-1 (or any other
+    # unit)
+    freqs = oversampling_factor * freqs
+    # Normalisation (Scargle, 1982; Glynn et al., 2006)
+    power = power * (0.5 * len(timeseries))
+    # Normalisation by variance to allow comparing different time series
+    # (Scargle, 1982)
+    power = power / np.var(timeseries, ddof=1)
+    return freqs, power
+
+
+def fft_run(signal: pd.DataFrame, sampling_period=5, oversampling_factor=1):
+    """
+    Estimates the power spectral density of a dataframe of time series.
+
+    This uses the classical periodogram.
+
+    All NaNs are dropped as the Fourier transform used does not afford
+    missing time points or time points with uneven spacing in the time
+    dimension.  For time series with missing values, the Lomb-Scargle
+    periodogram is suggested (processes/lsp.py)
+
+    Parameters
+    ----------
+    signal: pandas.DataFrame
+        Time series, with rows indicating individiual time series (e.g. from
+        each cell), and columns indicating time points.
+
+    Returns
+    -------
+    freqs_df: pandas.DataFrame
+        Sample frequencies from each time series, with labels preserved from
+        'signal'.
+
+    power_df: pandas.DataFrame
+        Power spectrum from each time series, with labels preserved from
+        'signal'.
+    """
+    FftAxes = namedtuple("FftAxes", ["freqs", "power"])
+    # Each element in this list is a named tuple: (freqs, power)
+    axes = [
+        FftAxes(
+            *classical_periodogram(
+                timeseries=signal.iloc[row_index, :].dropna().values,
+                sampling_period=sampling_period,
+                oversampling_factor=oversampling_factor,
+            )
+        )
+        for row_index in range(len(signal))
+    ]
+
+    freqs_df = pd.DataFrame(
+        [element.freqs for element in axes], index=signal.index
+    )
+
+    power_df = pd.DataFrame(
+        [element.power for element in axes], index=signal.index
+    )
+
+    return freqs_df, power_df
-- 
GitLab