Skip to content
Snippets Groups Projects
Commit 97ed1cbb authored by Arin Wongprommoon's avatar Arin Wongprommoon
Browse files

feat(postproc): crosscorr has stationary option

WHY IS THIS CHANGE NEEDED?:
- crosscorr process does not have stationary option and therefore will
  not perform calculations correctly if all signals are in-phase.

HOW DOES THE CHANGE SOLVE THE PROBLEM?:
- Additional 'stationary' parameter that tells the process to compute
  the mean over replicates AND time points.

WHAT SIDE EFFECTS DOES THIS CHANGE HAVE?:
- crosscorr process not used anywhere else in aliby, so should not
  affect aliby.
- crosscorr process used in multiple places in skeletons (particularly
  arin's projects).  this is an *additional* parameter, and the existing
  behaviour of crosscorr process is same as stationary=False, so i don't
  expect breaking changes.

EVIDENCE THAT COMMIT WORKS:
- new test to be run

REFERENCES:
- #72
parent 9a89fb6d
No related branches found
No related tags found
Loading
...@@ -13,6 +13,10 @@ class crosscorrParameters(ParametersABC): ...@@ -13,6 +13,10 @@ class crosscorrParameters(ParametersABC):
Attributes Attributes
---------- ----------
stationary: boolean
If True, the underlying dynamic process is assumed to be
stationary with the mean a constant, estimated from all
data points.
normalised: boolean (optional) normalised: boolean (optional)
If True, normalise the result for each replicate by the standard If True, normalise the result for each replicate by the standard
deviation over time for that replicate. deviation over time for that replicate.
...@@ -20,7 +24,7 @@ class crosscorrParameters(ParametersABC): ...@@ -20,7 +24,7 @@ class crosscorrParameters(ParametersABC):
If True, return results only for positive lags. If True, return results only for positive lags.
""" """
_defaults = {"normalised": True, "only_pos": False} _defaults = {"stationary": False, "normalised": True, "only_pos": False}
class crosscorr(PostProcessABC): class crosscorr(PostProcessABC):
...@@ -50,6 +54,10 @@ class crosscorr(PostProcessABC): ...@@ -50,6 +54,10 @@ class crosscorr(PostProcessABC):
trace_dfB: dataframe (required for cross-correlation only) trace_dfB: dataframe (required for cross-correlation only)
An array of signal values, with each row a replicate measurement An array of signal values, with each row a replicate measurement
and each column a time point. and each column a time point.
stationary: boolean
If True, the underlying dynamic process is assumed to be
stationary with the mean a constant, estimated from all
data points.
normalised: boolean (optional) normalised: boolean (optional)
If True, normalise the result for each replicate by the standard If True, normalise the result for each replicate by the standard
deviation over time for that replicate. deviation over time for that replicate.
...@@ -84,6 +92,12 @@ class crosscorr(PostProcessABC): ...@@ -84,6 +92,12 @@ class crosscorr(PostProcessABC):
>>> plt.figure() >>> plt.figure()
>>> plt.plot(ac.columns, ac.mean(axis=0, skipna=True)) >>> plt.plot(ac.columns, ac.mean(axis=0, skipna=True))
>>> plt.show() >>> plt.show()
Reference
---------
Dunlop MJ, Cox RS, Levine JH, Murray RM, Elowitz MB (2008). Regulatory
activity revealed by dynamic correlations in gene expression noise.
Nat Genet, 40, 1493-1498.
""" """
df = ( df = (
...@@ -97,25 +111,13 @@ class crosscorr(PostProcessABC): ...@@ -97,25 +111,13 @@ class crosscorr(PostProcessABC):
n_replicates = trace_A.shape[0] n_replicates = trace_A.shape[0]
# number of time points # number of time points
n_tps = trace_A.shape[1] n_tps = trace_A.shape[1]
# deviation from the mean, where the mean is calculated over replicates # autocorrelation if 2nd dataframe is not supplied
# at each time point, which allows for non-stationary behaviour if trace_dfB is None:
dmean_A = trace_A - np.nanmean(trace_A, axis=0).reshape((1, n_tps)) trace_dfB = trace_dfA
# standard deviation over time for each replicate trace_B = trace_A
stdA = np.sqrt( # find deviation from the mean
np.nanmean(dmean_A**2, axis=1).reshape((n_replicates, 1)) dmean_A, stdA = _dev(trace_A, n_replicates, n_tps, self.stationary)
) dmean_B, stdB = _dev(trace_B, n_replicates, n_tps, self.stationary)
if trace_dfB is not None:
trace_B = trace_dfB.to_numpy()
# cross correlation
dmean_B = trace_B - np.nanmean(trace_B, axis=0).reshape((1, n_tps))
stdB = np.sqrt(
np.nanmean(dmean_B**2, axis=1).reshape((n_replicates, 1))
)
else:
# auto correlation
dmean_B = dmean_A
stdB = stdA
# calculate correlation
# lag r runs over positive lags # lag r runs over positive lags
pos_corr = np.nan * np.ones(trace_A.shape) pos_corr = np.nan * np.ones(trace_A.shape)
for r in range(n_tps): for r in range(n_tps):
...@@ -148,3 +150,16 @@ class crosscorr(PostProcessABC): ...@@ -148,3 +150,16 @@ class crosscorr(PostProcessABC):
) )
else: else:
return pd.DataFrame(corr, index=df.index, columns=lags) return pd.DataFrame(corr, index=df.index, columns=lags)
def _dev(y, nr, nt, stationary=False):
# calculate deviation from the mean
if stationary:
# mean calculated over time and over replicates
dy = y - np.nanmean(y)
else:
# mean calculated over replicates at each time point
dy = y - np.nanmean(y, axis=0).reshape((1, nt))
# standard deviation calculated for each replicate
stdy = np.sqrt(np.nanmean(dy**2, axis=1).reshape((nr, 1)))
return dy, stdy
import numpy as np
import pandas as pd
import pytest
from postprocessor.core.multisignal.crosscorr import (
crosscorr,
crosscorrParameters,
)
def generate_sinusoids_df(
time_axis,
num_replicates,
):
t = time_axis
ts = np.tile(t, num_replicates).reshape((num_replicates, len(t)))
s = 3 * np.sin(
2 * np.pi * ts + 2 * np.pi * np.random.rand(num_replicates, 1)
)
s_df = pd.DataFrame(s)
return s_df
@pytest.mark.parametrize("time_axis", [np.linspace(0, 4, 200)])
@pytest.mark.parametrize("num_replicates", [333])
def test_crosscorr(
time_axis,
num_replicates,
):
"""Tests croscorr.
Tests whether a crosscorr runner can be initialised with default
parameters and runs without errors.
"""
dummy_signal1 = generate_sinusoids_df(time_axis, num_replicates)
dummy_signal2 = generate_sinusoids_df(time_axis, num_replicates)
crosscorr_runner = crosscorr(crosscorrParameters.default())
_ = crosscorr_runner.run(dummy_signal1, dummy_signal2)
0% Loading or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment