Skip to content
Snippets Groups Projects
Commit 155d7c20 authored by Alán Muñoz's avatar Alán Muñoz
Browse files

Merge branch 'issue-030' into 'dev'

feat!(crosscorr): options to compute negative lags and normalise

See merge request postprocessor!22
parents 421d8717 d75f897a
No related branches found
No related tags found
No related merge requests found
...@@ -9,18 +9,18 @@ from postprocessor.core.abc import PostProcessABC ...@@ -9,18 +9,18 @@ from postprocessor.core.abc import PostProcessABC
class crosscorrParameters(ParametersABC): class crosscorrParameters(ParametersABC):
""" """
Parameters for the 'align' process. Parameters for the 'crosscorr' process.
Attributes Attributes
---------- ----------
t: int normalised: boolean (optional)
Lag, in time points If True, normalise the result for each replicate by the standard
FIXME: clarify with Peter. deviation over time for that replicate.
only_pos: boolean (optional)
If True, return results only for positive lags.
""" """
_defaults = { _defaults = {"normalised": True, "only_pos": False}
"lagtime": None,
}
class crosscorr(PostProcessABC): class crosscorr(PostProcessABC):
...@@ -30,12 +30,17 @@ class crosscorr(PostProcessABC): ...@@ -30,12 +30,17 @@ class crosscorr(PostProcessABC):
super().__init__(parameters) super().__init__(parameters)
def run(self, trace_dfA: pd.DataFrame, trace_dfB: pd.DataFrame = None): def run(self, trace_dfA: pd.DataFrame, trace_dfB: pd.DataFrame = None):
"""Calculates normalised cross-correlations as a function of time. """Calculates normalised cross-correlations as a function of lag.
Calculates normalised auto- or cross-correlations as a function of time. Calculates normalised auto- or cross-correlations as a function of lag.
Normalisation is by the product of the standard deviation for each Lag is given in multiples of the unknown time interval between data points.
variable calculated across replicates at each time point.
With zero lag, the normalised correlation should be one. Normalisation is by the product of the standard deviation over time for
each replicate for each variable.
For the cross-correlation between sA and sB, the closest peak to zero lag
should in the positive lags if sA is delayed compared to signal B and in the
negative lags if sA is advanced compared to signal B.
Parameters Parameters
---------- ----------
...@@ -45,12 +50,40 @@ class crosscorr(PostProcessABC): ...@@ -45,12 +50,40 @@ class crosscorr(PostProcessABC):
trace_dfB: dataframe (required for cross-correlation only) trace_dfB: dataframe (required for cross-correlation only)
An array of signal values, with each row a replicate measurement An array of signal values, with each row a replicate measurement
and each column a time point. and each column a time point.
normalised: boolean (optional)
If True, normalise the result for each replicate by the standard
deviation over time for that replicate.
only_pos: boolean (optional)
If True, return results only for positive lags.
Returns Returns
------- -------
norm_corr: array or aliby dataframe corr: dataframe
An array of the correlations with each row the result for the An array of the correlations with each row the result for the
corresponding replicate and each column a time point corresponding replicate and each column a time point
lags: array
A 1D array of the lags in multiples of the unknown time interval
Example
-------
>>> import matplotlib.pyplot as plt
>>> import numpy as np
>>> import pandas as pd
>>> from postprocessor.core.multisignal.crosscorr import crosscorr
Define a sine signal with 200 time points and 333 replicates
>>> t = np.linspace(0, 4, 200)
>>> ts = np.tile(t, 333).reshape((333, 200))
>>> s = 3*np.sin(2*np.pi*ts + 2*np.pi*np.random.rand(333, 1))
>>> s_df = pd.DataFrame(s)
Find and plot the autocorrelaton
>>> ac = crosscorr.as_function(s_df)
>>> plt.figure()
>>> plt.plot(ac.columns, ac.mean(axis=0, skipna=True))
>>> plt.show()
""" """
df = ( df = (
...@@ -60,11 +93,12 @@ class crosscorr(PostProcessABC): ...@@ -60,11 +93,12 @@ class crosscorr(PostProcessABC):
) )
# convert from aliby dataframe to arrays # convert from aliby dataframe to arrays
trace_A = trace_dfA.to_numpy() trace_A = trace_dfA.to_numpy()
# number of time points
n_tps = trace_A.shape[1]
# number of replicates # number of replicates
n_replicates = trace_A.shape[0] n_replicates = trace_A.shape[0]
# deviation from mean at each time point # number of time points
n_tps = trace_A.shape[1]
# deviation from the mean, where the mean is calculated over replicates
# at each time point, which allows for non-stationary behaviour
dmean_A = trace_A - np.nanmean(trace_A, axis=0).reshape((1, n_tps)) dmean_A = trace_A - np.nanmean(trace_A, axis=0).reshape((1, n_tps))
# standard deviation over time for each replicate # standard deviation over time for each replicate
stdA = np.sqrt( stdA = np.sqrt(
...@@ -82,14 +116,35 @@ class crosscorr(PostProcessABC): ...@@ -82,14 +116,35 @@ class crosscorr(PostProcessABC):
dmean_B = dmean_A dmean_B = dmean_A
stdB = stdA stdB = stdA
# calculate correlation # calculate correlation
corr = np.nan * np.ones(trace_A.shape) # lag r runs over positive lags
# lag r runs over time points pos_corr = np.nan * np.ones(trace_A.shape)
for r in np.arange(0, n_tps): for r in range(n_tps):
prods = [ prods = [
dmean_A[:, self.lagtime] * dmean_B[:, self.lagtime + r] dmean_A[:, lagtime] * dmean_B[:, lagtime + r]
for self.lagtime in range(n_tps - r) for lagtime in range(n_tps - r)
] ]
corr[:, r] = np.nansum(prods, axis=0) / (n_tps - r) pos_corr[:, r] = np.nanmean(prods, axis=0)
norm_corr = np.array(corr) / stdA / stdB # lag r runs over negative lags
# return as a df if trace_dfA is a df else as an array # use corr_AB(-k) = corr_BA(k)
return pd.DataFrame(norm_corr, index=df.index, columns=df.columns) neg_corr = np.nan * np.ones(trace_A.shape)
for r in range(n_tps):
prods = [
dmean_B[:, lagtime] * dmean_A[:, lagtime + r]
for lagtime in range(n_tps - r)
]
neg_corr[:, r] = np.nanmean(prods, axis=0)
if self.normalised:
# normalise by standard deviation
pos_corr = pos_corr / stdA / stdB
neg_corr = neg_corr / stdA / stdB
# combine lags
lags = np.arange(-n_tps + 1, n_tps)
corr = np.hstack((np.flip(neg_corr[:, 1:], axis=1), pos_corr))
if self.only_pos:
return pd.DataFrame(
corr[:, int(lags.size / 2) :],
index=df.index,
columns=lags[int(lags.size / 2) :],
)
else:
return pd.DataFrame(corr, index=df.index, columns=lags)
0% Loading or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment