From 89a9be7fa5e5538a7a78299ca0d905130f043a8c Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Al=C3=A1n=20Mu=C3=B1oz?= <alan.munoz@ed.ac.uk>
Date: Tue, 5 Oct 2021 23:00:56 +0100
Subject: [PATCH] Update from master and add Gaussian Process.

The Gaussian Process is currently fixed to growth rate; in theory it
could be used for any signal, so the names may change later.
Also edited the processor so that it can handle a process that returns
multiple signals.
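
For reference, intended usage is roughly the sketch below (the dummy
data and variable names are illustrative, and the signal is assumed to
have one row per cell and one column per time point):

    import numpy as np
    import pandas as pd

    from postprocessor.core.processes.gpsignal import GPSignal, gpParameters

    # Illustrative volume signal: rows are cells, columns are time points
    volumes = pd.DataFrame(1 + np.random.rand(3, 10))

    # Fit one Gaussian process per cell using the default hyperparameter
    # bounds; the result is a dict of DataFrames keyed by output name,
    # e.g. multi_signal["growth_rate"]
    gpsig = GPSignal(gpParameters.default())
    multi_signal = gpsig.run(volumes)

The processor then writes each entry of such a dict under its own key,
e.g. /postprocessing/<process>/<outpath>/growth_rate.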


Former-commit-id: cd1322c7e17c1e041d9cb99186ca26c8451935c5
---
 __init__.py                |  0
 core/processes/base.py     | 55 +--------------------
 core/processes/dsignal.py  |  1 -
 core/processes/gpsignal.py | 97 ++++++++++++++++++++++++++++++++++++++
 core/processes/merger.py   |  2 +-
 core/processes/picker.py   |  4 +-
 core/processor.py          | 20 +++++---
 7 files changed, 115 insertions(+), 64 deletions(-)
 create mode 100644 __init__.py
 create mode 100644 core/processes/gpsignal.py

diff --git a/__init__.py b/__init__.py
new file mode 100644
index 00000000..e69de29b
diff --git a/core/processes/base.py b/core/processes/base.py
index 9dadb047..142bd6c3 100644
--- a/core/processes/base.py
+++ b/core/processes/base.py
@@ -1,54 +1 @@
-from pathlib import Path, PosixPath
-from typing import Union
-from abc import ABC, abstractmethod
-
-from yaml import safe_load, dump
-
-
-class ParametersABC(ABC):
-    """
-    Base class to add yaml functionality to parameters
-
-    """
-
-    def to_dict(self):
-        return self.__dict__
-
-    @classmethod
-    def from_dict(cls, d: dict):
-        return cls(**d)
-
-    def to_yaml(self, path: Union[PosixPath, str] = None):
-        if path:
-            with open(Path(path), "w") as f:
-                dump(self.to_dict(), f)
-        return dump(self.to_dict())
-
-    @classmethod
-    def from_yaml(cls, path: Union[PosixPath, str]):
-        with open(Path(file)) as f:
-            params = safe_load(f)
-        return cls(**params)
-
-    @classmethod
-    @abstractmethod
-    def default(cls):
-        pass
-
-
-class ProcessABC(ABC):
-    "Base class for processes"
-
-    def __init__(self, parameters):
-        self._parameters = parameters
-
-        for k, v in parameters.to_dict().items():  # access parameters directly
-            setattr(self, k, v)
-
-    @property
-    def parameters(self):
-        return self._parameters
-
-    @abstractmethod
-    def run(self):
-        pass
+from agora.base import ParametersABC, ProcessABC
\ No newline at end of file
diff --git a/core/processes/dsignal.py b/core/processes/dsignal.py
index c5960757..484ad599 100644
--- a/core/processes/dsignal.py
+++ b/core/processes/dsignal.py
@@ -1,7 +1,6 @@
 import pandas as pd
 
 from postprocessor.core.processes.base import ParametersABC, ProcessABC
-from postprocessor.core.functions.tracks import clean_tracks, merge_tracks, join_tracks
 
 
 class dsignalParameters(ParametersABC):
diff --git a/core/processes/gpsignal.py b/core/processes/gpsignal.py
new file mode 100644
index 00000000..f6cdb45f
--- /dev/null
+++ b/core/processes/gpsignal.py
@@ -0,0 +1,97 @@
+"""Gaussian process fit of a Signal."""
+import logging
+
+from postprocessor.core.processes.base import ParametersABC, ProcessABC
+import numpy as np
+import pandas as pd
+
+import gaussian_processes.gaussianprocess as gp
+
+def estimate_gr(volume, dt, noruns, bounds, verbose):
+    """
+    Parameters
+    ----------
+    volume : pd.Series
+        The volume series of a given cell
+    dt : float
+        The time interval in hours
+    noruns : int
+        The number of runs for optimisation
+    bounds : dict
+        The hyperparameter bounds used for optimisation
+    verbose : bool
+        If True, prints results
+
+    Returns
+    -------
+    results : dict of NaN-padded arrays (data, fit, derivatives, variances)
+    """
+    volume = volume.values
+    n = len(volume)
+    idx = np.arange(n)
+    t = idx * dt
+    y = volume[volume > 0]
+    x = t[volume > 0]
+    idx = idx[volume > 0]
+    # Fit the GP
+    mg = gp.maternGP(bounds, x, y)
+    mg.findhyperparameters(noruns=noruns)
+    if verbose:
+        mg.results()  # Prints out the hyperparameters
+    mg.predict(x, derivs=2)  # Saves the predictions to object
+    # Gather the predictions so that the writer can save them for reuse
+    results = dict(time=mg.x, volume=mg.y, fit_time=mg.xnew, fit_volume=mg.f,
+                   growth_rate=mg.df, d_growth_rate=mg.ddf,
+                   volume_var=mg.fvar, growth_rate_var=mg.dfvar,
+                   d_growth_rate_var=mg.ddfvar)
+    for name, value in results.items():
+        results[name] = np.full((n, ), np.nan)
+        results[name][idx] = value
+    return results
+
+# NOTE: this output goes to a writer, which needs to be able to write the
+# output of a process that returns multiple signals, as this one does.
+
+class gpParameters(ParametersABC):
+    default_dict = dict(dt=5,
+                        noruns=5,
+                        bounds={0: (-2, 3),
+                                1: (-2, 1),
+                                2: (-4, -1)},
+                        verbose=False)
+    def __init__(self, dt, noruns, bounds, verbose):
+        """
+        Parameters
+        ----------
+            dt : float
+                The time step between time points, in minutes
+            noruns : int
+                The number of times the optimisation is tried
+            bounds : dict
+                Hyperparameter bounds for the Matern Kernel
+            verbose : bool
+                Determines whether to print hyperparameter results
+        """
+        self.dt = dt
+        self.noruns = noruns
+        self.bounds = bounds
+        self.verbose = verbose
+
+    @classmethod
+    def default(cls):
+        return cls.from_dict(cls.default_dict)
+
+
+class GPSignal(ProcessABC):
+    """Gaussian process fit of a Signal.
+    """
+    def __init__(self, parameters: gpParameters):
+        super().__init__(parameters)
+
+    def run(self, signal: pd.DataFrame):
+        results = signal.apply(
+            lambda x: estimate_gr(x, **self.parameters.to_dict()),
+            result_type='expand', axis=1)
+        multi_signal = {name: pd.DataFrame(np.vstack(results[name]))
+                        for name in results.columns}
+        return multi_signal
diff --git a/core/processes/merger.py b/core/processes/merger.py
index 1fa07b91..f7e90a5c 100644
--- a/core/processes/merger.py
+++ b/core/processes/merger.py
@@ -1,4 +1,4 @@
-from postprocessor.core.processes.base import ParametersABC, ProcessABC
+from agora.base import ParametersABC, ProcessABC
 from postprocessor.core.functions.tracks import get_joinable
 
 
diff --git a/core/processes/picker.py b/core/processes/picker.py
index 2c057df9..3668e922 100644
--- a/core/processes/picker.py
+++ b/core/processes/picker.py
@@ -6,7 +6,7 @@ import pandas as pd
 
 from core.cells import CellsHDF
 
-from postprocessor.core.processes.base import ParametersABC, ProcessABC
+from agora.base import ParametersABC, ProcessABC
 from postprocessor.core.functions.tracks import max_ntps, max_nonstop_ntps
 
 
@@ -106,7 +106,7 @@ class picker(ProcessABC):
     ):
         threshold_asint = _as_int(threshold, signals.shape[1])
         case_mgr = {
-            "present": signals.apply(max_ntps, axis=1) > threshold_asint,
+            "present": signals.notna().sum(axis=1) > threshold_asint,
             "nonstoply_present": signals.apply(max_nonstop_ntps, axis=1)
             > threshold_asint,
             "quantile": [np.quantile(signals.values[signals.notna()], threshold)],
diff --git a/core/processor.py b/core/processor.py
index e5eb4995..239edd99 100644
--- a/core/processor.py
+++ b/core/processor.py
@@ -5,13 +5,13 @@ from pydoc import locate
 import numpy as np
 import pandas as pd
 
-from postprocessor.core.processes.base import ParametersABC
-from postprocessor.core.processes.merger import mergerParameters, merger
-from postprocessor.core.processes.picker import pickerParameters, picker
+from agora.base import ParametersABC
 from core.io.writer import Writer
 from core.io.signal import Signal
 
 from core.cells import Cells
+from postprocessor.core.processes.merger import mergerParameters, merger
+from postprocessor.core.processes.picker import pickerParameters, picker
 
 
 class PostProcessorParameters(ParametersABC):
@@ -165,9 +165,17 @@ class PostProcessor:
                 else:
                     raise ("Outpath not defined", type(dataset))
 
-                self.write_result(
-                    "/postprocessing/" + process + "/" + outpath, result, metadata={}
-                )
+                if isinstance(result, dict):  # multiple signals as output
+                    for k, v in result.items():
+                        self.write_result(
+                            "/postprocessing/" + process + "/" + outpath +
+                            f'/{k}',
+                            v, metadata={}
+                        )
+                else:
+                    self.write_result(
+                        "/postprocessing/" + process + "/" + outpath, result, metadata={}
+                    )
 
     def write_result(
         self, path: str, result: Union[List, pd.DataFrame, np.ndarray], metadata: Dict
-- 
GitLab