Skip to content
Snippets Groups Projects
  • Alán Muñoz's avatar
    89a9be7f
    Updated from master and add Gaussian Process. · 89a9be7f
    Alán Muñoz authored
    Gaussian Process currently fixed to growth rate, could be used for
    any signal in theory so the names might be changed later.
    Also edited the processor so it can deal with a process that returns
    multiple signals.
    
    
    Former-commit-id: cd1322c7e17c1e041d9cb99186ca26c8451935c5
    89a9be7f
    History
    Updated from master and add Gaussian Process.
    Alán Muñoz authored
    Gaussian Process currently fixed to growth rate, could be used for
    any signal in theory so the names might be changed later.
    Also edited the processor so it can deal with a process that returns
    multiple signals.
    
    
    Former-commit-id: cd1322c7e17c1e041d9cb99186ca26c8451935c5
gpsignal.py 3.00 KiB
"""Gaussian process fit of a Signal."""
import logging

from postprocessor.core.processes.base import ParametersABC, ProcessABC
import numpy as np
import pandas as pd

import gaussian_processes.gaussianprocess as gp

def estimate_gr(volume, dt, noruns, bounds, verbose):
    """
    Parameters
    ----------

    volume : pd.Series
        The volume series of a given cell
    dt : float
        The time interval in hours
    noruns : int
        The number of runs for optimisation
    bounds : dict
        The hyperparameter bounds used for optimisation
    verbose : bool
        If True, prints results

    Returns
    -------
    """
    volume = volume.values
    n = len(volume)
    idx = np.arange(n)
    t = idx * dt
    y = volume[volume > 0]
    x = t[volume > 0]
    idx = idx[volume > 0]
    # Fit the GP
    mg = gp.maternGP(bounds, x, y)
    mg.findhyperparameters(noruns=noruns)
    if verbose:
        mg.results()  # Prints out the hyperparameters
    mg.predict(x, derivs=2)  # Saves the predictions to object
    # Save the predictions to a csv file so they can be reused
    results = dict(time=mg.x, volume=mg.y, fit_time=mg.xnew, fit_volume=mg.f,
                   growth_rate=mg.df, d_growth_rate=mg.ddf,
                   volume_var=mg.fvar, growth_rate_var=mg.dfvar,
                   d_growth_rate_var=mg.ddfvar)
    for name, value in results.items():
        results[name] = np.full((n, ), np.nan)
        results[name][idx] = value
    return results

# Give that to a writer: NOTE the writer needs to learn how to write the
# output of a process that returns multiple signals like this one does.

class gpParameters(ParametersABC):
    default_dict = dict(dt=5,
             noruns=5,
             bounds={0: (-2, 3),
                     1: (-2, 1),
                     2: (-4, -1)},
             verbose=False)
    def __init__(self, dt, noruns, bounds, verbose):
        """
        Parameters
        ----------
            dt : float
                The time step between time points, in minutes
            noruns : int
                The number of times the optimisation is tried
            bounds : dict
                Hyperparameter bounds for the Matern Kernel
            verbose : bool
                Determines whether to print hyperparameter results
        """
        self.dt = dt
        self.noruns = noruns
        self.bounds = bounds
        self.verbose = verbose

    @classmethod
    def default(cls):
        return cls.from_dict(cls.default_dict)


class GPSignal(ProcessABC):
    """Gaussian process fit of a Signal.
    """
    def __init__(self, parameters: gpParameters):
        super().__init__(parameters)

    def run(self, signal: pd.DataFrame):
        results = signal.apply(lambda x: estimate_gr(x,
                                                     **self.parameters.to_dict()),
                               result_type='expand').T
        multi_signal = {name: pd.DataFrame(np.vstack(results[name]))
                        for name in results.columns}
        return multi_signal