diff --git a/__init__.py b/__init__.py
new file mode 100644
index 0000000000000000000000000000000000000000..e69de29bb2d1d6434b8b29ae775ad8c2e48c5391
diff --git a/core/__init__.py b/core/__init__.py
new file mode 100644
index 0000000000000000000000000000000000000000..e5a0d9b4834ec8f46d6e0d1256c6dcaad2e460fe
--- /dev/null
+++ b/core/__init__.py
@@ -0,0 +1 @@
+#!/usr/bin/env python3
diff --git a/core/functions/__init__.py b/core/functions/__init__.py
new file mode 100644
index 0000000000000000000000000000000000000000..e5a0d9b4834ec8f46d6e0d1256c6dcaad2e460fe
--- /dev/null
+++ b/core/functions/__init__.py
@@ -0,0 +1 @@
+#!/usr/bin/env python3
diff --git a/core/functions/test_hdf.py b/core/functions/test_hdf.py
new file mode 100644
index 0000000000000000000000000000000000000000..5bd3b1a12e9b08743b8058d17ef4bb23719ac6a4
--- /dev/null
+++ b/core/functions/test_hdf.py
@@ -0,0 +1,19 @@
+#!/usr/bin/env python3
+# c=CellsHDF.from_source("/home/alan/Documents/sync_docs/PhD/tmp/DO6MS2_003store.h5")
+import h5py
+from core.cells import Cells
+import pandas as pd
+
+# f = h5py.File("/home/alan/Documents/sync_docs/PhD/tmp/DO6MS2_003store.h5")
+fname = "/shared_libs/pipeline-core/scripts/data/20191026_ss_experiments_01/DO6MS2_003store.h5"
+f = h5py.File(fname, "r")
+tracks = f["/extraction/general/None/area"][()]
+cell = Cells.from_source(fname)
+from postprocessor.core.processes.picker import Picker, PickerParameters
+
+picker = Picker(cells=cell, parameters=PickerParameters.default())
+
+from postprocessor.core.processor import PostProcessor, PostProParameters
+
+pp = PostProcessor(filename=fname, parameters=PostProParameters.default())
+pp.run()
diff --git a/core/functions/tracks.py b/core/functions/tracks.py
new file mode 100644
index 0000000000000000000000000000000000000000..e47e60231da31e2c23acf5e97f5b721edbffdfee
--- /dev/null
+++ b/core/functions/tracks.py
@@ -0,0 +1,429 @@
+"""
+Functions to process, filter and merge tracks.
+"""
+
+# from collections import Counter
+
+from copy import copy
+from typing import Union, List
+
+import numpy as np
+import pandas as pd
+from utils_find_1st import find_1st, cmp_larger
+
+import more_itertools as mit
+from scipy.signal import savgol_filter
+
+# from scipy.optimize import linear_sum_assignment
+# from scipy.optimize import curve_fit
+
+from matplotlib import pyplot as plt
+
+
+def load_test_dset():
+    # Load development dataset to test functions
+    return pd.DataFrame(
+        {
+            ("a", 1, 1): [2, 5, np.nan, 6, 8] + [np.nan] * 5,
+            ("a", 1, 2): list(range(2, 12)),
+            ("a", 1, 3): [np.nan] * 8 + [6, 7],
+            ("a", 1, 4): [np.nan] * 5 + [9, 12, 10, 14, 18],
+        },
+        index=range(1, 11),
+    ).T
+
+
+def max_ntps(track: pd.Series) -> int:
+    # Get the interval spanned by a track (last minus first non-NaN timepoint)
+    indices = np.where(track.notna())
+    return np.max(indices) - np.min(indices)
+
+
+def max_nonstop_ntps(track: pd.Series) -> int:
+    nona_tracks = track.notna()
+    consecutive_nonas_grouped = [
+        len(list(x)) for x in mit.consecutive_groups(np.flatnonzero(nona_tracks))
+    ]
+    return max(consecutive_nonas_grouped)
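+# Minimal sketch: for a track [nan, 2, 3, nan, 5] the non-NaN positions are [1, 2, 4],
+# so max_ntps returns 4 - 1 = 3 (the spanned interval) and max_nonstop_ntps returns 2
+# (the longest run of consecutive non-NaN timepoints).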
+
+
+def get_tracks_ntps(tracks: pd.DataFrame) -> pd.Series:
+    return tracks.apply(max_ntps, axis=1)
+
+
+def get_avg_gr(track: pd.Series) -> float:
+    """
+    Get average growth rate for a track.
+
+    :param track: Series with volume and timepoints as indices
+    """
+    ntps = max_ntps(track)
+    vals = track.dropna().values
+    gr = (vals[-1] - vals[0]) / ntps
+    return gr
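+# Minimal sketch: for a track [2, nan, 6] the spanned interval is 2 and the first and
+# last values are 2 and 6, so the average growth rate is (6 - 2) / 2 = 2.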
+
+
+def get_avg_grs(tracks: pd.DataFrame) -> pd.Series:
+    """
+    Get average growth rate for a group of tracks
+
+    :param tracks: (m x n) dataframe where rows are cell tracks and
+        columns are timepoints
+    """
+    return tracks.apply(get_avg_gr, axis=1)
+
+
+def clean_tracks(tracks, min_len: int = 15, min_gr: float = 1.0) -> pd.DataFrame:
+    """
+    Clean small non-growing tracks and return the reduced dataframe
+
+    :param tracks: (m x n) dataframe where rows are cell tracks and
+        columns are timepoints
+    :param min_len: int number of timepoints cells must have not to be removed
+    :param min_gr: float Minimum mean growth rate to assume an outline is growing
+    """
+    ntps = get_tracks_ntps(tracks)
+    grs = get_avg_grs(tracks)
+
+    growing_long_tracks = tracks.loc[(ntps >= min_len) & (grs > min_gr)]
+
+    return growing_long_tracks
+
+
+def merge_tracks(tracks, drop=False, **kwargs) -> pd.DataFrame:
+    """
+    Join tracks that are contiguous and within a volume threshold of each other
+
+    :param tracks: (m x n) dataframe where rows are cell tracks and
+        columns are timepoints
+    :param kwargs: args passed to get_joinable
+
+    returns a tuple of
+
+    :joint_tracks: (m x n) dataframe where rows are cell tracks and
+        columns are timepoints. Merged tracks are still present but filled
+        with np.nans.
+    :joinable_pairs: list of (target, source) pairs of track ids that were merged.
+    """
+
+    # calculate tracks that can be merged until no more tracks can be merged
+    joinable_pairs = get_joinable(tracks, **kwargs)
+    if joinable_pairs:
+        tracks = join_tracks(tracks, joinable_pairs, drop=drop)
+    joint_ids = get_joint_ids(joinable_pairs)
+
+    return (tracks, joinable_pairs)
+
+
+def get_joint_ids(merging_seqs) -> dict:
+    """
+    Convert a series of merges into a dictionary where
+    the key is the cell_id of destination and the value a list
+    of the other track ids that were merged into the key
+
+    :param merging_seqs: list of tuples of indices indicating the
+    sequence of merging events. It is important for this to be in sequential order
+
+    How it works:
+
+    The order of merging matters for naming, always the leftmost track will keep the id
+
+    For example, having tracks (a, b, c, d) and the iterations of merge events:
+
+    0 a b c d
+    1 a b cd
+    2 ab cd
+    3 abcd
+
+    We should get:
+
+    output {b: a, c: a, d: a} (identity mappings are dropped)
+
+    """
+    if not merging_seqs:
+        return {}
+
+    targets, origins = list(zip(*merging_seqs))
+    static_tracks = set(targets).difference(origins)
+
+    joint = {track_id: track_id for track_id in static_tracks}
+    for target, origin in merging_seqs:
+        joint[origin] = target
+
+    moved_target = [
+        k for k, v in joint.items() if joint[v] != v and v in joint.values()
+    ]
+
+    for orig in moved_target:
+        joint[orig] = rec_bottom(joint, orig)
+
+    return {
+        k: v for k, v in joint.items() if k != v
+    }  # remove ids that point to themselves
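+# For the docstring example above (a minimal sketch, with merges recorded as
+# (target, origin) pairs):
+# >>> get_joint_ids([("c", "d"), ("a", "b"), ("a", "c")])
+# {'d': 'a', 'b': 'a', 'c': 'a'}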
+
+
+def rec_bottom(d, k):
+    if d[k] == k:
+        return k
+    else:
+        return rec_bottom(d, d[k])
+
+
+def join_tracks(tracks, joinable_pairs, drop=True) -> pd.DataFrame:
+    """
+    Join pairs of tracks from later tps towards the start.
+
+    :param tracks: (m x n) dataframe where rows are cell tracks and
+        columns are timepoints
+
+    :param drop: bool indicating whether or not to drop the source rows after merging
+
+    returns (a copy)
+
+    :joint_tracks: (m x n) dataframe where rows are cell tracks and
+        columns are timepoints. Merged tracks are still present but filled
+        with np.nans.
+
+    """
+
+    tmp = copy(tracks)
+    for target, source in joinable_pairs:
+        tmp.loc[target] = join_track_pair(tmp.loc[target], tmp.loc[source])
+
+        if drop:
+            tmp = tmp.drop(source)
+
+    return tmp
+
+
+def join_track_pair(target, source):
+    tgt_copy = copy(target)
+    end = find_1st(target.values[::-1], 0, cmp_larger)
+    tgt_copy.iloc[-end:] = source.iloc[-end:].values
+    return tgt_copy
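+# Minimal sketch: joining a target track [1, 2, nan, nan] with a source [nan, nan, 3, 4]
+# keeps the target's leading values and copies the trailing values from the source,
+# giving [1, 2, 3, 4].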
+
+
+def get_joinable(tracks, smooth=False, tol=0.1, window=5, degree=3) -> dict:
+    """
+    Get the pairs of tracks (without repeats) whose error is smaller than the
+    tolerance. If a track can be assigned to two or more others, the one with
+    the lowest error is chosen.
+
+    :param tracks: (m x n) dataframe where rows are cell tracks and
+        columns are timepoints
+    :param tol: float or int threshold of average (prediction error/std) necessary
+        to consider two tracks the same. If float is fraction of first track,
+        if int it is absolute units.
+    :param window: int value of window used for savgol_filter
+    :param degree: int value of polynomial degree passed to savgol_filter
+
+    """
+
+    # Commented because we are not smoothing in this step yet
+    # candict = {k:v for d in contig.values for k,v in d.items()}
+
+    # smooth all relevant tracks
+
+    if smooth:  # Apply savgol filter TODO fix nans affecting edge placing
+        # NOTE: non_uniform_savgol is not defined in this module; it must be
+        # provided elsewhere (it was previously part of postprocessor.core.tracks).
+        clean = clean_tracks(
+            tracks, min_len=window + 1, min_gr=0.9
+        )  # get useful tracks
+        savgol_on_srs = lambda x: non_uniform_savgol(x.index, x.values, window, degree)
+        contig = clean.groupby(["trap"]).apply(get_contiguous_pairs)
+        contig = contig.loc[contig.apply(len) > 0]
+        flat = set([k for v in contig.values for i in v for j in i for k in j])
+        smoothed_tracks = clean.loc[flat].apply(savgol_on_srs, 1)
+    else:
+        contig = tracks.groupby(["trap"]).apply(get_contiguous_pairs)
+        contig = contig.loc[contig.apply(len) > 0]
+        flat = set([k for v in contig.values for i in v for j in i for k in j])
+        smoothed_tracks = tracks.loc[flat].apply(lambda x: np.array(x.values), axis=1)
+
+    # fetch edges from ids TODO (IF necessary, here we can compare growth rates)
+    idx_to_edge = lambda preposts: [
+        (
+            [get_val(smoothed_tracks.loc[pre], -1) for pre in pres],
+            [get_val(smoothed_tracks.loc[post], 0) for post in posts],
+        )
+        for pres, posts in preposts
+    ]
+    edges = contig.apply(idx_to_edge)
+
+    closest_pairs = edges.apply(get_vec_closest_pairs, tol=tol)
+
+    # match local with global ids
+    joinable_ids = [
+        localid_to_idx(closest_pairs.loc[i], contig.loc[i]) for i in closest_pairs.index
+    ]
+
+    return [pair for pairset in joinable_ids for pair in pairset]
+
+
+get_val = lambda x, n: x[~np.isnan(x)][n] if len(x[~np.isnan(x)]) else np.nan
+
+
+def localid_to_idx(local_ids, contig_trap):
+    """Fetch then original ids from a nested list with joinable local_ids
+
+    input
+    :param local_ids: list of list of pairs with cell ids to be joint
+    :param local_ids: list of list of pairs with corresponding cell ids
+
+    return
+    list of pairs with (experiment-level) ids to be joint
+    """
+    lin_pairs = []
+    for i, pairs in enumerate(local_ids):
+        if len(pairs):
+            for left, right in pairs:
+                lin_pairs.append((contig_trap[i][0][left], contig_trap[i][1][right]))
+    return lin_pairs
+
+
+def get_vec_closest_pairs(lst: List, **kwargs):
+    return [get_closest_pairs(*l, **kwargs) for l in lst]
+
+
+def get_closest_pairs(pre: List[float], post: List[float], tol: Union[float, int] = 1):
+    """Calculate a cost matrix the Hungarian algorithm to pick the best set of
+    options
+
+    input
+    :param pre: list of floats with edges on left
+    :param post: list of floats with edges on right
+    :param tol: int or float if int metrics of tolerance, if float fraction
+
+    returns
+    :: list of indices corresponding to the best solutions for matrices
+
+    """
+    if len(pre) > len(post):
+        dMetric = np.abs(np.subtract.outer(post, pre))
+    else:
+        dMetric = np.abs(np.subtract.outer(pre, post))
+    # ids = linear_sum_assignment(dMetric)
+    dMetric[np.isnan(dMetric)] = tol + 1 + np.nanmax(dMetric)  # nans will be filtered
+
+    ids = solve_matrix(dMetric)
+    if not len(ids[0]):
+        return []
+
+    norm = (
+        np.array(pre)[ids[len(pre) > len(post)]] if tol < 1 else 1
+    )  # relative or absolute tol
+    result = dMetric[ids] / norm
+    ids = ids if len(pre) < len(post) else ids[::-1]
+
+    return [idx for idx, res in zip(zip(*ids), result) if res <= tol]
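+# Minimal sketch: get_closest_pairs([10.0], [10.5, 30.0], tol=1) matches the single left
+# edge to the closest right edge within the absolute tolerance and returns [(0, 0)].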
+
+
+def solve_matrix(dMetric):
+    """
+    Solve cost matrix focusing on getting the smallest cost at each iteration.
+
+    input
+    :param dMetric: np.array cost matrix
+
+    returns
+    tuple of np.arrays indicating picks with lowest individual value
+    """
+    glob_is = []
+    glob_js = []
+    if (~np.isnan(dMetric)).any():
+        tmp = copy(dMetric)
+        std = sorted(tmp[~np.isnan(tmp)])
+        while (~np.isnan(std)).any():
+            v = std[0]
+            i_s, j_s = np.where(tmp == v)
+            i = i_s[0]
+            j = j_s[0]
+            tmp[i, :] += np.nan
+            tmp[:, j] += np.nan
+            glob_is.append(i)
+            glob_js.append(j)
+
+            std = sorted(tmp[~np.isnan(tmp)])
+
+    return (np.array(glob_is), np.array(glob_js))
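+# Minimal sketch: solve_matrix(np.array([[1.0, 3.0], [2.0, 0.5]])) greedily picks the
+# smallest cost (0.5 at position (1, 1)), masks its row and column, then picks 1.0 at
+# (0, 0), returning (array([1, 0]), array([1, 0])).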
+
+
+def plot_joinable(tracks, joinable_pairs, max=64):
+    """
+    Convenience plotting function for debugging and data vis
+    """
+
+    nx = 8
+    ny = 8
+    _, axes = plt.subplots(nx, ny)
+    for i in range(nx):
+        for j in range(ny):
+            if i * ny + j < len(joinable_pairs):
+                ax = axes[i, j]
+                pre, post = joinable_pairs[i * ny + j]
+                pre_srs = tracks.loc[pre].dropna()
+                post_srs = tracks.loc[post].dropna()
+                ax.plot(pre_srs.index, pre_srs.values, "b")
+                # try:
+                #     totrange = np.arange(pre_srs.index[0],post_srs.index[-1])
+                #     ax.plot(totrange, interpolate(pre_srs, totrange), 'r-')
+                # except:
+                #     pass
+                ax.plot(post_srs.index, post_srs.values, "g")
+
+    plt.show()
+
+
+def get_contiguous_pairs(tracks: pd.DataFrame) -> list:
+    """
+    Get all pair of contiguous track ids from a tracks dataframe.
+
+    :param tracks: (m x n) dataframe where rows are cell tracks and
+        columns are timepoints
+    """
+    # indices = np.where(tracks.notna())
+
+    mins, maxes = [
+        tracks.notna().apply(np.where, axis=1).apply(fn) for fn in (np.min, np.max)
+    ]
+
+    mins_d = mins.groupby(mins).apply(lambda x: x.index.tolist())
+    mins_d.index = mins_d.index - 1  # make indices equal
+    # TODO add support for skipping time points
+    maxes_d = maxes.groupby(maxes).apply(lambda x: x.index.tolist())
+
+    common = sorted(set(mins_d.index).intersection(maxes_d.index), reverse=True)
+
+    return [(maxes_d[t], mins_d[t]) for t in common]
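+# Minimal sketch, assuming two tracks indexed 0 and 1 over columns 0..4: if track 0 is
+# present at timepoints 0-2 and track 1 at timepoints 3-4, their end and start are
+# contiguous, so the function returns [([0], [1])].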
+
+
+# def fit_track(track: pd.Series, obj=None):
+#     if obj is None:
+#         obj = objective
+
+#     x = track.dropna().index
+#     y = track.dropna().values
+#     popt, _ = curve_fit(obj, x, y)
+
+#     return popt
+
+# def interpolate(track, xs) -> list:
+#     '''
+#     Interpolate next timepoint from a track
+
+#     :param track: pd.Series of volume growth over a time period
+#     :param t: int timepoint to interpolate
+#     '''
+#     popt = fit_track(track)
+#     # perr = np.sqrt(np.diag(pcov))
+#     return objective(np.array(xs), *popt)
+
+
+# def objective(x,a,b,c,d) -> float:
+#     # return (a)/(1+b*np.exp(c*x))+d
+#     return (((x+d)*a)/((x+d)+b))+c
+
+# def cand_pairs_to_dict(candidates):
+#     d={x:[] for x,_ in candidates}
+#     for x,y in candidates:
+#         d[x].append(y)
+#     return d
diff --git a/core/group.py b/core/group.py
new file mode 100644
index 0000000000000000000000000000000000000000..99cf00cf775f034d69be6f29fd7d9bb77dba5f54
--- /dev/null
+++ b/core/group.py
@@ -0,0 +1,130 @@
+"""
+Class to group multiple positions into one using one of several criteria.
+"""
+
+from pathlib import Path
+import re
+
+import h5py
+import pandas as pd
+
+from postprocessor.core.io.base import groupsort
+from postprocessor.core.io.signal import Signal
+
+from postprocessor.core.processes.base import ParametersABC, ProcessABC
+
+
+class GroupParameters(ParametersABC):
+    def __init__(self, by="name", processes=[], signals=[]):
+        self.by = by
+        self.signals = signals
+        self.processes = processes
+
+    @classmethod
+    def default(cls):
+        return cls.from_dict({"by": "name", "signals": [], "processes": []})
+
+
+class Group(ProcessABC):
+    def __init__(self, parameters):
+        super().__init__(parameters)
+
+    def get_position_filenames(self, exp_root, poses):
+        """
+        Get filenames as a dictionary where the key is the position and value the filename.
+        """
+        central_store = Path(exp_root) / "store.h5"
+        if central_store.exists():
+            hdf = h5py.File(central_store, "r")
+            self.filenames = {
+                pos: hdf["/positions/"][pos].attrs["filename"]
+                for pos in hdf["/positions/"]
+            }
+            hdf.close()
+        else:  # If no central store just list position files in expt root folder
+            fullfiles = [x for x in Path(exp_root).glob("*store.h5")]
+            files = [x.name for x in Path(exp_root).glob("*store.h5")]
+            filenames = [False for _ in poses]
+            for i, pos in enumerate(poses):
+                matches = [
+                    True if re.match(pos + ".*.h5", fname) else False for fname in files
+                ]
+                if any(matches):
+                    assert sum(matches) == 1, "More than one match"
+                    filenames[i] = (pos, fullfiles[matches.index(True)])
+
+            self.filenames = {fname[0]: fname[1] for fname in filenames if fname}
+
+        self.positions = list(self.filenames.keys())
+        return self.filenames
+
+    def get_signals(self):
+        # hdf = h5py.File(central_store, "r")
+        # keys_d = groupsort(keys)
+        self.signals = {pos: {} for pos in self.filenames.keys()}
+        for pos, fname in self.filenames.items():
+            for signal in self.parameters.signals:
+                self.signals[pos][signal] = pd.read_hdf(fname, signal)
+
+        return self.signals
+
+    def gen_groups(self):
+        if self.by == "group":  # Use group names in metadata
+            pass
+        elif self.by == "name":  # Infer groups from signal concatenation
+            # Remove the last three characters (the position suffix) and group
+            # position names that share the remaining prefix.
+            groupnames = list(set([x[:-3] for x in self.positions]))
+            self.group_signal_tree = {group: [] for group in groupnames}
+            self.poses_grouped = {group: [] for group in groupnames}
+            for pos in self.positions:
+                group = groupnames[groupnames.index(pos[:-3])]
+                self.group_signal_tree[group].append(self.signals[pos])
+                self.poses_grouped[group].append(pos)
+
+        elif (
+            type(self.by) == tuple
+        ):  # Manually give groups as tuple or list of positions
+            pass
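+        # Grouping-by-name sketch: positions such as "ph_5_04_001" and "ph_5_04_002"
+        # both reduce to the prefix "ph_5_04_" once the final three characters are
+        # dropped, so they end up in the same group.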
+
+    def concat_signals(self):
+        self.concated_signals = {group: {} for group in self.group_signal_tree}
+        for k, group in self.group_signal_tree.items():
+            for signal in self.parameters.signals:
+                self.concated_signals[k][signal] = pd.concat(
+                    [g[signal] for g in group], keys=self.poses_grouped[k]
+                )
+
+        return self.concated_signals
+
+    def process_signals(self, grouped_signals):
+        pass
+
+    def run(self, central_store, poses):
+
+        self.get_position_filenames(central_store, poses)
+        self.get_signals()
+        self.gen_groups()
+        self.concat_signals()
+        # processed_signals = self.process_signals(grouped_signals)
+
+        return self.concated_signals
+        # return processed_signals
+
+
+poses = [
+    x.name.split("store")[0]
+    for x in Path(
+        "/shared_libs/pipeline-core/scripts/data/ph_calibration_dual_phl_ura8_5_04_5_83_7_69_7_13_6_59__01"
+    ).rglob("*")
+    if x.name != "images.h5"
+]
+gr = Group(
+    GroupParameters(
+        signals=["/extraction/general/None/area", "/extraction/mCherry/np_max/median"]
+    )
+)
+gr.run(
+    central_store="/shared_libs/pipeline-core/scripts/data/ph_calibration_dual_phl_ura8_5_04_5_83_7_69_7_13_6_59__01",
+    poses=poses,
+)
+signal = Signal(
+    "/shared_libs/pipeline-core/scripts/data/ph_calibration_dual_phl_ura8_5_04_5_83_7_69_7_13_6_59__01/ph_5_04_001store.h5"
+)
diff --git a/core/old/cell.py b/core/old/cell.py
new file mode 100644
index 0000000000000000000000000000000000000000..f96f30d53e8bc462044eb40bb6417d1454c3f742
--- /dev/null
+++ b/core/old/cell.py
@@ -0,0 +1,23 @@
+"""
+Main data processing functions. The inputs for this functions must be data series.
+"""
+
+import numpy as np
+from pandas import Series
+
+def bg_sub(data:Series, background:float):
+    return data - background
+
+def ratio(data1:Series, data2:Series):
+    return data1 / data2
+
+def norm_tps(data:Series, tps:list, fun=np.nanmean):
+    return data / fun(data.loc(axis=1)[tps])
+
+def growth_rate(data:Series, alg=None, filt = 'savgol'):
+    # Legacy stub: the full implementation lives in core/old/ph.py (growth_rate).
+    if alg is None:
+        alg='standard'
diff --git a/core/old/ph.py b/core/old/ph.py
new file mode 100644
index 0000000000000000000000000000000000000000..a28a8055c8c64fe1ff84f9a6b5692f5d722bb260
--- /dev/null
+++ b/core/old/ph.py
@@ -0,0 +1,364 @@
+from typing import Dict, List, Union
+import re
+
+import numpy as np
+import pandas as pd
+from pandas import Series, DataFrame
+from sklearn.cluster import KMeans
+from matplotlib import pyplot as plt
+import seaborn as sns
+
+import compress_pickle
+
+from postprocessor.core.postprocessor import PostProcessor
+from postprocessor.core.tracks import get_avg_grs, non_uniform_savgol
+from postprocessor.core.ph import *
+
+
+def filter_by_gfp(dfs):
+    gfps = pd.concat([t[("GFPFast_bgsub", np.maximum, "median")] for t in dfs])
+    avgs_gfp = gfps.mean(axis=1)
+    high_gfp = get_high_k2(avgs_gfp)
+    # high_gfp = avgs_gfp[avgs_gfp > 200]
+
+    return high_gfp
+
+
+def filter_by_area(dfs, min=50):
+    areas = pd.concat([t[("general", None, "area")] for t in dfs])
+    avgs_areas = areas[(areas.notna().sum(axis=1) > areas.shape[1] // (1.25))].mean(
+        axis=1
+    )
+    avgs_areas = avgs_areas[(avgs_areas > min)]
+
+    return avgs_areas
+
+
+def get_high_k2(df):
+    kmeans = KMeans(n_clusters=2)
+    vals = df.values.reshape(-1, 1)
+    kmeans.fit(vals)
+    high_clust_id = kmeans.cluster_centers_.argmax()
+
+    return df.loc[kmeans.predict(vals) == high_clust_id]
+
+
+def get_concats(dfs, keys):
+    return pd.concat([t.get((keys), pd.DataFrame()) for t in dfs])
+
+
+def get_dfs(pp):
+    dfs = [pp.extraction[pp.expt.positions[i]] for i in range(len(pp.expt.positions))]
+    return dfs
+
+
+combine_dfs = lambda dfs: {k: get_concats(dfs, k) for k in dfs[0].keys()}
+
+
+def merge_channels(pp, min_area=50):
+    dfs = get_dfs(pp)
+    # rats = get_concats(dfs, ("em_ratio_bgsub", np.maximum, "median"))
+    # gfps = filter_by_gfp(dfs)
+
+    avgs_area = filter_by_area(dfs, min=min_area)
+    # ids = [x for x in set(gfps.index).intersection(avgs_area.index)]
+    ids = avgs_area.index
+
+    new_dfs = combine_dfs(dfs)
+
+    h = pd.DataFrame(
+        {
+            k[0] + "_" + k[2]: v.loc[ids].mean(axis=1)
+            for k, v in new_dfs.items()
+            if k[-1] != "imBackground"
+        }
+    )
+    return h
+
+
+def process_phs(pp, min_area=200):
+    h = merge_channels(pp, min_area)
+    h.index.names = ["pos", "trap", "cell"]
+    ids = h.index
+    h = h.reset_index()
+
+    h["ph"] = h["pos"].apply(lambda x: float(x[3:7].replace("_", ".")))
+    h["max5_d_med"] = h["mCherry_max2p5pc"] / h["mCherry_median"]
+
+    h = h.set_index(ids)
+    h = h.drop(["pos", "trap", "cell"], axis=1)
+    return h
+
+
+def growth_rate(
+    data: Series, alg=None, filt={"kind": "savgol", "window": 5, "degree": 3}
+):
+    window = filt["window"]
+    degree = filt["degree"]
+    if alg is None:
+        alg = "standard"
+
+    if filt:  # TODO add support for multiple algorithms
+        data = Series(
+            non_uniform_savgol(
+                data.dropna().index, data.dropna().values, window, degree
+            ),
+            index=data.dropna().index,
+        )
+
+    diff_kernel = np.array([1, -1])
+
+    return Series(np.convolve(data, diff_kernel, "same"), index=data.dropna().index)
+
+
+# import numpy as np
+
+# diff_kernel = np.array([1, -1])
+# gr = clean.apply(growth_rate, axis=1)
+# from postprocessor.core.tracks import non_uniform_savgol, clean_tracks
+
+
+def sort_df(df, by="first", rev=True):
+    nona = df.notna()
+    if by == "len":
+        idx = nona.sum(axis=1)
+    elif by == "first":
+        idx = nona.idxmax(axis=1)
+    idx = idx.sort_values().index
+
+    if rev:
+        idx = idx[::-1]
+
+    return df.loc[idx]
+
+
+# test = tmp[("GFPFast", np.maximum, "median")]
+# test2 = tmp[("pHluorin405", np.maximum, "median")]
+# ph = test / test2
+# ph = ph.stack().reset_index(1)
+# ph.columns = ["tp", "fl"]
+
+
+def m2p5_med(ext, ch, red=np.maximum):
+    m2p5pc = ext[(ch, red, "max2p5pc")]
+    med = ext[(ch, red, "median")]
+
+    result = m2p5pc / med
+
+    return result
+
+
+def plot_avg(df):
+    df = df.stack().reset_index(1)
+    df.columns = ["tp", "val"]
+
+    sns.relplot(x=df["tp"], y=df["val"], kind="line")
+    plt.show()
+
+
+def split_data(df: DataFrame, splits: List[int]):
+    dfs = [df.iloc[:, i:j] for i, j in zip((0,) + splits, splits + (df.shape[1],))]
+    return dfs
+
+
+def growth_rate(
+    data: Series, alg=None, filt={"kind": "savgol", "window": 7, "degree": 3}
+):
+    if alg is None:
+        alg = "standard"
+
+    if filt:  # TODO add support for multiple algorithms
+        window = filt["window"]
+        degree = filt["degree"]
+        data = Series(
+            non_uniform_savgol(
+                data.dropna().index, data.dropna().values, window, degree
+            ),
+            index=data.dropna().index,
+        )
+
+    diff_kernel = np.array([1, -1])
+
+    return Series(np.convolve(data, diff_kernel, "same"), index=data.dropna().index)
+
+
+# pp = PostProcessor(source=19831)
+# pp.load_tiler_cells()
+# f = "/home/alan/Documents/sync_docs/libs/postproc/gluStarv_2_0_x2_dual_phl_ura8_00/extraction"
+# pp.load_extraction(
+#     "/home/alan/Documents/sync_docs/libs/postproc/postprocessor/"
+#     + pp.expt.name
+#     + "/extraction/"
+# )
+# tmp = pp.extraction["phl_ura8_002"]
+
+
+def _check_bg(data):
+    # NOTE: relies on a module-level PostProcessor `pp` (see the commented-out setup
+    # above); the `data` argument is currently unused.
+    for k in list(pp.extraction.values())[0].keys():
+        for p in pp.expt.positions:
+            if k not in pp.extraction[p]:
+                print(p, k)
+
+
+# data = {
+#     k: pd.concat([pp.extraction[pos][k] for pos in pp.expt.positions[:-3]])
+#     for k in list(pp.extraction.values())[0].keys()
+# }
+
+
+def hmap(df, **kwargs):
+    g = sns.heatmap(sort_df(df), robust=True, cmap="mako_r", **kwargs)
+    plt.xlabel("")
+    return g
+
+
+# from random import randint
+# x = randint(0, len(smooth))
+# plt.plot(clean.iloc[x], 'b')
+# plt.plot(smooth.iloc[x], 'r')
+# plt.show()
+
+
+# data = tmp
+# df = data[("general", None, "area")]
+# clean = clean_tracks(df, min_len=160)
+# clean = clean.loc[clean.notna().sum(axis=1) > 9]
+# gr = clean.apply(growth_rate, axis=1)
+# splits = (72, 108, 180)
+# gr_sp = split_data(gr, splits)
+
+# idx = gr.index
+
+# bg = get_bg(data)
+# test = data[("GFPFast", np.maximum, "median")]
+# test2 = data[("pHluorin405", np.maximum, "median")]
+# ph = (test / test2).loc[idx]
+# c = pd.concat((ph.mean(1), gr.max(1)), axis=1)
+# c.columns = ["ph", "gr_max"]
+# # ph = ph.stack().reset_index(1)
+# # ph.columns = ['tp', 'fl']
+
+# ph_sp = split_data(gr, splits)
+
+
+def get_bg(data):
+    bg = {}
+    fl_subkeys = [
+        x
+        for x in data.keys()
+        if x[0] in ["GFP", "GFPFast", "mCherry", "pHluorin405"]
+        and x[-1] != "imBackground"
+    ]
+    for k in fl_subkeys:
+        nk = list(k)
+        bk = tuple(nk[:-1] + ["imBackground"])
+        nk = tuple(nk[:-1] + [nk[-1] + "_BgSub"])
+        tmp = []
+        for i, v in data[bk].iterrows():
+            if i in data[k].index:
+                newdf = data[k].loc[i] / v
+                newdf.index = pd.MultiIndex.from_tuples([(*i, c) for c in newdf.index])
+                tmp.append(newdf)
+        bg[nk] = pd.concat(tmp)
+
+    return bg
+
+
+def calc_ph(bg):
+    fl_subkeys = [x for x in bg.keys() if x[0] in ["GFP", "GFPFast", "pHluorin405"]]
+    chs = list(set([x[0] for x in fl_subkeys]))
+    assert len(chs) == 2, "Too many channels"
+    ch1 = [x[1:] for x in fl_subkeys if x[0] == chs[0]]
+    ch2 = [x[1:] for x in fl_subkeys if x[0] == chs[1]]
+    inter = list(set(ch1).intersection(ch2))
+    ph = {}
+    for red_fld in inter:
+        ph[tuple(("ph",) + red_fld)] = (
+            bg[tuple((chs[0],) + red_fld)] / bg[tuple((chs[1],) + red_fld)]
+        )
+
+    return ph
+
+
+def get_traps(pp):
+    t0 = {}
+    for pos in pp.tiler.positions:
+        pp.tiler.current_position = pos
+        t0[pos] = pp.tiler.get_traps_timepoint(
+            0, channels=[0, pp.tiler.channels.index("mCherry")], z=[0, 1, 2, 3, 4]
+        )
+
+    return t0
+
+
+def get_pos_ph(pp):
+    pat = re.compile(r"ph_([0-9]_[0-9][0-9])")
+    return {
+        pos: float(pat.findall(pos)[0].replace("_", ".")) for pos in pp.tiler.positions
+    }
+
+
+def plot_sample_bf_mch(pp):
+    bf_mch = get_traps(pp)
+    ts = [{i: v[:, j, ...] for i, v in bf_mch.items()} for j in [0, 1]]
+    tsbf = {i: v[:, 0, ...] for i, v in bf_mch.items()}
+
+    posdict = {k: v for k, v in get_pos_ph(pp).items()}
+    posdict = {v: k for k, v in posdict.items()}
+    posdict = {v: k for k, v in posdict.items()}
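+    # Inverting the dictionary twice keeps a single position per pH value
+    # (later duplicates overwrite earlier ones).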
+    ph = np.unique(list(posdict.values())).tolist()
+    counters = {ph: 0 for ph in ph}
+    n = [np.random.randint(ts[0][k].shape[0]) for k in posdict.keys()]
+
+    fig, axes = plt.subplots(2, 5)
+    for k, (t, name) in enumerate(zip(ts, ["Bright field", "mCherry"])):
+        for i, (pos, ph) in enumerate(posdict.items()):
+            # i = ph.index(posdict[pos])
+            axes[k, i].grid(False)
+            axes[k, i].set(
+                xticklabels=[],
+                yticklabels=[],
+            )
+            axes[k, i].set_xlabel(posdict[pos] if k else None, fontsize=28)
+            axes[k, i].imshow(
+                np.maximum.reduce(t[pos][n[i], 0], axis=2),
+                cmap="gist_gray" if not k else None,
+            )
+            # counters[posdict[pos]] += 1
+        plt.tick_params(
+            axis="x",  # changes apply to the x-axis
+            which="both",  # both major and minor ticks are affected
+            bottom=False,  # ticks along the bottom edge are off
+            top=False,  # ticks along the top edge are off
+            labelbottom=False,
+        )  # labels along the bottom edge are off
+        axes[k, 0].set_ylabel(name, fontsize=28)
+    plt.tight_layout()
+    plt.show()
+
+
+# Plotting calibration curve
+from scipy.optimize import curve_fit
+
+
+def fit_calibration(h):
+    ycols = [x for x in h.columns if "em_ratio" in x]
+    xcol = "ph"
+
+    def objective(x, a, b):
+        return a * x + b
+
+    # fig, axes = plt.subplots(1, len(ycols))
+    # for i, ycol in enumerate(ycols):
+    #     d = h[[xcol, ycol]]
+    #     params, _ = curve_fit(objective, *[d[col].values for col in d.columns])
+    #     sns.lineplot(x=xcol, y=ycol, data=h, alpha=0.5, err_style="bars", ax=axes[i])
+    #     # sns.lineplot(d[xcol], objective(d[xcol].values, *params), ax=axes[i])
+    # plt.show()
+
+    ycol = "em_ratio_mean"
+    d = h[[xcol, *ycols]]
+    tmp = d.groupby("ph").mean()
+    calibs = {ycol: curve_fit(objective, tmp.index, tmp[ycol])[0] for ycol in ycols}
+    # sns.lineplot(x=xcol, y=ycol, data=d, alpha=0.5, err_style="bars")
+    # plt.xlabel("pH")
+    # plt.ylabel("pHluorin emission ratio")
+    # sns.lineplot(d[xcol], objective(d[xcol], *params))
+
+    return calibs
diff --git a/core/old/postprocessor.py b/core/old/postprocessor.py
new file mode 100644
index 0000000000000000000000000000000000000000..7e024d2656d5885025cfe0bb4314cdc7499a3da5
--- /dev/null
+++ b/core/old/postprocessor.py
@@ -0,0 +1,106 @@
+import pkg_resources
+from pathlib import Path, PosixPath
+from typing import Union, List, Dict
+from datetime import datetime
+
+from compress_pickle import load, dump
+
+import numpy as np
+
+from core.experiment import Experiment
+from core.cells import Cells
+from core.segment import Tiler
+from core.io.matlab import matObject
+
+
+class PostProcessor:
+    '''
+    Base class to perform feature extraction.
+    :param parameters: Parameters class with channels, reduction functions and
+        extraction functions to use.
+    :param source: Origin of the experiment; if int it is assumed to be an Omero id,
+        if str or Path it is treated as a local source.
+    '''
+    def __init__(self, parameters=None, source: Union[int, str, Path] = None):
+        # self.params = parameters
+        if source is not None:
+            if type(source) is int:
+                self.expt_id = source
+                self.load_expt(source, omero=True)
+            elif isinstance(source, (str, Path)):
+                self.load_expt(source, omero=False)
+
+    @property
+    def channels(self):
+        if not hasattr(self, '_channels'):
+            if type(self.params.tree) is dict:
+                self._channels = tuple(self.params.tree.keys())
+
+        return self._channels
+
+    @property
+    def current_position(self):
+        assert hasattr(self, 'expt'), 'No experiment loaded.'
+
+        if hasattr(self, 'tiler'):
+            assert self.expt.current_position.name == self.tiler.current_position
+
+        return self.expt.current_position
+
+    def load_expt(self, source: Union[int, str], omero: bool = False) -> None:
+        if omero:
+            self.expt = Experiment.from_source(
+                self.expt_id,  #Experiment ID on OMERO
+                'upload',  #OMERO Username
+                '***REMOVED***',  #OMERO Password
+                'islay.bio.ed.ac.uk',  #OMERO host
+                port=4064  #This is default
+            )
+        else:
+            self.expt = Experiment.from_source(source)
+            self.expt_id = self.expt.exptID
+
+    def load_tiler_cells(self) -> None:
+        self.tiler = Tiler(self.expt)
+        self.cells = Cells()
+        self.cells = self.cells.from_source(
+            self.expt.current_position.annotation)
+
+    def get_pos_mo_bud(self):
+        annot = self.expt._get_position_annotation(
+            self.expt.current_position.name)
+        matob = matObject(annot)
+        m = matob["timelapseTrapsOmero"].get("cellMothers", None)
+        if m is not None:
+            ids = np.nonzero(m.todense())
+            d = {(self.expt.current_position.name, i, int(m[i, j])): []
+                 for i, j in zip(*ids)}
+            for i, j, k in zip(*ids, d.keys()):
+                d[k].append((self.expt.current_position.name, i, j + 1))
+        else:
+            print("Pos {} has no mother matrix".format(
+                self.expt.current_position.name))
+            d = {}
+
+        return d
+
+    def get_exp_mo_bud(self):
+        d = {}
+        for pos in self.expt.positions:
+            self.expt.current_position = pos
+            d = {**d, **self.get_pos_mo_bud()}
+
+        self.expt.current_position = self.expt.positions[0]
+
+        return d
+
+    def load_extraction(self, folder=None) -> None:
+        if folder is None:
+            folder = Path(self.expt.name + '/extraction')
+
+        self.extraction = {}
+        for pos in self.expt.positions:
+            try:
+                self.extraction[pos] = load(folder / Path(pos + '.gz'))
+
+            except Exception:
+                print(pos, ' not found')
diff --git a/core/processes/__init__.py b/core/processes/__init__.py
new file mode 100644
index 0000000000000000000000000000000000000000..e5a0d9b4834ec8f46d6e0d1256c6dcaad2e460fe
--- /dev/null
+++ b/core/processes/__init__.py
@@ -0,0 +1 @@
+#!/usr/bin/env python3
diff --git a/core/processes/aggregate.py b/core/processes/aggregate.py
new file mode 100644
index 0000000000000000000000000000000000000000..cfe8e8f03ad5a5cb8ee4c1a58f1dcb837eaedc4f
--- /dev/null
+++ b/core/processes/aggregate.py
@@ -0,0 +1,74 @@
+from itertools import cycle
+
+import numpy as np
+import pandas as pd
+
+from agora.base import ParametersABC, ProcessABC
+
+
+class aggregateParameters(ParametersABC):
+    """
+    Parameters
+    reductions: list of str reductions (e.g. "mean", "median", "max") applied across columns
+    """
+
+    def __init__(self, reductions):
+        super().__init__()
+        self.reductions = reductions
+
+    @classmethod
+    def default(cls):
+        return cls.from_dict({"reductions": ["mean", "median", "max"]})
+
+
+class aggregate(ProcessABC):
+    """
+    aggregate multiple datasets
+    """
+
+    def __init__(self, parameters: aggregateParameters):
+        super().__init__(parameters)
+
+    def run(self, signals):
+        names = np.array([signal.index.names for signal in signals])
+        index = signals[0].index
+        for s in signals[1:]:
+            index = index.intersection(s.index)
+
+        tmp_signals = [s.loc[index] for s in signals]
+        for i, s in enumerate(signals):
+            tmp_signals[i].name = s.name
+        signals = tmp_signals
+
+        assert len(signals), "Signals is empty"
+
+        bad_words = {
+            "postprocessing",
+            "extraction",
+            "None",
+            "np_max",
+            "",
+        }
+        get_keywords = lambda df: [
+            ind
+            for item in df.name.split("/")
+            for ind in item.split("/")
+            if ind not in bad_words
+        ]
+        colnames = [
+            "_".join(get_keywords(s) + [red])
+            for s in signals
+            for red in self.parameters.reductions
+        ]
+        concat = pd.concat(
+            [
+                getattr(signal, red)(axis=1)
+                for signal in signals
+                for red in self.parameters.reductions
+            ],
+            names=signals[0].index.names,
+            axis=1,
+        )
+        concat.columns = colnames
+
+        return concat
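+
+
+# Naming sketch: for a signal named "/extraction/general/None/area" with the default
+# reductions ["mean", "median", "max"], get_keywords keeps ["general", "area"], so the
+# output columns are "general_area_mean", "general_area_median" and "general_area_max".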
diff --git a/core/processes/base.py b/core/processes/base.py
new file mode 100644
index 0000000000000000000000000000000000000000..142bd6c3fa2c5ef0fc578d3d09bfa47530482619
--- /dev/null
+++ b/core/processes/base.py
@@ -0,0 +1 @@
+from agora.base import ParametersABC, ProcessABC
\ No newline at end of file
diff --git a/core/processes/births.py b/core/processes/births.py
new file mode 100644
index 0000000000000000000000000000000000000000..e5a0d9b4834ec8f46d6e0d1256c6dcaad2e460fe
--- /dev/null
+++ b/core/processes/births.py
@@ -0,0 +1 @@
+#!/usr/bin/env python3
diff --git a/core/processes/bud_metric.py b/core/processes/bud_metric.py
new file mode 100644
index 0000000000000000000000000000000000000000..518ddf5231c96458763fca35f52762def97e8b73
--- /dev/null
+++ b/core/processes/bud_metric.py
@@ -0,0 +1,65 @@
+import numpy as np
+import pandas as pd
+from agora.base import ParametersABC, ProcessABC
+
+
+class bud_metricParameters(ParametersABC):
+    """
+    Parameters
+    """
+
+    def __init__(self, mode="longest"):
+        super().__init__()
+        self.mode = mode
+
+    @classmethod
+    def default(cls):
+        return cls.from_dict({"mode": "longest"})
+
+
+class bud_metric(ProcessABC):
+    """
+    Obtain the volume of daughter cells
+    if 'longest' assumes a single mother per trap.
+    """
+
+    def __init__(self, parameters: bud_metricParameters):
+        super().__init__(parameters)
+
+    def run(self, signal: pd.DataFrame):
+        if self.parameters.mode == "longest":
+            result = self.get_bud_metric_wrap(signal)
+
+        return result
+
+    @staticmethod
+    def get_bud_metric(signal):
+        mother_id = signal.index[signal.notna().sum(axis=1).argmax()]
+
+        nomother = signal.drop(mother_id)
+
+        starts = nomother.apply(pd.Series.first_valid_index, axis=1).sort_values()
+
+        ranges = [np.arange(i, j) for i, j in zip(starts[:-1], starts[1:])]
+        ranges.append(np.arange(starts.iloc[-1], signal.columns[-1]))
+
+        bud_metric = pd.concat(
+            [signal.loc[i, rng] for i, rng in zip(starts.index, ranges)]
+        )
+        srs = pd.Series(bud_metric, index=signal.columns, name=mother_id)
+
+        return srs
+
+    def get_bud_metric_wrap(self, signals):
+        srs = [
+            self.get_bud_metric(signals.loc[trap])
+            for trap in signals.index.unique(level="trap")
+        ]
+        index = [
+            (trap, mother.name)
+            for trap, mother in zip(signals.index.unique(level="trap"), srs)
+        ]
+
+        concatenated = pd.concat(srs, keys=index, axis=1, sort=True).T.sort_index()
+        concatenated.index.names = ["trap", "cell_label"]
+        return concatenated
diff --git a/core/processes/dsignal.py b/core/processes/dsignal.py
new file mode 100644
index 0000000000000000000000000000000000000000..484ad59944c12cd12005c02b9470b84d8f2f19d7
--- /dev/null
+++ b/core/processes/dsignal.py
@@ -0,0 +1,28 @@
+import pandas as pd
+
+from postprocessor.core.processes.base import ParametersABC, ProcessABC
+
+
+class dsignalParameters(ParametersABC):
+    """
+    :window: Number of timepoints to consider for signal.
+    """
+
+    def __init__(self, window: int):
+        self.window = window
+
+    @classmethod
+    def default(cls):
+        return cls.from_dict({"window": 3})
+
+
+class dsignal(ProcessABC):
+    """
+    Calculate the change in a signal depending on a window
+    """
+
+    def __init__(self, parameters: dsignalParameters):
+        super().__init__(parameters)
+
+    def run(self, signal: pd.DataFrame):
+        return signal.rolling(window=self.parameters.window, axis=1).mean().diff(axis=1)
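+
+
+# Minimal sketch with the default window of 3: a row [1, 2, 3, 4, 5] has rolling means
+# [nan, nan, 2, 3, 4], so the returned change per timepoint is [nan, nan, nan, 1, 1].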
diff --git a/core/processes/gpsignal.py b/core/processes/gpsignal.py
new file mode 100644
index 0000000000000000000000000000000000000000..f6cdb45fbd1416630382246fb3152c9d99184076
--- /dev/null
+++ b/core/processes/gpsignal.py
@@ -0,0 +1,97 @@
+"""Gaussian process fit of a Signal."""
+import logging
+
+from postprocessor.core.processes.base import ParametersABC, ProcessABC
+import numpy as np
+import pandas as pd
+
+import gaussian_processes.gaussianprocess as gp
+
+def estimate_gr(volume, dt, noruns, bounds, verbose):
+    """
+    Parameters
+    ----------
+
+    volume : pd.Series
+        The volume series of a given cell
+    dt : float
+        The time interval in hours
+    noruns : int
+        The number of runs for optimisation
+    bounds : dict
+        The hyperparameter bounds used for optimisation
+    verbose : bool
+        If True, prints results
+
+    Returns
+    -------
+    results : dict
+        Keys 'time', 'volume', 'fit_time', 'fit_volume', 'growth_rate',
+        'd_growth_rate', 'volume_var', 'growth_rate_var' and 'd_growth_rate_var',
+        each an array of length len(volume) with NaNs where volume was not positive.
+    """
+    volume = volume.values
+    n = len(volume)
+    idx = np.arange(n)
+    t = idx * dt
+    y = volume[volume > 0]
+    x = t[volume > 0]
+    idx = idx[volume > 0]
+    # Fit the GP
+    mg = gp.maternGP(bounds, x, y)
+    mg.findhyperparameters(noruns=noruns)
+    if verbose:
+        mg.results()  # Prints out the hyperparameters
+    mg.predict(x, derivs=2)  # Saves the predictions to object
+    # Save the predictions to a csv file so they can be reused
+    results = dict(time=mg.x, volume=mg.y, fit_time=mg.xnew, fit_volume=mg.f,
+                   growth_rate=mg.df, d_growth_rate=mg.ddf,
+                   volume_var=mg.fvar, growth_rate_var=mg.dfvar,
+                   d_growth_rate_var=mg.ddfvar)
+    for name, value in results.items():
+        results[name] = np.full((n, ), np.nan)
+        results[name][idx] = value
+    return results
+
+# Give that to a writer: NOTE the writer needs to learn how to write the
+# output of a process that returns multiple signals like this one does.
+
+class gpParameters(ParametersABC):
+    default_dict = dict(dt=5,
+             noruns=5,
+             bounds={0: (-2, 3),
+                     1: (-2, 1),
+                     2: (-4, -1)},
+             verbose=False)
+    def __init__(self, dt, noruns, bounds, verbose):
+        """
+        Parameters
+        ----------
+            dt : float
+                The time step between time points, in minutes
+            noruns : int
+                The number of times the optimisation is tried
+            bounds : dict
+                Hyperparameter bounds for the Matern Kernel
+            verbose : bool
+                Determines whether to print hyperparameter results
+        """
+        self.dt = dt
+        self.noruns = noruns
+        self.bounds = bounds
+        self.verbose = verbose
+
+    @classmethod
+    def default(cls):
+        return cls.from_dict(cls.default_dict)
+
+
+class GPSignal(ProcessABC):
+    """Gaussian process fit of a Signal.
+    """
+    def __init__(self, parameters: gpParameters):
+        super().__init__(parameters)
+
+    def run(self, signal: pd.DataFrame):
+        # Fit each cell (row) separately; result_type='expand' only takes effect
+        # together with axis=1, turning the returned dicts into columns.
+        results = signal.apply(lambda x: estimate_gr(x,
+                                                     **self.parameters.to_dict()),
+                               axis=1, result_type='expand')
+        multi_signal = {name: pd.DataFrame(np.vstack(results[name]))
+                        for name in results.columns}
+        return multi_signal
diff --git a/core/processes/merger.py b/core/processes/merger.py
new file mode 100644
index 0000000000000000000000000000000000000000..a79227fac3e9522b6c9ac2a218751c179f1b7477
--- /dev/null
+++ b/core/processes/merger.py
@@ -0,0 +1,60 @@
+from agora.base import ParametersABC, ProcessABC
+from postprocessor.core.functions.tracks import get_joinable
+
+
+class mergerParameters(ParametersABC):
+    """
+    :param tolerance: float or int threshold of average (prediction error/std) necessary
+        to consider two tracks the same. If float, it is a fraction of the first track;
+        if int, it is in absolute units.
+    :param window: int value of window used for savgol_filter
+    :param degree: int value of polynomial degree passed to savgol_filter
+    """
+
+    def __init__(
+        self,
+        tolerance: float,
+        smooth: bool = False,
+        window: int = 5,
+        degree: int = 3,
+        min_avg_delta: float = 0.9,
+    ):
+
+        self.tolerance = tolerance
+        self.smooth = smooth
+        self.window = window
+        self.degree = degree
+        self.min_avg_delta = min_avg_delta
+
+    @classmethod
+    def default(cls):
+        return cls.from_dict(
+            {
+                "smooth": False,
+                "tolerance": 0.2,
+                "window": 5,
+                "degree": 3,
+                "min_avg_delta": 0.9,
+            }
+        )
+
+
+class merger(ProcessABC):
+    """
+    TODO check why it needs to be run a few times to complete the merging
+    """
+
+    def __init__(self, parameters):
+        super().__init__(parameters)
+
+    def run(self, signal):
+        joinable = get_joinable(signal, tol=self.parameters.tolerance)
+        # merged, _ = merge_tracks(signal)  # , min_len=self.window + 1)
+        # indices = (*zip(*merged.index.tolist()),)
+        # names = merged.index.names
+        # return {name: ids for name, ids in zip(names, indices)}
+        return joinable
diff --git a/core/processes/peaks.py b/core/processes/peaks.py
new file mode 100644
index 0000000000000000000000000000000000000000..15ddf950ea1ebb16a6a46177474dbec14489e1f3
--- /dev/null
+++ b/core/processes/peaks.py
@@ -0,0 +1,44 @@
+import numpy as np
+import pandas as pd
+from scipy.signal import argrelextrema
+
+from postprocessor.core.processes.base import ParametersABC, ProcessABC
+
+
+class PeaksParameters(ParametersABC):
+    """
+    Parameters
+        type : str {minima,  maxima, "all"}. Determines which type of peaks to identify
+        order : int Parameter to pass to scipy.signal.argrelextrema indicating
+            how many points to use for comparison.
+    """
+
+    def __init__(self, type, order):
+        self.type = type
+        self.order = order
+
+    @classmethod
+    def default(cls):
+        return cls.from_dict({"type": "minima", "order": 3})
+
+
+class Peaks(ProcessABC):
+    """
+    Identifies a signal sharply dropping.
+    """
+
+    def __init__(self, parameters: PeaksParameters):
+        super().__init__(parameters)
+
+    def run(self, signal: pd.DataFrame):
+        """
+        Returns a boolean dataframe with the same shape as the
+        original signal but with peaks as true values.
+        """
+        peaks_mat = np.zeros_like(signal, dtype=bool)
+
+        comparator = np.less if self.parameters.type == "minima" else np.greater
+        # find extrema along the time (column) axis
+        peaks_ids = argrelextrema(
+            signal.values, comparator=comparator, axis=1, order=self.parameters.order
+        )
+        peaks_mat[peaks_ids] = True
+
+        return pd.DataFrame(
+            peaks_mat,
+            index=signal.index,
+            columns=signal.columns,
+        )
diff --git a/core/processes/picker.py b/core/processes/picker.py
new file mode 100644
index 0000000000000000000000000000000000000000..a7221e6f8146609a59921e3752826aedac6c3802
--- /dev/null
+++ b/core/processes/picker.py
@@ -0,0 +1,339 @@
+import seaborn as sns
+from matplotlib import pyplot as plt  # TODO DELETE THIS
+
+from typing import Tuple, Union, List
+from abc import ABC, abstractmethod
+
+from itertools import groupby
+
+from utils_find_1st import find_1st, cmp_equal
+import numpy as np
+import pandas as pd
+
+from pcore.cells import CellsHDF
+
+from agora.base import ParametersABC, ProcessABC
+from postprocessor.core.functions.tracks import max_ntps, max_nonstop_ntps
+
+
+class pickerParameters(ParametersABC):
+    def __init__(
+        self,
+        sequence: List[str] = ["lineage", "condition"],
+    ):
+        self.sequence = sequence
+
+    @classmethod
+    def default(cls):
+        return cls.from_dict(
+            {
+                "sequence": [
+                    # ("lineage", "intersection", "families"),
+                    ("condition", "intersection", "any_present", 0.8),
+                    ("condition", "intersection", "growing", 40),
+                    ("condition", "intersection", "present", 8),
+                    ("condition", "intersection", "mother_buds", 5, 0.8),
+                    # ("lineage", "full_families", "intersection"),
+                ],
+            }
+        )
+
+
+class picker(ProcessABC):
+    """
+    :cells: Cell object passed to the constructor
+    :condition: Tuple with a condition and its associated parameter(s); conditions can be
+    "present", "nonstoply_present" or "quantile".
+    Determines the thresholds or fractions of signals to use.
+    :lineage: str {"mothers", "daughters", "families" (mothers AND daughters), "orphans"}.
+    Mothers/daughters pick cells with those tags, families pick the union of both and
+    orphans the difference between the total and families.
+    """
+
+    def __init__(
+        self,
+        parameters: pickerParameters,
+        cells: CellsHDF,
+    ):
+        super().__init__(parameters=parameters)
+
+        self._cells = cells
+
+    @staticmethod
+    def mother_assign_to_mb_matrix(ma: List[np.array]):
+        # Convert from list of lists to mother_bud sparse matrix
+        ncells = sum([len(t) for t in ma])
+        mb_matrix = np.zeros((ncells, ncells), dtype=bool)
+        c = 0
+        for cells in ma:
+            for d, m in enumerate(cells):
+                if m:
+                    mb_matrix[c + d, c + m - 1] = True
+
+            c += len(cells)
+
+        return mb_matrix
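+    # Minimal sketch for mother_assign_to_mb_matrix: ma = [[0, 1], [0, 0, 1]] lists the
+    # mother label of each cell per trap (0 meaning no mother). Cells are numbered 0..4
+    # globally, so the matrix is True at (1, 0) and (4, 2): in each trap the cell with
+    # label 1 is the mother of the later cell.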
+
+    @staticmethod
+    def mother_assign_from_dynamic(ma, label, trap, ntraps: int):
+        """
+        Interpolate the list of lists containing the associated mothers from the mother_assign_dynamic feature
+        """
+        idlist = list(zip(trap, label))
+        cell_gid = np.unique(idlist, axis=0)
+
+        last_lin_preds = [
+            find_1st(((label[::-1] == lbl) & (trap[::-1] == tr)), True, cmp_equal)
+            for tr, lbl in cell_gid
+        ]
+        mother_assign_sorted = ma[last_lin_preds]
+
+        traps = cell_gid[:, 0]
+        iterator = groupby(zip(traps, mother_assign_sorted), lambda x: x[0])
+        d = {key: [x[1] for x in group] for key, group in iterator}
+        nested_massign = [d.get(i, []) for i in range(ntraps)]
+
+        return nested_massign
+
+    def pick_by_lineage(self, signals, how):
+
+        idx = signals.index
+
+        if how:
+            mothers = set(self.mothers)
+            daughters = set(self.daughters)
+            # daughters, mothers = np.where(mother_bud_mat)
+
+            search = lambda a, b: np.where(
+                np.in1d(
+                    np.ravel_multi_index(np.array(a).T, np.array(a).max(0) + 1),
+                    np.ravel_multi_index(np.array(b).T, np.array(a).max(0) + 1),
+                )
+            )
+            if how == "mothers":
+                idx = mothers
+            elif how == "daughters":
+                idx = daughters
+            elif how == "daughters_w_mothers":
+                present_mothers = idx.intersection(mothers)
+                idx = set(
+                    [
+                        tuple(x)
+                        for m in present_mothers
+                        for x in np.array(self.daughters)[search(self.mothers, m)]
+                    ]
+                )
+
+                print("associated daughters: ", idx)
+            elif how == "mothers_w_daughters":
+                present_daughters = idx.intersection(daughters)
+                idx = set(
+                    [
+                        tuple(x)
+                        for d in present_daughters
+                        for x in np.array(self.mothers)[search(self.daughters, d)]
+                    ]
+                )
+            elif how == "full_families":
+                present_mothers = idx.intersection(mothers)
+                dwm_idx = set(
+                    [
+                        tuple(x)
+                        for m in present_mothers
+                        for x in np.array(self.daughters)[
+                            search(np.array(self.mothers), m)
+                        ]
+                    ]
+                )
+                present_daughters = idx.intersection(daughters)
+                mwd_idx = set(
+                    [
+                        tuple(x)
+                        for d in present_daughters
+                        for x in np.array(self.mothers)[
+                            search(np.array(self.daughters), d)
+                        ]
+                    ]
+                )
+                idx = mwd_idx.union(dwm_idx)
+            elif how == "families" or how == "orphans":
+                families = mothers.union(daughters)
+                if how == "families":
+                    idx = families
+                elif how == "orphans":
+                    idx = idx.difference(families)
+
+            idx = idx.intersection(signals.index)
+
+        return idx
+
+    def pick_by_condition(self, signals, condition, thresh):
+        idx = self.switch_case(signals, condition, thresh)
+        return idx
+
+    def get_mothers_daughters(self):
+        ma = self._cells["mother_assign_dynamic"]
+        trap = self._cells["trap"]
+        label = self._cells["cell_label"]
+        nested_massign = self.mother_assign_from_dynamic(
+            ma, label, trap, self._cells.ntraps
+        )
+        # mother_bud_mat = self.mother_assign_to_mb_matrix(nested_massign)
+
+        idx = set(
+            [
+                (tid, i + 1)
+                for tid, x in enumerate(nested_massign)
+                for i in range(len(x))
+            ]
+        )
+        mothers, daughters = zip(
+            *[
+                ((tid, m), (tid, d))
+                for tid, trapcells in enumerate(nested_massign)
+                for d, m in enumerate(trapcells, 1)
+                if m
+            ]
+        )
+        return mothers, daughters
+
+    def run(self, signals):
+        indices = set(signals.index)
+        self.mothers, self.daughters = self.get_mothers_daughters()
+        for alg, op, *params in self.sequence:
+            if alg == "lineage":
+                param1 = params[0]
+                new_indices = getattr(self, "pick_by_" + alg)(
+                    signals.loc[list(indices)], param1
+                )
+            else:
+                param1, *param2 = params
+                new_indices = getattr(self, "pick_by_" + alg)(
+                    signals.loc[list(indices)], param1, param2
+                )
+
+            if op == "union":
+                # new_indices = new_indices.intersection(set(signals.index))
+                new_indices = indices.union(new_indices)
+
+            indices = indices.intersection(new_indices)
+
+        return np.array(list(indices))
+
+    @staticmethod
+    def switch_case(
+        signals: pd.DataFrame,
+        condition: str,
+        threshold: Union[float, int, list],
+    ):
+        if len(threshold) == 1:
+            threshold = [_as_int(*threshold, signals.shape[1])]
+        case_mgr = {
+            "any_present": lambda s, thresh: any_present(s, thresh),
+            "present": lambda s, thresh: s.notna().sum(axis=1) > thresh,
+            # NOTE: assumes max_nonstop_ntps (longest uninterrupted presence,
+            # defined in core.functions.tracks) is available in this module
+            "nonstoply_present": lambda s, thresh: s.apply(max_nonstop_ntps, axis=1)
+            > thresh,
+            "growing": lambda s, thresh: s.diff(axis=1).sum(axis=1) > thresh,
+            "mother_buds": lambda s, p1, p2: mother_buds_wrap(s, p1, p2)
+            # "quantile": [np.quantile(signals.values[signals.notna()], threshold)],
+        }
+        return set(signals.index[case_mgr[condition](signals, *threshold)])
+
+
+def any_present(signals, threshold):
+    """
+    Return a boolean mask over cells: True for cells whose trap contains at least
+    one cell present for more than `threshold` timepoints.
+    """
+    any_present = pd.Series(
+        np.sum(
+            [
+                np.isin([x[0] for x in signals.index], i) & v
+                for i, v in (signals.notna().sum(axis=1) > threshold)
+                .groupby("trap")
+                .any()
+                .items()
+            ],
+            axis=0,
+        ).astype(bool),
+        index=signals.index,
+    )
+    return any_present
+
+
+def mother_buds(df, min_budgrowth_t, min_mobud_ratio):
+    """
+    Pick the mother cell of a trap and assign buds to it.
+
+    Parameters
+    ----------
+    df : pd.DataFrame
+        Signals of all cells in a single trap.
+    min_budgrowth_t : int
+        Minimal number of timepoints during which reassignment is locked after assigning a bud.
+    min_mobud_ratio : float
+        Size threshold at assignment: a bud candidate must be smaller than
+        min_mobud_ratio times the mother's size at its first timepoint.
+    #TODO incorporate bud-assignment data?
+
+    If more than one bud starts in the same timepoint, the smallest one is picked.
+    """
+
+    ntps = df.notna().sum(axis=1)
+    mother_id = df.index[ntps.argmax()]
+    nomother = df.drop(mother_id)
+    if not len(nomother):
+        return []
+    nomother = (  # Clean short-lived cells outside our mother cell's timepoints
+        nomother.loc[
+            nomother.apply(
+                lambda x: x.first_valid_index() >= df.loc[mother_id].first_valid_index()
+                and x.last_valid_index() <= df.loc[mother_id].last_valid_index(),
+                axis=1,
+            )
+        ]
+    )
+
+    start = nomother.apply(pd.Series.first_valid_index, axis=1)
+
+    # clean duplicates
+    duplicates = start.duplicated(False)
+    if duplicates.any():
+        dup_tps = np.unique(start[duplicates])
+        idx, tps = zip(
+            *[(nomother.loc[start == tp, tp].idxmin(), tp) for tp in dup_tps]
+        )
+        start = start[~duplicates]
+        start = pd.concat(
+            (start, pd.Series(tps, index=idx, dtype="int", name="cell_label"))
+        )
+        nomother = nomother.loc[start.index]
+        nomother.index = nomother.index.astype("int")
+
+    d_to_mother = nomother[start] - df.loc[mother_id, start] * min_mobud_ratio
+    size_filter = d_to_mother[
+        d_to_mother.apply(lambda x: x.dropna().iloc[0], axis=1) < 0
+    ]
+    cols_sorted = (
+        size_filter.sort_index(axis=1)
+        .apply(pd.Series.first_valid_index, axis=1)
+        .sort_values()
+    )
+    if not len(cols_sorted):
+        return []
+    bud_candidates = cols_sorted.loc[
+        [True, *(np.diff(cols_sorted.values) > min_budgrowth_t)]
+    ]
+
+    return [mother_id] + [int(i) for i in bud_candidates.index.tolist()]
+
+
+def mother_buds_wrap(signals, *args):
+    ids = []
+    for trap in signals.index.unique(level="trap"):
+        df = signals.loc[trap]
+        selected_ids = mother_buds(df, *args)
+        ids += [(trap, i) for i in selected_ids]
+
+    idx_srs = pd.Series(False, signals.index).astype(bool)
+    idx_srs.loc[ids] = True
+    return idx_srs
+
+
+def _as_int(threshold: Union[float, int], ntps: int):
+    if type(threshold) is float:
+        threshold = ntps * threshold
+    return threshold
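+
+
+if __name__ == "__main__":
+    # Minimal illustrative sketch (not part of the pipeline): exercise the
+    # module-level helpers on a tiny synthetic signal with two traps.
+    # Assumes pandas/numpy are imported at the top of this module as pd/np,
+    # as they are used throughout the functions above.
+    _idx = pd.MultiIndex.from_tuples(
+        [(1, 1), (1, 2), (2, 1)], names=["trap", "cell_label"]
+    )
+    _signals = pd.DataFrame(
+        [
+            [1.0, 2.0, 3.0, 4.0, 5.0],
+            [np.nan, np.nan, np.nan, 1.0, 2.0],
+            [np.nan, 1.0, np.nan, np.nan, np.nan],
+        ],
+        index=_idx,
+    )
+    # Both cells in trap 1 pass because one of them is present for >3 timepoints
+    print(any_present(_signals, 3))
+    # A float threshold is interpreted as a fraction of the timepoints
+    print(_as_int(0.5, _signals.shape[1]))  # -> 2.5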
diff --git a/core/processes/savgol.py b/core/processes/savgol.py
new file mode 100644
index 0000000000000000000000000000000000000000..d0256736bb83a26e253302017babe122545d8260
--- /dev/null
+++ b/core/processes/savgol.py
@@ -0,0 +1,152 @@
+import numpy as np
+import pandas as pd
+
+from postprocessor.core.processes.base import ParametersABC, ProcessABC
+
+
+class savgolParameters(ParametersABC):
+    """
+    Parameters
+
+        window : int (odd)
+            Window length in datapoints. Must be odd and smaller than the signal length.
+        polynom : int
+            Order of the fitted polynomial. Must be smaller than the window size.
+    """
+
+    def __init__(self, window, polynom):
+        self.window = window
+        self.polynom = polynom
+
+    @classmethod
+    def default(cls):
+        return cls.from_dict({"window": 3, "polynom": 2})
+
+
+class savgol(ProcessABC):
+    """
+    Apply Savitzky-Golay filter (works with NaNs, but it might return
+    NaN regions).
+    """
+
+    def __init__(self, parameters: savgolParameters):
+        super().__init__(parameters)
+
+    def run(self, signal: pd.DataFrame):
+        savgol_on_srs = lambda x: self.non_uniform_savgol(
+            x.index, x.values, self.parameters.window, self.parameters.polynom
+        )
+        return signal.apply(savgol_on_srs, 1)
+
+    @staticmethod
+    def non_uniform_savgol(x, y, window: int, polynom: int):
+        """
+        Applies a Savitzky-Golay filter to y with non-uniform spacing
+        as defined in x
+
+        This is based on https://dsp.stackexchange.com/questions/1676/savitzky-golay-smoothing-filter-for-not-equally-spaced-data
+        The borders are interpolated like scipy.signal.savgol_filter would do
+
+        source: https://dsp.stackexchange.com/a/64313
+
+        Parameters
+        ----------
+        x : array_like
+            List of floats representing the x values of the data
+        y : array_like
+            List of floats representing the y values. Must have same length
+            as x
+        window : int (odd)
+            Window length in datapoints. Must be odd and smaller than the length of x
+        polynom : int
+            Order of the fitted polynomial. Must be smaller than the window size
+
+        Returns
+        -------
+        np.array of float
+            The smoothed y values
+        """
+        if len(x) != len(y):
+            raise ValueError('"x" and "y" must be of the same size')
+
+        if len(x) < window:
+            raise ValueError("The data size must be larger than the window size")
+
+        if type(window) is not int:
+            raise TypeError('"window" must be an integer')
+
+        if window % 2 == 0:
+            raise ValueError('The "window" must be an odd integer')
+
+        if type(polynom) is not int:
+            raise TypeError('"polynom" must be an integer')
+
+        if polynom >= window:
+            raise ValueError('"polynom" must be less than "window"')
+
+        half_window = window // 2
+        polynom += 1
+
+        # Initialize variables
+        A = np.empty((window, polynom))  # Matrix
+        tA = np.empty((polynom, window))  # Transposed matrix
+        t = np.empty(window)  # Local x variables
+        y_smoothed = np.full(len(y), np.nan)
+
+        # Start smoothing
+        for i in range(half_window, len(x) - half_window, 1):
+            # Center a window of x values on x[i]
+            for j in range(0, window, 1):
+                t[j] = x[i + j - half_window] - x[i]
+
+            # Create the initial matrix A and its transposed form tA
+            for j in range(0, window, 1):
+                r = 1.0
+                for k in range(0, polynom, 1):
+                    A[j, k] = r
+                    tA[k, j] = r
+                    r *= t[j]
+
+            # Multiply the two matrices
+            tAA = np.matmul(tA, A)
+
+            # Invert the product of the matrices
+            tAA = np.linalg.inv(tAA)
+
+            # Calculate the pseudoinverse of the design matrix
+            coeffs = np.matmul(tAA, tA)
+
+            # Calculate c0 which is also the y value for y[i]
+            y_smoothed[i] = 0
+            for j in range(0, window, 1):
+                y_smoothed[i] += coeffs[0, j] * y[i + j - half_window]
+
+            # If at the end or beginning, store all coefficients for the polynom
+            if i == half_window:
+                first_coeffs = np.zeros(polynom)
+                for j in range(0, window, 1):
+                    for k in range(polynom):
+                        first_coeffs[k] += coeffs[k, j] * y[j]
+            elif i == len(x) - half_window - 1:
+                last_coeffs = np.zeros(polynom)
+                for j in range(0, window, 1):
+                    for k in range(polynom):
+                        last_coeffs[k] += coeffs[k, j] * y[len(y) - window + j]
+
+        # Interpolate the result at the left border
+        for i in range(0, half_window, 1):
+            y_smoothed[i] = 0
+            x_i = 1
+            for j in range(0, polynom, 1):
+                y_smoothed[i] += first_coeffs[j] * x_i
+                x_i *= x[i] - x[half_window]
+
+        # Interpolate the result at the right border
+        for i in range(len(x) - half_window, len(x), 1):
+            y_smoothed[i] = 0
+            x_i = 1
+            for j in range(0, polynom, 1):
+                y_smoothed[i] += last_coeffs[j] * x_i
+                x_i *= x[i] - x[-half_window - 1]
+
+        return y_smoothed
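+
+
+if __name__ == "__main__":
+    # Quick sanity check on synthetic, non-uniformly spaced data
+    # (illustrative sketch only; not part of the pipeline).
+    _x = np.array([0.0, 1.0, 2.5, 3.0, 4.5, 5.0, 6.5])
+    _y = np.sin(_x)
+    print(savgol.non_uniform_savgol(_x, _y, window=5, polynom=2))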
diff --git a/core/processes/template.py b/core/processes/template.py
new file mode 100644
index 0000000000000000000000000000000000000000..6fad758f7be89e37d40d3ba15e7d6674e25ba56b
--- /dev/null
+++ b/core/processes/template.py
@@ -0,0 +1,31 @@
+import numpy as np
+import pandas as pd
+
+from agora.base import ParametersABC, ProcessABC
+
+
+class TemplateParameters(ParametersABC):
+    """
+    Parameters
+    """
+
+    def __init__(
+        self,
+    ):
+        super().__init__()
+
+    @classmethod
+    def default(cls):
+        return cls.from_dict({})
+
+
+class Template(ProcessABC):
+    """
+    Template for process class.
+    """
+
+    def __init__(self, parameters: TemplateParameters):
+        super().__init__(parameters)
+
+    def run(self):
+        pass
diff --git a/core/processor.py b/core/processor.py
new file mode 100644
index 0000000000000000000000000000000000000000..81c54b6f19f60df1316223df87ad8dbb883c1ef3
--- /dev/null
+++ b/core/processor.py
@@ -0,0 +1,316 @@
+import h5py
+from itertools import takewhile
+from typing import List, Dict, Union
+from pydoc import locate
+
+import numpy as np
+import pandas as pd
+
+from tqdm import tqdm
+
+from agora.base import ParametersABC
+from pcore.io.writer import Writer
+from pcore.io.signal import Signal
+
+from pcore.cells import Cells
+from postprocessor.core.processes.merger import mergerParameters, merger
+from postprocessor.core.processes.picker import pickerParameters, picker
+
+
+class PostProcessorParameters(ParametersABC):
+    """
+    Anthology of parameters used for postprocessing
+    :merger:
+    :picker: parameters for picker
+    :processes: Dict of processes:[objectives]; 'processes' are defined in ./processes/,
+        while objectives are relative or absolute paths to datasets. If the paths are
+        relative, the post-processed addresses are used.
+
+    """
+
+    def __init__(
+        self,
+        targets={},
+        parameters={},
+        outpaths={},
+    ):
+        self.targets: Dict = targets
+        self.parameters: Dict = parameters
+        self.outpaths: Dict = outpaths
+
+    def __getitem__(self, item):
+        return getattr(self, item)
+
+    @classmethod
+    def default(cls, kind=None):
+        if kind == "defaults" or kind is None:
+            return cls(
+                targets={
+                    "prepost": {
+                        "merger": "/extraction/general/None/area",
+                        "picker": ["/extraction/general/None/area"],
+                    },
+                    "processes": (
+                        (
+                            "bud_metric",
+                            [
+                                "/extraction/general/None/volume",
+                                "/extraction/em_ratio/np_max/mean",
+                                "/extraction/em_ratio/np_max/median",
+                            ],
+                        ),
+                        (
+                            "dsignal",
+                            [
+                                "/extraction/general/None/volume",
+                                "/extraction/em_ratio/np_max/mean",
+                                "/extraction/em_ratio/np_max/median",
+                                "/extraction/em_ratio_bgsub/np_max/mean",
+                                "/extraction/em_ratio_bgsub/np_max/median",
+                                "/postprocessing/bud_metric/extraction_general_None_volume",
+                                "/postprocessing/bud_metric/extraction_em_ratio_np_max_mean",
+                                "/postprocessing/bud_metric/extraction_em_ratio_np_max_median",
+                            ],
+                        ),
+                        (
+                            "aggregate",
+                            [
+                                [
+                                    "/extraction/general/None/volume",
+                                    "/extraction/em_ratio/np_max/mean",
+                                    "/extraction/em_ratio/np_max/median",
+                                    "/extraction/em_ratio_bgsub/np_max/mean",
+                                    "/extraction/em_ratio_bgsub/np_max/median",
+                                    "/extraction/gsum/np_max/median",
+                                    "/extraction/gsum/np_max/mean",
+                                    "postprocessing/bud_metric/extraction_general_None_volume",
+                                    "postprocessing/bud_metric/extraction_em_ratio_np_max_mean",
+                                    "postprocessing/bud_metric/extraction_em_ratio_np_max_median",
+                                    "postprocessing/dsignal/extraction_general_None_volume",
+                                    "postprocessing/dsignal/postprocessing_bud_metric_extraction_general_None_volume",
+                                    "postprocessing/dsignal/postprocessing_bud_metric_extraction_em_ratio_np_max_median",
+                                    "postprocessing/dsignal/postprocessing_bud_metric_extraction_em_ratio_np_max_mean",
+                                ]
+                            ],
+                        ),
+                        # "savgol": ["/extraction/general/None/area"],
+                    ),
+                },
+                parameters={
+                    "prepost": {
+                        "merger": mergerParameters.default(),
+                        "picker": pickerParameters.default(),
+                    }
+                },
+                outpaths={"aggregate": "/postprocessing/experiment_wide/aggregated/"},
+            )
+
+    def to_dict(self):
+        return {k: _if_dict(v) for k, v in self.__dict__.items()}
+
+
+class PostProcessor:
+    def __init__(self, filename, parameters):
+        self.parameters = parameters
+        self._filename = filename
+        self._signal = Signal(filename)
+        self._writer = Writer(filename)
+
+        # self.outpaths = parameters["outpaths"]
+        self.merger = merger(parameters["parameters"]["prepost"]["merger"])
+
+        self.picker = picker(
+            parameters=parameters["parameters"]["prepost"]["picker"],
+            cells=Cells.from_source(filename),
+        )
+        self.classfun = {
+            process: self.get_process(process)
+            for process, _ in parameters["targets"]["processes"]
+        }
+        self.parameters_classfun = {
+            process: self.get_parameters(process)
+            for process, _ in parameters["targets"]["processes"]
+        }
+        self.targets = parameters["targets"]
+
+    @staticmethod
+    def get_process(process):
+        """
+        Dynamically import a process class from the 'processes' folder.
+        Assumes process filename and class name are the same
+        """
+        return locate("postprocessor.core.processes." + process + "." + process)
+
+    @staticmethod
+    def get_parameters(process):
+        """
+        Dynamically import the parameters class of a process from the 'processes' folder.
+        Assumes the module is named after the process and the class is named <process>Parameters.
+        """
+        return locate(
+            "postprocessor.core.processes." + process + "." + process + "Parameters"
+        )
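+    # Resolution sketch: get_process("dsignal") is expected to return the class
+    # postprocessor.core.processes.dsignal.dsignal, and get_parameters("dsignal")
+    # the matching dsignalParameters class (module, class and parameters share a stem).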
+
+    def run_prepost(self):
+        """Important processes run before normal post-processing ones"""
+
+        merge_events = self.merger.run(self._signal[self.targets["prepost"]["merger"]])
+
+        with h5py.File(self._filename, "r") as f:
+            prev_idchanges = self._signal.get_merges()
+
+        changes_history = list(prev_idchanges) + [np.array(x) for x in merge_events]
+        self._writer.write("modifiers/merges", data=changes_history)
+
+        with h5py.File(self._filename, "a") as f:  # TODO Remove this once done tweaking
+            if "modifiers/picks" in f:
+                del f["modifiers/picks"]
+
+        indices = self.picker.run(self._signal[self.targets["prepost"]["picker"][0]])
+        from collections import Counter
+
+        mothers, daughters = np.array(self.picker.mothers), np.array(
+            self.picker.daughters
+        )
+        self.tmp = [y for y in Counter([tuple(x) for x in mothers]).items() if y[1] > 2]
+        self._writer.write(
+            "postprocessing/lineage",
+            data=pd.MultiIndex.from_arrays(
+                np.append(mothers, daughters[:, 1].reshape(-1, 1), axis=1).T,
+                names=["trap", "mother_label", "daughter_label"],
+            ),
+            overwrite="overwrite",
+        )
+
+        # apply merge to mother-daughter
+        moset = set([tuple(x) for x in mothers])
+        daset = set([tuple(x) for x in daughters])
+        picked_set = set([tuple(x) for x in indices])
+        with h5py.File(self._filename, "a") as f:
+            merge_events = f["modifiers/merges"][()]
+        merged_moda = set([tuple(x) for x in merge_events[:, 0, :]]).intersection(
+            set([*moset, *daset, *picked_set])
+        )
+        search = lambda a, b: np.where(
+            np.in1d(
+                np.ravel_multi_index(a.T, a.max(0) + 1),
+                np.ravel_multi_index(b.T, a.max(0) + 1),
+            )
+        )
+
+        for target, source in merge_events:
+            if (
+                tuple(source) in moset
+            ):  # update mother to lowest positive index among the two
+                mother_ids = search(mothers, source)
+                mothers[mother_ids] = (
+                    target[0],
+                    self.pick_mother(mothers[mother_ids][0][1], target[1]),
+                )
+            if tuple(source) in daset:
+                daughters[search(daughters, source)] = target
+            if tuple(source) in picked_set:
+                indices[search(indices, source)] = target
+
+        self._writer.write(
+            "postprocessing/lineage_merged",
+            data=pd.MultiIndex.from_arrays(
+                np.append(mothers, daughters[:, 1].reshape(-1, 1), axis=1).T,
+                names=["trap", "mother_label", "daughter_label"],
+            ),
+            overwrite="overwrite",
+        )
+
+        self._writer.write(
+            "modifiers/picks",
+            data=pd.MultiIndex.from_arrays(
+                indices.T,
+                names=["trap", "cell_label"],
+            ),
+            overwrite="overwrite",
+        )
+
+    @staticmethod
+    def pick_mother(a, b):
+        """Pick the updated mother id between two candidates.
+
+        The lower positive id wins; a zero id is never chosen,
+        e.g. pick_mother(3, 2) -> 2 and pick_mother(3, 0) -> 3.
+        """
+        x = max(a, b)
+        if min([a, b]):
+            x = [a, b][np.argmin([a, b])]
+        return x
+
+    def run(self):
+        self.run_prepost()
+
+        for process, datasets in tqdm(self.targets["processes"]):
+            if process in self.parameters["parameters"].get(
+                "processes", {}
+            ):  # If we assigned parameters
+                parameters = self.parameters_classfun[process](
+                    self.parameters["parameters"]["processes"][process]
+                )
+
+            else:
+                parameters = self.parameters_classfun[process].default()
+
+            loaded_process = self.classfun[process](parameters)
+            for dataset in datasets:
+                # print("Processing", process, "for", dataset)
+
+                if isinstance(dataset, list):  # multisignal process
+                    signal = [self._signal[d] for d in dataset]
+                elif isinstance(dataset, str):
+                    signal = self._signal[dataset]
+                else:
+                    raise TypeError("Incorrect dataset type: " + str(type(dataset)))
+
+                result = loaded_process.run(signal)
+
+                if process in self.parameters.to_dict()["outpaths"]:
+                    outpath = self.parameters.to_dict()["outpaths"][process]
+                elif isinstance(dataset, list):
+                    # If no outpath defined, place the result in the minimum common
+                    # branch of all signals used
+                    # Longest common prefix shared by all dataset paths
+                    prefix = "".join(
+                        c[0]
+                        for c in takewhile(
+                            lambda x: all(x[0] == y for y in x), zip(*dataset)
+                        )
+                    )
+                    outpath = (
+                        prefix
+                        + "_".join(  # TODO check that it always finishes in '/'
+                            [d[len(prefix) :].replace("/", "_") for d in dataset]
+                        )
+                    )
+                elif isinstance(dataset, str):
+                    outpath = dataset[1:].replace("/", "_")
+                else:
+                    raise TypeError(
+                        "Outpath not defined for dataset of type " + str(type(dataset))
+                    )
+
+                if process not in self.parameters.to_dict()["outpaths"]:
+                    outpath = "/postprocessing/" + process + "/" + outpath
+
+                if isinstance(result, dict):  # Multiple Signals as output
+                    for k, v in result.items():
+                        self.write_result(
+                            outpath + f"/{k}",
+                            v,
+                            metadata={},
+                        )
+                else:
+                    self.write_result(
+                        outpath,
+                        result,
+                        metadata={},
+                    )
+
+    def write_result(
+        self, path: str, result: Union[List, pd.DataFrame, np.ndarray], metadata: Dict
+    ):
+        self._writer.write(path, result, meta=metadata)
+
+
+def _if_dict(item):
+    if hasattr(item, "to_dict"):
+        item = item.to_dict()
+    return item
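+
+
+# Serialisation note: _if_dict lets PostProcessorParameters.to_dict() flatten nested
+# parameter objects; a value exposing a to_dict() method is converted via that method,
+# while plain values (e.g. strings or dicts of paths) pass through unchanged.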
diff --git a/examples/basic_processes.py b/examples/basic_processes.py
new file mode 100644
index 0000000000000000000000000000000000000000..c322fd4b68b2dd4f310a44bcd2a4ccbebb366f48
--- /dev/null
+++ b/examples/basic_processes.py
@@ -0,0 +1,15 @@
+from postprocessor.core.processor import PostProcessor, PostProcessorParameters
+
+params = PostProcessorParameters.default()
+pp = PostProcessor(
+    "/shared_libs/pipeline-core/scripts/pH_calibration_dual_phl__ura8__by4741__01/ph_5_29_025store.h5",
+    params,
+)
+tmp = pp.run()
+
+import h5py
+
+# f = h5py.File(
+#     "/shared_libs/pipeline-core/scripts/pH_calibration_dual_phl__ura8__by4741__01/ph_5_29_025store.h5",
+#     "a",
+# )
diff --git a/examples/group.py b/examples/group.py
new file mode 100644
index 0000000000000000000000000000000000000000..1e59978acca5cd34c7796cb65d93d95d1657773c
--- /dev/null
+++ b/examples/group.py
@@ -0,0 +1,21 @@
+from pathlib import Path
+
+from postprocessor.core.group import GroupParameters, Group
+
+poses = [
+    x.name.split("store")[0]
+    for x in Path(
+        "/shared_libs/pipeline-core/scripts/data/ph_calibration_dual_phl_ura8_5_04_5_83_7_69_7_13_6_59__01"
+    ).rglob("*")
+    if x.name != "images.h5"
+]
+
+gr = Group(
+    GroupParameters(
+        signals=["/extraction/general/None/area", "/extraction/mCherry/np_max/median"]
+    )
+)
+gr.run(
+    central_store="/shared_libs/pipeline-core/scripts/data/ph_calibration_dual_phl_ura8_5_04_5_83_7_69_7_13_6_59__01",
+    poses=poses,
+)
diff --git a/examples/testing.py b/examples/testing.py
new file mode 100644
index 0000000000000000000000000000000000000000..d5420d8337710e6a41f47a7cec436ed2a765b5f0
--- /dev/null
+++ b/examples/testing.py
@@ -0,0 +1,220 @@
+from typing import Dict, List, Union
+import operator
+import re
+
+import numpy as np
+import pandas as pd
+from pandas import Series, DataFrame
+from sklearn.cluster import KMeans
+from matplotlib import pyplot as plt
+import seaborn as sns
+
+import compress_pickle
+
+from postprocessor.core.postprocessor import PostProcessor
+from postprocessor.core.tracks import get_avg_grs, non_uniform_savgol
+from postprocessor.core.tracks import clean_tracks, merge_tracks, join_tracks
+from postprocessor.core.ph import *
+
+sns.set_context("talk", font_scale=1.8)
+sns.set_theme(style="whitegrid")
+
+# pp_c = PostProcessor(source=19920)  # 19916
+# pp_c.load_tiler_cells()
+# # f = "/home/alan/Documents/libs/extraction/extraction/examples/gluStarv_2_0_x2_dual_phl_ura8_00/extraction"
+# # f = "/home/alan/Documents/tmp/pH_calibration_dual_phl__ura8__by4741__01/extraction/"
+# f = "/home/alan/Documents/tmp/pH_calibration_dual_phl__ura8__by4741_Alan4_00/extraction/"
+# pp_c.load_extraction(f)
+
+# calib = process_phs(pp_c)
+# # c = calib.loc[(5.4 < calib["ph"]) & (calib["ph"] < 8)].groupby("ph").mean()
+# sns.violinplot(x="ph", y="em_ratio_mean", data=calib)
+# plt.show()
+
+
+# bring timelapse data and convert it to pH
+
+pp = PostProcessor(source=19831)  # 19831
+pp.load_tiler_cells()
+f = "/home/alan/Documents/tmp/gluStarv_2_0_x2_dual_phl_ura8_00/extraction/"
+# f = "/home/alan/Documents/tmp/downUpshift_2_0_2_glu_dual_phluorin__glt1_psa1_ura7__thrice_00/extraction/"
+pp.load_extraction(f)
+
+compress_pickle.dump(pp.extraction, "/home/alan/extraction_example.pkl")
+
+if True:  # Load extracted data or pkld version
+    new_dfs = compress_pickle.load("/home/alan/Documents/tmp/new_dfs.gz")
+else:  # Combine dataframes
+    new_dfs = combine_dfs(get_dfs(pp))
+    # t = [x.index for x in new_dfs.values()]
+    # i = set(t[0])
+    # for j in t:
+    #     i = i.intersection(j)
+    new_dfs = {
+        k: v
+        for k, v in new_dfs.items()
+        if k[2] != "imBackground"
+        and k[2] != "median"
+        and ~(((k[0] == "GFPFast") | (k[0] == "pHluorin405")) and k[2] == "max2p5pc")
+    }
+
+del pp
+compress_pickle.dump(new_dfs, "/home/alan/Documents/tmp/new_dfs.gz")
+
+
+def get_clean_dfs(dfs=None):
+    if dfs is None:
+        clean_dfs = compress_pickle.load("/home/alan/Documents/tmp/clean_dfs.gz")
+    else:
+        # Clean timelapse
+        clean = clean_tracks(dfs[("general", None, "area")])
+        tra, joint = merge_tracks(clean)
+        clean_dfs = dfs
+        i_ids = set(clean.index).intersection(
+            clean_dfs[("general", None, "area")].index
+        )
+        clean_dfs = {k: v.loc[i_ids] for k, v in clean_dfs.items()}
+        clean_dfs = {k: join_tracks(v, joint, drop=True) for k, v in clean_dfs.items()}
+
+        compress_pickle.dump(clean_dfs, "/home/alan/Documents/tmp/clean_dfs.gz")
+    return clean_dfs
+
+
+def plot_ph_hmap(clean_dfs):
+    GFPFast = clean_dfs[("GFPFast", np.maximum, "mean")]
+    phluorin = clean_dfs[("pHluorin405", np.maximum, "mean")]
+    ph = GFPFast / phluorin
+    ph = ph.loc[ph.notna().sum(axis=1) > 0.7 * ph.shape[1]]
+    ph = 1 / ph
+
+    fig, ax = plt.subplots()
+    hmap(ph, cbar_kws={"label": r"emission ratio $\propto$ pH"})
+    plt.xlabel("Time (hours)")
+    plt.ylabel("Cells")
+    xticks = plt.xticks(fontsize=15)[0]
+    ax.set(yticklabels=[], xticklabels=[str(round(i * 5 / 60, 1)) for i in xticks])
+    # plt.setp(ax.get_xticklabels(), Rotation=90)
+    plt.show()
+
+
+def fit_calibs(c, h):
+    h = process_phs(pp_c)
+    h["ratio"] = h["GFPFast_bgsub_median"] / h["pHluorin405_bgsub_median"]
+    sns.lineplot(x="ph", y="ratio", data=h, err_style="bars")
+    plt.show()
+
+    calibs = fit_calibration(c)
+    for k, params in calibs.items():
+        i, j = ("_".join(k.split("_")[:-1]), k.split("_")[-1])
+        if j == "mean" and "k2" not in k:
+            clean_dfs[k] = objective(clean_dfs[i, np.maximum, j], *params)
+
+
+# max 2.5% / med
+def plot_ratio_vs_max2p5(h):
+    fig, ax = plt.subplots()
+    sns.regplot(
+        x="em_ratio_median",
+        y="mCherry_max2p5pc",
+        data=h,
+        scatter=False,
+        ax=ax,
+        color="teal",
+    )
+    sns.scatterplot(x="em_ratio_median", y="max5_d_med", data=h, hue="ph", ax=ax)
+    plt.xlabel(r"Fluorescence ratio $R \propto (1/pH)$")
+    plt.ylabel("Max 2.5% px / median")
+    plt.show()
+
+
+clean_dfs = get_clean_dfs(new_dfs)
+em = clean_dfs[("em_ratio", np.maximum, "mean")]
+area = clean_dfs[("general", None, "area")]
+
+
+def get_grs(clean_dfs):
+    area = clean_dfs[("general", None, "area")]
+    area = area.loc[area.notna().sum(axis=1) > 10]
+    return area.apply(growth_rate, axis=1)
+
+
+def get_agg(dfs, rng):
+    # df dict of DataFrames containing an area/vol one TODO generalise this beyond area
+    # rng tuple of section to use
+    grs = get_grs(dfs)
+    smooth = grs.loc(axis=1)[list(range(rng[0], rng[1]))].dropna(how="all")
+
+    aggregate_mean = lambda dfs, rng: pd.concat(
+        {
+            k[0] + "_" + k[2]: dfs[k].loc[smooth.index, rng[0] : rng[1]].mean(axis=1)
+            for k in dfs.keys()
+        },
+        axis=1,
+    )
+    # f_comp_df = comp_df.loc[(comp_df["gr"] > 0) & (area.notna().sum(axis=1) > 50)]
+
+    agg = aggregate_mean(dfs, rng)
+    agg["max2_med"] = agg["mCherry_max2p5pc"] / agg["mCherry_mean"]
+
+    for c in agg.columns:
+        agg[c + "_log"] = np.log(agg[c])
+
+    agg["gr_mean"] = smooth.loc[set(agg.index).intersection(smooth.index)].mean(axis=1)
+    agg["gr_max"] = smooth.loc[set(agg.index).intersection(smooth.index)].max(axis=1)
+
+    return agg
+
+
+def plot_scatter_fit(x, y, data, hue=None, xlabel=None, ylabel=None, ylim=None):
+    fig, ax = plt.subplots()
+    sns.regplot(x=x, y=y, data=data, scatter=False, ax=ax)
+    sns.scatterplot(x=x, y=y, data=data, ax=ax, alpha=0.1, hue=hue)
+    # plt.show()
+    if xlabel is not None:
+        plt.xlabel(xlabel)
+    if ylabel is not None:
+        plt.ylabel(ylabel)
+    if ylim is not None:
+        plt.ylim(ylim)
+
+    fig.savefig(
+        "/home/alan/Documents/sync_docs/drafts/third_year_pres/figs/"
+        + str(len(data))
+        + "_"
+        + x
+        + "_vs_"
+        + y
+        + ".png",
+        dpi=200,
+    )
+
+
+from extraction.core.argo import Argo, annot_from_dset
+
+
+def additional_feats(aggs):
+    aggs["gr_mean_norm"] = aggs["gr_mean"] * 12
+    aggs["log_ratio_r"] = np.log(1 / aggs["em_ratio_mean"])
+    return aggs
+
+
+def compare_methods_ph_calculation(dfs):
+    GFPFast = dfs[("GFPFast", np.maximum, "mean")]
+    phluorin = dfs[("pHluorin405", np.maximum, "mean")]
+    ph = GFPFast / phluorin
+
+    sns.scatterplot(
+        x=dfs["em_ratio", np.maximum, "mean"].values.flatten(),
+        y=ph.values.flatten(),
+        alpha=0.1,
+    )
+    plt.xlabel("ratio_median")
+    plt.ylabel("median_ratio")
+    plt.title("Comparison of pH calculation methods")
+    plt.show()
+
+
+# count the number of changes (True<->False flips) in a boolean list
+nchanges = lambda x: sum(operator.xor(i, j) for i, j in zip(x[:-1], x[1:]))
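+# e.g. nchanges([True, True, False, True]) == 2 (the value flips twice)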