From 4f302a13ee818196ebd03871484a29cef37dfa99 Mon Sep 17 00:00:00 2001
From: Swainlab <peter.swain@ed.ac.uk>
Date: Sun, 2 Jul 2023 11:48:19 +0100
Subject: [PATCH] docs for buddings

---
 src/agora/io/signal.py                       |  4 +-
 src/aliby/pipeline.py                        | 38 +------------
 src/postprocessor/core/processor.py          |  6 +-
 src/postprocessor/core/reshapers/buddings.py | 59 ++++++++++----------
 4 files changed, 38 insertions(+), 69 deletions(-)

diff --git a/src/agora/io/signal.py b/src/agora/io/signal.py
index cc852089..20b6d8ba 100644
--- a/src/agora/io/signal.py
+++ b/src/agora/io/signal.py
@@ -20,7 +20,9 @@ class Signal(BridgeH5):
     """
     Fetch data from h5 files for post-processing.
 
-    Signal assumes that the metadata and data are accessible to perform time-adjustments and apply previously recorded post-processes.
+    Signal assumes that the metadata and data are accessible to
+    perform time-adjustments and apply previously recorded
+    post-processes.
     """
 
     def __init__(self, file: t.Union[str, Path]):
diff --git a/src/aliby/pipeline.py b/src/aliby/pipeline.py
index 6e37bac2..52638883 100644
--- a/src/aliby/pipeline.py
+++ b/src/aliby/pipeline.py
@@ -592,41 +592,6 @@ class Pipeline(ProcessABC):
         )
         return (traps_above_nthresh & traps_above_athresh).mean()
 
-    # FIXME: Remove this functionality. It used to be for
-    # older hdf5 file formats.
-    def _load_config_from_file(
-        self,
-        filename: Path,
-        process_from: t.Dict[str, int],
-        trackers_state: t.List,
-        overwrite: t.Dict[str, bool],
-    ):
-        with h5py.File(filename, "r") as f:
-            for k in process_from.keys():
-                if not overwrite[k]:
-                    process_from[k] = self.legacy_get_last_tp[k](f)
-                    process_from[k] += 1
-        return process_from, trackers_state, overwrite
-
-    # FIXME: Remove this functionality. It used to be for
-    # older hdf5 file formats.
-    @staticmethod
-    def legacy_get_last_tp(step: str) -> t.Callable:
-        """Get last time-point in different ways depending
-        on which step we are using
-
-        To support segmentation in aliby < v0.24
-        TODO Deprecate and replace with State method
-        """
-        switch_case = {
-            "tiler": lambda f: f["trap_info/drifts"].shape[0] - 1,
-            "baby": lambda f: f["cell_info/timepoint"][-1],
-            "extraction": lambda f: f[
-                "extraction/general/None/area/timepoint"
-            ][-1],
-        }
-        return switch_case[step]
-
     def _setup_pipeline(
         self, image_id: int
     ) -> t.Tuple[
@@ -682,7 +647,6 @@ class Pipeline(ProcessABC):
                 step: self.step_sequence.index(ow_id) < i
                 for i, step in enumerate(self.step_sequence, 1)
             }
-
         # set up
         directory = config["general"]["directory"]
         trackers_state: t.List[np.ndarray] = []
@@ -722,7 +686,7 @@ class Pipeline(ProcessABC):
                         )
                         config["tiler"] = steps["tiler"].parameters.to_dict()
                     except Exception:
-                        self._log(f"Overwriting tiling data")
+                        self._log("Overwriting tiling data")
 
             if config["general"]["use_explog"]:
                 meta.run()
diff --git a/src/postprocessor/core/processor.py b/src/postprocessor/core/processor.py
index 76be13cd..ec9bcd04 100644
--- a/src/postprocessor/core/processor.py
+++ b/src/postprocessor/core/processor.py
@@ -139,7 +139,7 @@ class PostProcessor(ProcessABC):
         for k in dicted_params.keys():
             if not isinstance(dicted_params[k], dict):
                 dicted_params[k] = dicted_params[k].to_dict()
-        # merger and picker
+        # initialise merger and picker
         self.merger = Merger(
             MergerParameters.from_dict(dicted_params["merger"])
         )
@@ -147,12 +147,12 @@ class PostProcessor(ProcessABC):
             PickerParameters.from_dict(dicted_params["picker"]),
             cells=Cells.from_source(filename),
         )
-        # processes, such as buddings
+        # get processes, such as buddings
         self.classfun = {
             process: get_process(process)
             for process, _ in parameters["targets"]["processes"]
         }
-        # parameters for the process in classfun
+        # get parameters for the processes in classfun
         self.parameters_classfun = {
             process: get_parameters(process)
             for process, _ in parameters["targets"]["processes"]
diff --git a/src/postprocessor/core/reshapers/buddings.py b/src/postprocessor/core/reshapers/buddings.py
index 4398b947..ba9fe2fc 100644
--- a/src/postprocessor/core/reshapers/buddings.py
+++ b/src/postprocessor/core/reshapers/buddings.py
@@ -13,25 +13,18 @@ from postprocessor.core.lineageprocess import (
 
 
 class buddingsParameters(LineageProcessParameters):
-    """
-    Parameter class to obtain budding events.
-
-    Define the location of lineage information in the h5 file.
-
-    """
+    """Give the location of lineage information in the h5 file."""
 
     _defaults = {"lineage_location": "postprocessing/lineage_merged"}
 
 
 class buddings(LineageProcess):
     """
-    Calculate buddings in a trap assuming one mother per trap.
+    Generate a dataframe of budding events.
 
-    Return a pandas series with the buddings.
+    We assume one mother per trap.
 
-    We define a budding event as when a bud is first identified.
-
-    This bud may not be considered a bud until later in the experiment.
+    A bud may not be considered a bud until later in the experiment.
     """
 
     def __init__(self, parameters: buddingsParameters):
@@ -41,38 +34,48 @@ class buddings(LineageProcess):
     def run(
         self, signal: pd.DataFrame, lineage: np.ndarray = None
     ) -> pd.DataFrame:
-        """TODO."""
+        """
+        Generate dataframe of budding events.
+
+        Find daughters for mothers in a Signal for which we have lineage data.
+        Create a dataframe indicating the time each daughter first appears.
+
+        We use the data from Signal only to find when the daughters appear, by
+        their first non-NaN value.
+        """
+        # each row of lineage is (trap, mother, daughter)
         lineage = lineage or self.lineage
-        # select traps and mother cells in a given signal
+        # select traps and mothers in the signal that have lineage data
         traps_mothers: t.Dict[tuple, list] = {
-            tuple(mo): [] for mo in lineage[:, :2] if tuple(mo) in signal.index
+            tuple(trap_mo): []
+            for trap_mo in lineage[:, :2]
+            if tuple(trap_mo) in signal.index
         }
+        # find daughters for these traps and mothers
         for trap, mother, daughter in lineage:
             if (trap, mother) in traps_mothers.keys():
                 traps_mothers[(trap, mother)].append(daughter)
+        # sub dataframe of signal for the selected mothers
         mothers = signal.loc[
             set(signal.index).intersection(traps_mothers.keys())
         ]
-        # create a new dataframe with dimensions (n_mother_cells * n_timepoints)
+        # create a boolean dataframe of shape (n_mother_cells, n_tps)
         buddings = pd.DataFrame(
             np.zeros((mothers.shape[0], signal.shape[1])).astype(bool),
             index=mothers.index,
             columns=signal.columns,
         )
         buddings.columns.names = ["timepoint"]
-        # get time of first appearance for every cell using Pandas
+        # get time of first non-NaN value of signal for every cell using Pandas
         fvi = signal.apply(lambda x: x.first_valid_index(), axis=1)
         # fill the budding events
-        for mother_id, daughters in traps_mothers.items():
-            daughters_idx = set(
-                fvi.loc[
-                    fvi.index.intersection(
-                        list(product((mother_id[0],), daughters))
-                    )
-                ].values
-            ).difference({0})
-            buddings.loc[
-                mother_id,
-                daughters_idx,
-            ] = True
+        for trap_mother_id, daughters in traps_mothers.items():
+            times_of_bud_appearance = fvi.loc[
+                fvi.index.intersection(
+                    list(product((trap_mother_id[0],), daughters))
+                )
+            ].values
+            # discard time point 0 to ignore buds present in the first image
+            daughters_idx = set(times_of_bud_appearance).difference({0})
+            buddings.loc[trap_mother_id, daughters_idx] = True
         return buddings
-- 
GitLab