From fb7a49e85467dd406ad96bd962ceaafd69243da8 Mon Sep 17 00:00:00 2001
From: pswain <peter.swain@ed.ac.uk>
Date: Thu, 2 Nov 2023 18:01:22 +0000
Subject: [PATCH] feat(babysitter): removed pixel_size; docs(bud_metric)

---
 src/agora/utils/lineage.py                    | 17 ----
 src/aliby/baby_client.py                      |  2 -
 src/aliby/baby_sitter.py                      |  5 +-
 src/aliby/pipeline.py                         |  2 +-
 src/postprocessor/core/lineageprocess.py      |  5 +-
 .../core/reshapers/bud_metric.py              | 98 +++++++++----------
 src/postprocessor/core/reshapers/picker.py    | 12 +--
 7 files changed, 54 insertions(+), 87 deletions(-)
 delete mode 100644 src/agora/utils/lineage.py

diff --git a/src/agora/utils/lineage.py b/src/agora/utils/lineage.py
deleted file mode 100644
index 05c161a5..00000000
--- a/src/agora/utils/lineage.py
+++ /dev/null
@@ -1,17 +0,0 @@
-#!/usr/bin/env python3
-
-import numpy as np
-
-from agora.io.bridge import groupsort
-
-
-def mb_array_to_dict(mb_array: np.ndarray):
-    """
-    Convert a lineage ndarray (trap, mother_id, daughter_id)
-    into a dictionary of lists ( mother_id ->[daughters_ids] )
-    """
-    return {
-        (trap, mo): [(trap, d[0]) for d in daughters]
-        for trap, mo_da in groupsort(mb_array).items()
-        for mo, daughters in groupsort(mo_da).items()
-    }
diff --git a/src/aliby/baby_client.py b/src/aliby/baby_client.py
index 1ced0c76..a94fe6ff 100644
--- a/src/aliby/baby_client.py
+++ b/src/aliby/baby_client.py
@@ -204,7 +204,6 @@ def choose_model_from_params(
     """
     # cameras prime95 has become sCMOS and evolve has EMCCD
     valid_models = list(modelsets().keys())
-    breakpoint()
 
     # Apply modelset filter if specified
     if modelset_filter is not None:
@@ -218,7 +217,6 @@ def choose_model_from_params(
     ]
     params_re = re.compile("^" + "_".join(params) + "$")
     valid_models = list(filter(params_re.search, valid_models))
-    breakpoint()
     # Check that there are valid models
     if len(valid_models) == 0:
         raise KeyError(
diff --git a/src/aliby/baby_sitter.py b/src/aliby/baby_sitter.py
index 386e312b..2bd930ba 100644
--- a/src/aliby/baby_sitter.py
+++ b/src/aliby/baby_sitter.py
@@ -15,15 +15,14 @@ class BabyParameters(ParametersABC):
     def __init__(
         self,
         modelset_name,
-        pixel_size,
         clogging_thresh,
         min_bud_tps,
         isbud_thresh,
         session,
     ):
         """Initialise parameters for BABY."""
+        # pixel_size is specified in BABY's model sets
         self.modelset_name = modelset_name
-        self.pixel_size = pixel_size
         self.clogging_thresh = clogging_thresh
         self.min_bud_tps = min_bud_tps
         self.isbud_thresh = isbud_thresh
@@ -34,7 +33,6 @@ class BabyParameters(ParametersABC):
         """Define default parameters; kwargs choose BABY model set."""
         return cls(
             modelset_name=get_modelset_name_from_params(**kwargs),
-            pixel_size=0.182,
             clogging_thresh=0.75,
             min_bud_tps=3,
             isbud_thresh=0.5,
@@ -84,7 +82,6 @@ class BabyRunner(StepABC):
         else:
             brain = modelsets.get(
                 modelset_name,
-                pixel_size=parameters.pixel_size,
                 clogging_thresh=parameters.clogging_thresh,
                 min_bud_tps=parameters.min_bud_tps,
                 isbud_thresh=parameters.isbud_thresh,
diff --git a/src/aliby/pipeline.py b/src/aliby/pipeline.py
index aa74fdfd..46b786c7 100644
--- a/src/aliby/pipeline.py
+++ b/src/aliby/pipeline.py
@@ -476,7 +476,7 @@ class Pipeline(ProcessABC):
                                     result = pipe["steps"][step].run_tp(
                                         i, **run_kwargs.get(step, {})
                                     )
-                                    # write to h5 file using writers
+                                    # write result to h5 file using writers
                                     # extractor writes to h5 itself
                                     if step in loaded_writers:
                                         loaded_writers[step].write(
diff --git a/src/postprocessor/core/lineageprocess.py b/src/postprocessor/core/lineageprocess.py
index c359df89..90378a47 100644
--- a/src/postprocessor/core/lineageprocess.py
+++ b/src/postprocessor/core/lineageprocess.py
@@ -7,7 +7,6 @@ import numpy as np
 import pandas as pd
 
 from agora.abc import ParametersABC
-from agora.utils.kymograph import get_index_as_np
 from postprocessor.core.abc import PostProcessABC
 
 
@@ -57,10 +56,10 @@ class LineageProcess(PostProcessABC):
         )
 
     def get_lineage_information(self, signal=None, merged=True):
-        """Get lineage as an array with tile IDs, mother labels, and corresponding bud labels."""
+        """Get lineage as an array with tile IDs, mother and bud labels."""
         if signal is not None and "mother_label" in signal.index.names:
             # from kymograph
-            lineage = get_index_as_np(signal)
+            lineage = np.array(signal.index.to_list())
         elif hasattr(self, "lineage"):
             lineage = self.lineage
         elif hasattr(self, "cells"):
diff --git a/src/postprocessor/core/reshapers/bud_metric.py b/src/postprocessor/core/reshapers/bud_metric.py
index 57c01069..eee5c0b4 100644
--- a/src/postprocessor/core/reshapers/bud_metric.py
+++ b/src/postprocessor/core/reshapers/bud_metric.py
@@ -3,11 +3,26 @@ import typing as t
 import numpy as np
 import pandas as pd
 
-from agora.utils.lineage import mb_array_to_dict
+from agora.io.bridge import groupsort
 from postprocessor.core.lineageprocess import (
     LineageProcess,
     LineageProcessParameters,
 )
+import logging
+
+
+def mother_bud_array_to_dict(mb_array: np.ndarray):
+    """
+    Convert a lineage into a dict of lists.
+
+    A lineage is an array (trap, mother_id, daughter_id) and becomes
+    a dict mapping (trap, mother_id) -> [(trap, daughter_id), ...].
+    """
+    return {
+        (trap, mo): [(trap, d[0]) for d in daughters]
+        for trap, mo_da in groupsort(mb_array).items()
+        for mo, daughters in groupsort(mo_da).items()
+    }
 
 
 class BudMetricParameters(LineageProcessParameters):
@@ -18,12 +33,13 @@ class BudMetricParameters(LineageProcessParameters):
 
 class BudMetric(LineageProcess):
     """
-    Requires mother-bud information to create a new dataframe where the
-    indices are mother ids and values are the daughters' values for a
-    given signal.
+    Create a dataframe indexed by mother IDs with values from buds.
+
+    Requires mother-bud information.
     """
 
     def __init__(self, parameters: BudMetricParameters):
+        """Initialise using LineageProcess."""
         super().__init__(parameters)
 
     def run(
@@ -31,23 +47,27 @@ class BudMetric(LineageProcess):
         signal: pd.DataFrame,
         lineage: t.Dict[pd.Index, t.Tuple[pd.Index]] = None,
     ):
+        """Calculate a metric for all buds."""
         if lineage is None:
             # define lineage
             if hasattr(self, "lineage"):
                 lineage = self.lineage
             else:
-                # lineage information in the Signal dataframe
+                # lineage information in the Signal data frame
                 assert "mother_label" in signal.index.names
                 lineage = signal.index.to_list()
-        return self.get_bud_metric(signal, mb_array_to_dict(lineage))
+        return self.get_bud_metric(signal, mother_bud_array_to_dict(lineage))
 
     @staticmethod
     def get_bud_metric(
-        signal: pd.DataFrame, md: t.Dict[t.Tuple, t.Tuple[t.Tuple]] = None
+        signal: pd.DataFrame,
+        lineage_dict: t.Dict[t.Tuple, t.Tuple[t.Tuple]] = None,
     ):
         """
-        Generate a dataframe of a Signal for buds indexed by their mothers,
-        concatenating data from all the buds for each mother.
+        Generate a dataframe of a Signal for buds.
+
+        The data frame is indexed by the buds' mothers and concatenates
+        data from all the buds for each mother.
 
         Parameters
         ---------
@@ -62,7 +82,11 @@ class BudMetric(LineageProcess):
         # md_index should only comprise (trap, cell_label)
         if "mother_label" not in md_index.names:
             # dict with daughter indices as keys and mother indices as values
-            bud_dict = {v: k for k, values in md.items() for v in values}
+            bud_dict = {
+                bud: mother
+                for mother, buds in lineage_dict.items()
+                for bud in buds
+            }
             # generate mother_label in Signal using the mother's cell_label
             # cells with no mothers have a mother_label of 0
             signal["mother_label"] = list(
@@ -70,17 +94,21 @@ class BudMetric(LineageProcess):
             )
             signal.set_index("mother_label", append=True, inplace=True)
             # combine mothers and daughter indices
-            mothers_index = md.keys()
-            daughters_index = [y for x in md.values() for y in x]
+            mothers_index = lineage_dict.keys()
+            daughters_index = [
+                bud for buds in lineage_dict.values() for bud in buds
+            ]
             relations = set([*mothers_index, *daughters_index])
-            # keep from md_index only cells that are mother or daughters
+            # keep only cells that are mothers or daughters
             md_index = md_index.intersection(relations)
         else:
             md_index = md_index.droplevel("mother_label")
         if len(md_index) < len(signal):
-            print(
-                f"Dropped {len(signal) - len(md_index)} cells before applying bud_metric"
-            )  # TODO log
+            logging.getLogger("aliby").log(
+                logging.WARNING,
+                f"Dropped {len(signal) - len(md_index)} cells before "
+                "applying bud_metric.",
+            )
         # restrict signal to the cells in md_index moving mother_label to do so
         signal = (
             signal.reset_index("mother_label")
@@ -137,41 +165,3 @@ def combine_daughter_tracks(tracks: pd.DataFrame):
             bud_df.iloc[jrow].loc[init_tps[j] :].values
         )
     return pd.Series(combined_tracks, index=tracks.columns)
-
-
-def _combine_daughter_tracks_original(tracks: pd.DataFrame):
-    """
-    Combine multiple time series of daughter cells into one time series.
-
-    At any one time, a mother cell should have only one daughter.
-
-    Two daughters are still sometimes present at the same time point, and we
-    then choose the daughter that appears first.
-
-    TODO We need to fix examples with more than one daughter at a time point.
-
-    Parameters
-    ----------
-    tracks: a Signal
-        Data for all daughters, which are distinguished by different cell_labels,
-        for a particular trap and mother_label.
-    """
-    # sort by daughter IDs
-    bud_df = tracks.sort_index(level="cell_label")
-    # remove multi-index
-    bud_df.index = range(len(bud_df))
-    # find which row of sorted_df has the daughter for each time point
-    tp_fvt: pd.Series = bud_df.apply(lambda x: x.first_valid_index(), axis=0)
-    # combine data for all daughters
-    combined_tracks = np.nan * np.ones(tracks.columns.size)
-    for bud_row in np.unique(tp_fvt.dropna().values).astype(int):
-        ilocs = np.where(tp_fvt.values == bud_row)[0]
-        combined_tracks[ilocs] = bud_df.values[bud_row, ilocs]
-    # TODO delete old version
-    tp_fvt = bud_df.columns.get_indexer(tp_fvt)
-    tp_fvt[tp_fvt == -1] = len(bud_df) - 1
-    old = np.choose(tp_fvt, bud_df.values)
-    assert (
-        (combined_tracks == old) | (np.isnan(combined_tracks) & np.isnan(old))
-    ).all(), "yikes"
-    return pd.Series(combined_tracks, index=tracks.columns)
diff --git a/src/postprocessor/core/reshapers/picker.py b/src/postprocessor/core/reshapers/picker.py
index 14d76446..e283386e 100644
--- a/src/postprocessor/core/reshapers/picker.py
+++ b/src/postprocessor/core/reshapers/picker.py
@@ -24,9 +24,9 @@ class PickerParameters(ParametersABC):
     """
 
     _defaults = {
-        "sequence": [
+        "picker_sequence": [
             ["lineage", "families"],
-            ["condition", "present", 7],
+            ["condition", "present", 3],
         ],
     }
 
@@ -80,9 +80,9 @@ class Picker(LineageProcess):
         if len(lineage):
             self.mothers = lineage[:, [0, 1]]
             self.daughters = lineage[:, [0, 2]]
-            for alg, *params in self.sequence:
+            for method, *params in self.picker_sequence:
                 if indices:
-                    if alg == "lineage":
+                    if method == "lineage":
                         # pick by lineage
                         param1 = params[0]
                         new_indices = self.pick_by_lineage(
@@ -113,7 +113,7 @@ class Picker(LineageProcess):
     ):
         """Pick indices from signal by any_present, present, and growing."""
         if len(threshold) == 1:
-            threshold = [_as_int(*threshold, signal.shape[1])]
+            threshold = [as_int(*threshold, signal.shape[1])]
             #: is this correct for "growing"?
         case_mgr = {
             "any_present": lambda s, threshold: any_present(s, threshold),
@@ -127,7 +127,7 @@ class Picker(LineageProcess):
         return new_indices
 
 
-def _as_int(threshold: t.Union[float, int], ntps: int):
+def as_int(threshold: t.Union[float, int], ntps: int):
     """Convert a fraction of the total experiment duration into a number of time points."""
     if type(threshold) is float:
         threshold = ntps * threshold
-- 
GitLab