From 846211e62dda82005195add9b4f8073042a0d3d7 Mon Sep 17 00:00:00 2001
From: Swainlab <peter.swain@ed.ac.uk>
Date: Mon, 3 Jul 2023 11:37:45 +0100
Subject: [PATCH] unfinished mods to bud_metric and buddings

---
 .../core/reshapers/bud_metric.py              | 50 ++++++++++++++++++-
 src/postprocessor/core/reshapers/buddings.py  | 11 ++--
 2 files changed, 55 insertions(+), 6 deletions(-)

diff --git a/src/postprocessor/core/reshapers/bud_metric.py b/src/postprocessor/core/reshapers/bud_metric.py
index ee239221..9e9e1f70 100644
--- a/src/postprocessor/core/reshapers/bud_metric.py
+++ b/src/postprocessor/core/reshapers/bud_metric.py
@@ -105,7 +105,7 @@ class BudMetric(LineageProcess):
         return output_df
 
 
-def _combine_daughter_tracks(tracks: pd.DataFrame):
+def _combine_daughter_tracks_old(tracks: pd.DataFrame):
     """
     Combine multiple time series of daughter cells into one time series.
 
@@ -141,3 +141,51 @@ def _combine_daughter_tracks(tracks: pd.DataFrame):
         (combined_tracks == old) | (np.isnan(combined_tracks) & np.isnan(old))
     ).all(), "yikes"
     return pd.Series(combined_tracks, index=tracks.columns)
+
+
+def _combine_daughter_tracks(tracks: pd.DataFrame):
+    """
+    Combine multiple time series of daughter cells into one time series.
+
+    Concatenate daughter values into one time series starting with the first
+    daughter and replacing later values with the values from the next daughter,
+    and so on. From the time point at which a daughter first has data, all
+    of its values, NaNs included, over-write those of earlier daughters.
+
+    Parameters
+    ----------
+    tracks: a Signal
+        Data for all daughters, which are distinguished by different cell_labels,
+        for a particular trap and mother_label.
+
+    Returns
+    -------
+    combined: pd.Series
+        One value per column of tracks; all NaN if no daughter has any data.
+    """
+    # sort by daughter IDs
+    bud_df = tracks.sort_index(level="cell_label")
+    # drop daughters with no data: their first_valid_index() is None, which
+    # can neither be ordered by time nor looked up in the columns
+    bud_df = bud_df.loc[bud_df.notna().any(axis=1).values]
+    # remove multi-index
+    no_rows = len(bud_df)
+    bud_df.index = range(no_rows)
+    # find time point of first non-NaN data point of each row
+    init_tps = [
+        bud_df.iloc[irow].first_valid_index() for irow in range(no_rows)
+    ]
+    # order rows so that the earliest daughter is first; a stable sort keeps
+    # daughters that appear at the same time point in cell_label order
+    sorted_rows = np.argsort(init_tps, kind="stable")
+    # NOTE(review): this differs from _combine_daughter_tracks_old, which at
+    # each time point takes the value of the first daughter, in cell_label
+    # order, that has data there; here a later daughter replaces everything
+    # from its first data point onwards - confirm this is intended
+    # combine data for all daughters
+    combined_tracks = np.full(tracks.columns.size, np.nan)
+    for jrow in sorted_rows:
+        # over-write with next earliest daughter
+        start = bud_df.columns.get_loc(init_tps[jrow])
+        combined_tracks[start:] = bud_df.iloc[jrow].values[start:]
+    return pd.Series(combined_tracks, index=tracks.columns)
diff --git a/src/postprocessor/core/reshapers/buddings.py b/src/postprocessor/core/reshapers/buddings.py
index ba9fe2fc..90785bce 100644
--- a/src/postprocessor/core/reshapers/buddings.py
+++ b/src/postprocessor/core/reshapers/buddings.py
@@ -37,7 +37,7 @@ class buddings(LineageProcess):
         """
         Generate dataframe of budding events.
 
-        Find daughters for mothers in a Signal for which we have lineage data.
+        Find daughters for those mothers in a Signal with lineage data.
         Create a dataframe indicating the time each daughter first appears.
 
         We use the data from Signal only to find when the daughters appear, by
@@ -66,14 +66,15 @@ class buddings(LineageProcess):
             columns=signal.columns,
         )
         buddings.columns.names = ["timepoint"]
-        # get time of first non-NaN value of signal for every mother using Pandas
+        # get time of first non-NaN value of signal for every cell using Pandas
         fvi = signal.apply(lambda x: x.first_valid_index(), axis=1)
         # fill the budding events
         for trap_mother_id, daughters in traps_mothers.items():
+            trap_daughter_ids = [
+                i for i in product((trap_mother_id[0],), daughters)
+            ]
             times_of_bud_appearance = fvi.loc[
-                fvi.index.intersection(
-                    list(product((trap_mother_id[0],), daughters))
-                )
+                fvi.index.intersection(trap_daughter_ids)
             ].values
             # ignore zeros - ignore buds in first image
             daughters_idx = set(times_of_bud_appearance).difference({0})
-- 
GitLab