From fb7a49e85467dd406ad96bd962ceaafd69243da8 Mon Sep 17 00:00:00 2001 From: pswain <peter.swain@ed.ac.uk> Date: Thu, 2 Nov 2023 18:01:22 +0000 Subject: [PATCH] feat(babysitter): removed pixel_size; docs(bud_metric) --- src/agora/utils/lineage.py | 17 ---- src/aliby/baby_client.py | 2 - src/aliby/baby_sitter.py | 5 +- src/aliby/pipeline.py | 2 +- src/postprocessor/core/lineageprocess.py | 5 +- .../core/reshapers/bud_metric.py | 98 +++++++++---------- src/postprocessor/core/reshapers/picker.py | 12 +-- 7 files changed, 54 insertions(+), 87 deletions(-) delete mode 100644 src/agora/utils/lineage.py diff --git a/src/agora/utils/lineage.py b/src/agora/utils/lineage.py deleted file mode 100644 index 05c161a5..00000000 --- a/src/agora/utils/lineage.py +++ /dev/null @@ -1,17 +0,0 @@ -#!/usr/bin/env python3 - -import numpy as np - -from agora.io.bridge import groupsort - - -def mb_array_to_dict(mb_array: np.ndarray): - """ - Convert a lineage ndarray (trap, mother_id, daughter_id) - into a dictionary of lists ( mother_id ->[daughters_ids] ) - """ - return { - (trap, mo): [(trap, d[0]) for d in daughters] - for trap, mo_da in groupsort(mb_array).items() - for mo, daughters in groupsort(mo_da).items() - } diff --git a/src/aliby/baby_client.py b/src/aliby/baby_client.py index 1ced0c76..a94fe6ff 100644 --- a/src/aliby/baby_client.py +++ b/src/aliby/baby_client.py @@ -204,7 +204,6 @@ def choose_model_from_params( """ # cameras prime95 has become sCMOS and evolve has EMCCD valid_models = list(modelsets().keys()) - breakpoint() # Apply modelset filter if specified if modelset_filter is not None: @@ -218,7 +217,6 @@ def choose_model_from_params( ] params_re = re.compile("^" + "_".join(params) + "$") valid_models = list(filter(params_re.search, valid_models)) - breakpoint() # Check that there are valid models if len(valid_models) == 0: raise KeyError( diff --git a/src/aliby/baby_sitter.py b/src/aliby/baby_sitter.py index 386e312b..2bd930ba 100644 --- a/src/aliby/baby_sitter.py +++ b/src/aliby/baby_sitter.py @@ -15,15 +15,14 @@ class BabyParameters(ParametersABC): def __init__( self, modelset_name, - pixel_size, clogging_thresh, min_bud_tps, isbud_thresh, session, ): """Initialise parameters for BABY.""" + # pixel_size is specified in BABY's model sets self.modelset_name = modelset_name - self.pixel_size = pixel_size self.clogging_thresh = clogging_thresh self.min_bud_tps = min_bud_tps self.isbud_thresh = isbud_thresh @@ -34,7 +33,6 @@ class BabyParameters(ParametersABC): """Define default parameters; kwargs choose BABY model set.""" return cls( modelset_name=get_modelset_name_from_params(**kwargs), - pixel_size=0.182, clogging_thresh=0.75, min_bud_tps=3, isbud_thresh=0.5, @@ -84,7 +82,6 @@ class BabyRunner(StepABC): else: brain = modelsets.get( modelset_name, - pixel_size=parameters.pixel_size, clogging_thresh=parameters.clogging_thresh, min_bud_tps=parameters.min_bud_tps, isbud_thresh=parameters.isbud_thresh, diff --git a/src/aliby/pipeline.py b/src/aliby/pipeline.py index aa74fdfd..46b786c7 100644 --- a/src/aliby/pipeline.py +++ b/src/aliby/pipeline.py @@ -476,7 +476,7 @@ class Pipeline(ProcessABC): result = pipe["steps"][step].run_tp( i, **run_kwargs.get(step, {}) ) - # write to h5 file using writers + # write result to h5 file using writers # extractor writes to h5 itself if step in loaded_writers: loaded_writers[step].write( diff --git a/src/postprocessor/core/lineageprocess.py b/src/postprocessor/core/lineageprocess.py index c359df89..90378a47 100644 --- a/src/postprocessor/core/lineageprocess.py +++ b/src/postprocessor/core/lineageprocess.py @@ -7,7 +7,6 @@ import numpy as np import pandas as pd from agora.abc import ParametersABC -from agora.utils.kymograph import get_index_as_np from postprocessor.core.abc import PostProcessABC @@ -57,10 +56,10 @@ class LineageProcess(PostProcessABC): ) def get_lineage_information(self, signal=None, merged=True): - """Get lineage as an array with tile IDs, mother labels, and corresponding bud labels.""" + """Get lineage as an array with tile IDs, mother and bud labels.""" if signal is not None and "mother_label" in signal.index.names: # from kymograph - lineage = get_index_as_np(signal) + lineage = np.array(signal.index.to_list()) elif hasattr(self, "lineage"): lineage = self.lineage elif hasattr(self, "cells"): diff --git a/src/postprocessor/core/reshapers/bud_metric.py b/src/postprocessor/core/reshapers/bud_metric.py index 57c01069..eee5c0b4 100644 --- a/src/postprocessor/core/reshapers/bud_metric.py +++ b/src/postprocessor/core/reshapers/bud_metric.py @@ -3,11 +3,26 @@ import typing as t import numpy as np import pandas as pd -from agora.utils.lineage import mb_array_to_dict +from agora.io.bridge import groupsort from postprocessor.core.lineageprocess import ( LineageProcess, LineageProcessParameters, ) +import logging + + +def mother_bud_array_to_dict(mb_array: np.ndarray): + """ + Convert a lineage into a dict of lists. + + A lineage is an array (trap, mother_id, daughter_id) and + becomes a dictionary of lists (mother_id->[daughters_ids]) + """ + return { + (trap, mo): [(trap, d[0]) for d in daughters] + for trap, mo_da in groupsort(mb_array).items() + for mo, daughters in groupsort(mo_da).items() + } class BudMetricParameters(LineageProcessParameters): @@ -18,12 +33,13 @@ class BudMetricParameters(LineageProcessParameters): class BudMetric(LineageProcess): """ - Requires mother-bud information to create a new dataframe where the - indices are mother ids and values are the daughters' values for a - given signal. + Create a dataframe with indices mother IDs and values from buds. + + Requires mother-bud information. """ def __init__(self, parameters: BudMetricParameters): + """Initialise using LineageProcess.""" super().__init__(parameters) def run( @@ -31,23 +47,27 @@ class BudMetric(LineageProcess): signal: pd.DataFrame, lineage: t.Dict[pd.Index, t.Tuple[pd.Index]] = None, ): + """Calculate a metric for all buds.""" if lineage is None: # define lineage if hasattr(self, "lineage"): lineage = self.lineage else: - # lineage information in the Signal dataframe + # lineage information in the Signal data frame assert "mother_label" in signal.index.names lineage = signal.index.to_list() - return self.get_bud_metric(signal, mb_array_to_dict(lineage)) + return self.get_bud_metric(signal, mother_bud_array_to_dict(lineage)) @staticmethod def get_bud_metric( - signal: pd.DataFrame, md: t.Dict[t.Tuple, t.Tuple[t.Tuple]] = None + signal: pd.DataFrame, + lineage_dict: t.Dict[t.Tuple, t.Tuple[t.Tuple]] = None, ): """ - Generate a dataframe of a Signal for buds indexed by their mothers, - concatenating data from all the buds for each mother. + Generate a dataframe of a Signal for buds. + + The data frame is indexed by the buds' mothers and concatenates + data from all the buds for each mother. Parameters --------- @@ -62,7 +82,11 @@ class BudMetric(LineageProcess): # md_index should only comprise (trap, cell_label) if "mother_label" not in md_index.names: # dict with daughter indices as keys and mother indices as values - bud_dict = {v: k for k, values in md.items() for v in values} + bud_dict = { + bud: mother + for mother, buds in lineage_dict.items() + for bud in buds + } # generate mother_label in Signal using the mother's cell_label # cells with no mothers have a mother_label of 0 signal["mother_label"] = list( @@ -70,17 +94,21 @@ class BudMetric(LineageProcess): ) signal.set_index("mother_label", append=True, inplace=True) # combine mothers and daughter indices - mothers_index = md.keys() - daughters_index = [y for x in md.values() for y in x] + mothers_index = lineage_dict.keys() + daughters_index = [ + bud for buds in lineage_dict.values() for bud in buds + ] relations = set([*mothers_index, *daughters_index]) - # keep from md_index only cells that are mother or daughters + # keep only cells that are mother or daughters md_index = md_index.intersection(relations) else: md_index = md_index.droplevel("mother_label") if len(md_index) < len(signal): - print( - f"Dropped {len(signal) - len(md_index)} cells before applying bud_metric" - ) # TODO log + logging.getLogger("aliby").log( + logging.WARNING, + f"Dropped {len(signal) - len(md_index)} cells before " + "applying bud_metric.", + ) # restrict signal to the cells in md_index moving mother_label to do so signal = ( signal.reset_index("mother_label") @@ -137,41 +165,3 @@ def combine_daughter_tracks(tracks: pd.DataFrame): bud_df.iloc[jrow].loc[init_tps[j] :].values ) return pd.Series(combined_tracks, index=tracks.columns) - - -def _combine_daughter_tracks_original(tracks: pd.DataFrame): - """ - Combine multiple time series of daughter cells into one time series. - - At any one time, a mother cell should have only one daughter. - - Two daughters are still sometimes present at the same time point, and we - then choose the daughter that appears first. - - TODO We need to fix examples with more than one daughter at a time point. - - Parameters - ---------- - tracks: a Signal - Data for all daughters, which are distinguished by different cell_labels, - for a particular trap and mother_label. - """ - # sort by daughter IDs - bud_df = tracks.sort_index(level="cell_label") - # remove multi-index - bud_df.index = range(len(bud_df)) - # find which row of sorted_df has the daughter for each time point - tp_fvt: pd.Series = bud_df.apply(lambda x: x.first_valid_index(), axis=0) - # combine data for all daughters - combined_tracks = np.nan * np.ones(tracks.columns.size) - for bud_row in np.unique(tp_fvt.dropna().values).astype(int): - ilocs = np.where(tp_fvt.values == bud_row)[0] - combined_tracks[ilocs] = bud_df.values[bud_row, ilocs] - # TODO delete old version - tp_fvt = bud_df.columns.get_indexer(tp_fvt) - tp_fvt[tp_fvt == -1] = len(bud_df) - 1 - old = np.choose(tp_fvt, bud_df.values) - assert ( - (combined_tracks == old) | (np.isnan(combined_tracks) & np.isnan(old)) - ).all(), "yikes" - return pd.Series(combined_tracks, index=tracks.columns) diff --git a/src/postprocessor/core/reshapers/picker.py b/src/postprocessor/core/reshapers/picker.py index 14d76446..e283386e 100644 --- a/src/postprocessor/core/reshapers/picker.py +++ b/src/postprocessor/core/reshapers/picker.py @@ -24,9 +24,9 @@ class PickerParameters(ParametersABC): """ _defaults = { - "sequence": [ + "picker_sequence": [ ["lineage", "families"], - ["condition", "present", 7], + ["condition", "present", 3], ], } @@ -80,9 +80,9 @@ class Picker(LineageProcess): if len(lineage): self.mothers = lineage[:, [0, 1]] self.daughters = lineage[:, [0, 2]] - for alg, *params in self.sequence: + for method, *params in self.picker_sequence: if indices: - if alg == "lineage": + if method == "lineage": # pick by lineage param1 = params[0] new_indices = self.pick_by_lineage( @@ -113,7 +113,7 @@ class Picker(LineageProcess): ): """Pick indices from signal by any_present, present, and growing.""" if len(threshold) == 1: - threshold = [_as_int(*threshold, signal.shape[1])] + threshold = [as_int(*threshold, signal.shape[1])] #: is this correct for "growing"? case_mgr = { "any_present": lambda s, threshold: any_present(s, threshold), @@ -127,7 +127,7 @@ class Picker(LineageProcess): return new_indices -def _as_int(threshold: t.Union[float, int], ntps: int): +def as_int(threshold: t.Union[float, int], ntps: int): """Convert a fraction of the total experiment duration into a number of time points.""" if type(threshold) is float: threshold = ntps * threshold -- GitLab