diff --git a/src/agora/abc.py b/src/agora/abc.py index 437214e3d7840d049b238feaa562e4026e96a4bf..6f9a701100c10e2dda4ebd43446d0640a9db756f 100644 --- a/src/agora/abc.py +++ b/src/agora/abc.py @@ -107,7 +107,7 @@ class ParametersABC(ABC): def update(self, name: str, new_value): """Update a parameter in the nested dict of parameters.""" - flat_params_dict = flatten(self.to_dict()) + flat_params_dict = flatten(self.to_dict(), keep_empty_types=(dict,)) names_found = [ param for param in flat_params_dict.keys() if name in param ] diff --git a/src/agora/io/signal.py b/src/agora/io/signal.py index 03dc98fc162efce983aa4be45a73d162ca0c5d6e..3f0d66d5c011e577c32c621df9c710b244c2d018 100644 --- a/src/agora/io/signal.py +++ b/src/agora/io/signal.py @@ -15,6 +15,7 @@ from agora.io.decorators import _first_arg_str_to_raw_df from agora.utils.indexing import validate_lineage from agora.utils.kymograph import add_index_levels from agora.utils.merge import apply_merges +from postprocessor.core.reshapers.picker import Picker, PickerParameters class Signal(BridgeH5): @@ -27,8 +28,7 @@ class Signal(BridgeH5): """ def __init__(self, file: t.Union[str, Path]): - """Define index_names for dataframes, candidate fluorescence channels, - and composite statistics.""" + """Initialise defining index names for the dataframe.""" super().__init__(file, flag=None) self.index_names = ( "experiment", @@ -54,11 +54,11 @@ class Signal(BridgeH5): raise Exception(f"Invalid type {type(dsets)} to get datasets") def get(self, dset_name: t.Union[str, t.Collection], **kwargs): - """Return pre-processed data as a dataframe.""" + """Get Signal after merging and picking.""" if isinstance(dset_name, str): dsets = self.get_raw(dset_name, **kwargs) - prepost_applied = self.apply_prepost(dsets, **kwargs) - return self.add_name(prepost_applied, dset_name) + picked_merged = self.apply_merging_picking(dsets, **kwargs) + return self.add_name(picked_merged, dset_name) else: raise Exception("Error in Signal.get") @@ -95,10 +95,25 @@ class Signal(BridgeH5): ) return 5 + def retained( + self, signal, cutoff=global_parameters.signal_retained_cutoff + ): + """Get retained cells for a Signal or list of Signals.""" + if isinstance(signal, str): + signal = self.get(signal) + if isinstance(signal, pd.DataFrame): + return self.get_retained(signal, cutoff) + elif isinstance(signal, list): + return [self.get_retained(d, cutoff=cutoff) for d in signal] + @staticmethod def get_retained(df, cutoff): - """Return rows of df with at least cutoff fraction of the total number - of time points.""" + """ + Return sub data frame with retained cells. + + Cells must be present for at least cutoff fraction of the total number + of time points. + """ return df.loc[bn.nansum(df.notna(), axis=1) > df.shape[1] * cutoff] @property @@ -107,22 +122,6 @@ class Signal(BridgeH5): with h5py.File(self.filename, "r") as f: return list(f.attrs["channels"]) - def retained( - self, signal, cutoff=global_parameters.signal_retained_cutoff - ): - """ - Load data (via decorator) and reduce the resulting dataframe. - - Load data for a signal or a list of signals and reduce the resulting - dataframes to rows with sufficient numbers of time points. - """ - if isinstance(signal, str): - signal = self.get_raw(signal) - if isinstance(signal, pd.DataFrame): - return self.get_retained(signal, cutoff) - elif isinstance(signal, list): - return [self.get_retained(d, cutoff=cutoff) for d in signal] - @lru_cache(2) def lineage( self, lineage_location: t.Optional[str] = None, merged: bool = False @@ -152,7 +151,7 @@ class Signal(BridgeH5): return lineage @_first_arg_str_to_raw_df - def apply_prepost( + def apply_merging_picking( self, data: t.Union[str, pd.DataFrame], merges: t.Union[np.ndarray, bool] = True, @@ -186,7 +185,7 @@ class Signal(BridgeH5): if picks else merged.index ) - if picks: + if len(picks): picked_indices = set(picks).intersection( [tuple(x) for x in merged.index] ) @@ -218,8 +217,8 @@ class Signal(BridgeH5): return self._available def get_merged(self, dataset): - """Run preprocessing for merges.""" - return self.apply_prepost(dataset, picks=False) + """Run merging.""" + return self.apply_merging_picking(dataset, picks=False) @cached_property def merges(self) -> np.ndarray: @@ -245,9 +244,11 @@ class Signal(BridgeH5): dataset: str or t.List[str], in_minutes: bool = True, lineage: bool = False, + merges: bool = False, + picks: bool = False, ) -> pd.DataFrame or t.List[pd.DataFrame]: """ - Load data from a h5 file and return as a dataframe. + Get raw Signal without merging, picking, and lineage information. Parameters ---------- @@ -257,6 +258,10 @@ class Signal(BridgeH5): If True, convert column headings to times in minutes. lineage: boolean If True, add mother_label to index. + merges: boolean + If True, apply merges. + picks: boolean + If True, apply picks. """ try: if isinstance(dataset, str): @@ -269,15 +274,17 @@ class Signal(BridgeH5): self.get_raw(dset, in_minutes=in_minutes, lineage=lineage) for dset in dataset ] + # apply merging or picking or both or neither + df = self.apply_merging_picking(df, merges, picks) + # add mother label to data frame if lineage: - # assume that df is sorted mother_label = np.zeros(len(df), dtype=int) lineage = self.lineage() - # information on buds + valid_lineage, valid_indices = validate_lineage( lineage, - np.array(df.index.to_list()), - "daughters", + indices=np.array(df.index.to_list()), + how="daughters", ) mother_label[valid_indices] = lineage[valid_lineage, 1] df = add_index_levels(df, {"mother_label": mother_label}) @@ -360,62 +367,7 @@ class Signal(BridgeH5): if isinstance(obj, h5py.Group) and name.endswith("picks"): return obj[()] - # TODO FUTURE add stages support to fluigent system @property def ntps(self) -> int: """Get number of time points from the metadata.""" return self.meta_h5["time_settings/ntimepoints"][0] - - @property - def stages(self) -> t.List[str]: - """Get the contents of the pump with highest flow rate at each stage.""" - flowrate_name = "pumpinit/flowrate" - pumprate_name = "pumprate" - switchtimes_name = "switchtimes" - main_pump_id = np.concatenate( - ( - (np.argmax(self.meta_h5[flowrate_name]),), - np.argmax(self.meta_h5[pumprate_name], axis=0), - ) - ) - if not self.meta_h5[switchtimes_name][0]: # Cover for t0 switches - main_pump_id = main_pump_id[1:] - return [self.meta_h5["pumpinit/contents"][i] for i in main_pump_id] - - @property - def nstages(self) -> int: - return len(self.switch_times) + 1 - - @property - def max_span(self) -> int: - return int(self.tinterval * self.ntps / 60) - - @property - def switch_times(self) -> t.List[int]: - switchtimes_name = "switchtimes" - switches_minutes = self.meta_h5[switchtimes_name] - return [ - t_min - for t_min in switches_minutes - if t_min and t_min < self.max_span - ] # Cover for t0 switches - - @property - def stages_span(self) -> t.Tuple[t.Tuple[str, int], ...]: - """Get consecutive stages and their corresponding number of time points.""" - transition_tps = (0, *self.switch_times, self.max_span) - spans = [ - end - start - for start, end in zip(transition_tps[:-1], transition_tps[1:]) - if end <= self.max_span - ] - return tuple((stage, ntps) for stage, ntps in zip(self.stages, spans)) - - @property - def stages_span_tp(self) -> t.Tuple[t.Tuple[str, int], ...]: - return tuple( - [ - (name, (t_min * 60) // self.tinterval) - for name, t_min in self.stages_span - ] - ) diff --git a/src/agora/utils/indexing.py b/src/agora/utils/indexing.py index 371a69e6514417bc4ba9e0cb99ca20e0b0210c19..5e77dbb6f11077f2456df799deac5a8f83b03023 100644 --- a/src/agora/utils/indexing.py +++ b/src/agora/utils/indexing.py @@ -1,13 +1,4 @@ -#!/usr/bin/env jupyter -""" -Utilities based on association are used to efficiently acquire indices of tracklets with some kind of relationship. -This can be: - - Cells that are to be merged - - Cells that have a linear relationship -""" - import numpy as np -import typing as t # data type to link together trap and cell ids i_dtype = {"names": ["trap_id", "cell_id"], "formats": [np.int64, np.int64]} @@ -17,8 +8,7 @@ def validate_lineage( lineage: np.ndarray, indices: np.ndarray, how: str = "families" ): """ - Identify mother-bud pairs that exist both in lineage and a Signal's - indices. + Identify mother-bud pairs both in lineage and a Signal's indices. We expect the lineage information to be unique: a bud should not have two mothers. @@ -94,12 +84,22 @@ def validate_lineage( else: valid_indices = index_isin(indices, selected_lineages[:, c_index, :]) flat_valid_indices = valid_indices.flatten() - if ( - indices[flat_valid_indices, :].size - != np.unique( - lineage[flat_valid_lineage, :].reshape(-1, 2), axis=0 - ).size - ): + # test for mismatch + if how == "families": + test_mismatch = ( + indices[flat_valid_indices, :].size + != np.unique( + lineage[flat_valid_lineage, :].reshape(-1, 2), axis=0 + ).size + ) + else: + test_mismatch = ( + indices[flat_valid_indices, :].size + != np.unique( + lineage[flat_valid_lineage, c_index, :].reshape(-1, 2), axis=0 + ).size + ) + if test_mismatch: # all unique indices in valid_lineages should be in valid_indices raise Exception( "Error in validate_lineage: " @@ -161,108 +161,3 @@ def assoc_indices_to_2d(array: np.ndarray): (array[:, 0, :], array[:, 1, 1, np.newaxis]), axis=1 ) return result - - -################################################################### - - -def validate_association( - association: np.ndarray, - indices: np.ndarray, - match_column: t.Optional[int] = None, -) -> t.Tuple[np.ndarray, np.ndarray]: - - """Select rows from the first array that are present in both. - We use casting for fast multiindexing, generalising for lineage dynamics - - - Parameters - ---------- - association : np.ndarray - 2-D array where columns are (trap, mother, daughter) or 3-D array where - dimensions are (X,trap,2), containing tuples ((trap,mother), (trap,daughter)) - across the 3rd dimension. - indices : np.ndarray - 2-D array where each column is a different level. This should not include mother_label. - match_column: int - int indicating a specific column is required to match (i.e. - 0-1 for target-source when trying to merge tracklets or mother-bud for lineage) - must be present in indices. If it is false one match suffices for the resultant indices - vector to be True. - - Returns - ------- - np.ndarray - 1-D boolean array indicating valid merge events. - np.ndarray - 1-D boolean array indicating indices with an association relationship. - - Examples - -------- - - >>> import numpy as np - >>> from agora.utils.indexing import validate_association - >>> merges = np.array(range(12)).reshape(3,2,2) - >>> indices = np.array(range(6)).reshape(3,2) - - >>> print(merges, indices) - >>> print(merges); print(indices) - [[[ 0 1] - [ 2 3]] - - [[ 4 5] - [ 6 7]] - - [[ 8 9] - [10 11]]] - - [[0 1] - [2 3] - [4 5]] - - >>> valid_associations, valid_indices = validate_association(merges, indices) - >>> print(valid_associations, valid_indices) - [ True False False] [ True True False] - - """ - if association.ndim == 2: - # Reshape into 3-D array for broadcasting if neded - # association = np.stack( - # (association[:, [0, 1]], association[:, [0, 2]]), axis=1 - # ) - association = _assoc_indices_to_3d(association) - - # Compare existing association with available indices - # Swap trap and label axes for the association array to correctly cast - valid_ndassociation = association[..., None] == indices.T[None, ...] - - # Broadcasting is confusing (but efficient): - # First we check the dimension across trap and cell id, to ensure both match - valid_cell_ids = valid_ndassociation.all(axis=2) - - if match_column is None: - # Then we check the merge tuples to check which cases have both target and source - valid_association = valid_cell_ids.any(axis=2).all(axis=1) - - # Finally we check the dimension that crosses all indices, to ensure the pair - # is present in a valid merge event. - valid_indices = ( - valid_ndassociation[valid_association].all(axis=2).any(axis=(0, 1)) - ) - else: # We fetch specific indices if we aim for the ones with one present - valid_indices = valid_cell_ids[:, match_column].any(axis=0) - # Valid association then becomes a boolean array, true means that there is a - # match (match_column) between that cell and the index - valid_association = ( - valid_cell_ids[:, match_column] & valid_indices - ).any(axis=1) - - return valid_association, valid_indices - - -def compare_indices(x: np.ndarray, y: np.ndarray) -> np.ndarray: - """ - Fetch two 2-D indices and return a binary 2-D matrix - where a True value links two cells where all cells are the same - """ - return (x[..., None] == y.T[None, ...]).all(axis=1) diff --git a/src/agora/utils/kymograph.py b/src/agora/utils/kymograph.py index 46df84ec3e117969a4987ddb5e6191d2e7537b7c..5bceb147be7a831b2662e7e2b9e22e787d845d94 100644 --- a/src/agora/utils/kymograph.py +++ b/src/agora/utils/kymograph.py @@ -6,7 +6,7 @@ import numpy as np import pandas as pd from sklearn.cluster import KMeans -from agora.utils.indexing import validate_association +# from agora.utils.indexing import validate_association index_row = t.Tuple[str, str, int, int] diff --git a/src/postprocessor/core/postprocessing.py b/src/postprocessor/core/postprocessing.py index 6369afb6b605a724572d80ec95c202f568b8d2da..301891dc6e8d6df48dca92801fd82ac7483a6310 100644 --- a/src/postprocessor/core/postprocessing.py +++ b/src/postprocessor/core/postprocessing.py @@ -54,6 +54,7 @@ class PostProcessorParameters(ParametersABC): "merger": "/extraction/general/None/area", "picker": "/extraction/general/None/area", }, + # lists because bud_metric can be applied to multiple signals "processes": [ ["buddings", ["/extraction/general/None/volume"]], ["bud_metric", ["/extraction/general/None/volume"]], @@ -104,12 +105,12 @@ class PostProcessor(ProcessABC): cells=Cells.from_source(filename), ) # get processes, such as buddings - self.classfun = { + self.process_funcs = { process: get_process(process) for process, _ in parameters["targets"]["processes"] } - # get parameters for the processes in classfun - self.parameters_classfun = { + # get parameters for the processes + self.parameters_process_funcs = { process: get_parameters(process) for process, _ in parameters["targets"]["processes"] } @@ -141,8 +142,10 @@ class PostProcessor(ProcessABC): "modifiers/lineage_merged", assoc_indices_to_2d(new_lineage) ) # run picker - picked_indices = self.picker.run( - self.signal[self.targets["merging_picking"]["picker"]] + picked_indices = np.array( + self.picker.run( + self.signal[self.targets["merging_picking"]["picker"]] + ) ) if picked_indices.any(): self.writer.write( @@ -161,26 +164,26 @@ class PostProcessor(ProcessABC): """ # run merger, picker, and find lineages self.run_merging_picking() - # run processes: process is a str; datasets is a list of str + # run processes: process is a str; data sets is a list of str for process, datasets in tqdm(self.targets["processes"]): if process in self.parameters["param_sets"].get("processes", {}): # parameters already assigned - parameters = self.parameters_classfun[process]( + parameters = self.parameters_process_funcs[process]( self.parameters[process] ) else: # assign default parameters - parameters = self.parameters_classfun[process].default() + parameters = self.parameters_process_funcs[process].default() # load process - instantiate an object in the class - loaded_process = self.classfun[process](parameters) + loaded_process = self.process_funcs[process](parameters) if isinstance(parameters, LineageProcessParameters): loaded_process.lineage = self.lineage - # apply process to each dataset + # apply process to each data set for dataset in datasets: self.run_process(dataset, process, loaded_process) def run_process(self, dataset, process, loaded_process): - """Run process to obtain a single dataset and write the result.""" + """Run processes to obtain single data sets and write the results.""" # get pre-processed data if isinstance(dataset, list): signal = [self.signal[d] for d in dataset] @@ -212,11 +215,8 @@ class PostProcessor(ProcessABC): lambda x: all(x[0] == y for y in x), zip(*dataset) ) ) - outpath = ( - prefix - + "_".join( # TODO check that it always finishes in '/' - [d[len(prefix) :].replace("/", "_") for d in dataset] - ) + outpath = prefix + "_".join( + [d[len(prefix) :].replace("/", "_") for d in dataset] ) elif isinstance(dataset, str): outpath = dataset[1:].replace("/", "_") diff --git a/src/postprocessor/core/reshapers/bud_metric.py b/src/postprocessor/core/reshapers/bud_metric.py index eee5c0b42c9c1d821bfa9c2f3009c1987ca8b8c6..d2b87f795e1e8c00be152ff4d478ba6ddb3adf73 100644 --- a/src/postprocessor/core/reshapers/bud_metric.py +++ b/src/postprocessor/core/reshapers/bud_metric.py @@ -11,20 +11,6 @@ from postprocessor.core.lineageprocess import ( import logging -def mother_bud_array_to_dict(mb_array: np.ndarray): - """ - Convert a lineage into a dict of lists. - - A lineage is an array (trap, mother_id, daughter_id) and - becomes a dictionary of lists (mother_id->[daughters_ids]) - """ - return { - (trap, mo): [(trap, d[0]) for d in daughters] - for trap, mo_da in groupsort(mb_array).items() - for mo, daughters in groupsort(mo_da).items() - } - - class BudMetricParameters(LineageProcessParameters): """Give default location of lineage information.""" @@ -56,79 +42,91 @@ class BudMetric(LineageProcess): # lineage information in the Signal data frame assert "mother_label" in signal.index.names lineage = signal.index.to_list() - return self.get_bud_metric(signal, mother_bud_array_to_dict(lineage)) + result = get_bud_metric(signal, mother_bud_array_to_dict(lineage)) + return result - @staticmethod - def get_bud_metric( - signal: pd.DataFrame, - lineage_dict: t.Dict[t.Tuple, t.Tuple[t.Tuple]] = None, - ): - """ - Generate a dataframe of a Signal for buds. - - The data frame is indexed by the buds' mothers and concatenates - data from all the buds for each mother. - - Parameters - --------- - signal: pd.Dataframe - A dataframe that includes data for both mothers and daughters. - md: dict - A dict of lineage information with each key a mother's index, - defined as (trap, cell_label), and the corresponding values are a - list of daughter indices, also defined as (trap, cell_label). - """ - md_index = signal.index - # md_index should only comprise (trap, cell_label) - if "mother_label" not in md_index.names: - # dict with daughter indices as keys and mother indices as values - bud_dict = { - bud: mother - for mother, buds in lineage_dict.items() - for bud in buds - } - # generate mother_label in Signal using the mother's cell_label - # cells with no mothers have a mother_label of 0 - signal["mother_label"] = list( - map(lambda x: bud_dict.get(x, [0])[-1], signal.index) - ) - signal.set_index("mother_label", append=True, inplace=True) - # combine mothers and daughter indices - mothers_index = lineage_dict.keys() - daughters_index = [ - bud for buds in lineage_dict.values() for bud in buds - ] - relations = set([*mothers_index, *daughters_index]) - # keep only cells that are mother or daughters - md_index = md_index.intersection(relations) - else: - md_index = md_index.droplevel("mother_label") - if len(md_index) < len(signal): - logging.getLogger("aliby").log( - logging.WARNING, - f"Dropped {len(signal) - len(md_index)} cells before " - "applying bud_metric.", - ) - # restrict signal to the cells in md_index moving mother_label to do so - signal = ( - signal.reset_index("mother_label") - .loc(axis=0)[md_index] - .set_index("mother_label", append=True) - ) - # restrict to daughters: cells with a mother - mother_labels = signal.index.get_level_values("mother_label") - daughter_df = signal.loc[mother_labels > 0] - # join data for daughters with the same mother - output_df = daughter_df.groupby(["trap", "mother_label"]).apply( - combine_daughter_tracks + +def mother_bud_array_to_dict(mb_array: np.ndarray): + """ + Convert a lineage into a dict of lists. + + A lineage is an array (trap, mother_id, daughter_id) and + becomes a dictionary of lists (mother_id->[daughters_ids]) + """ + return { + (trap, mo): [(trap, d[0]) for d in daughters] + for trap, mo_da in groupsort(mb_array).items() + for mo, daughters in groupsort(mo_da).items() + } + + +def get_bud_metric( + signal: pd.DataFrame, + lineage_dict: t.Dict[t.Tuple, t.Tuple[t.Tuple]] = None, +): + """ + Generate a dataframe of a Signal for buds. + + The data frame is indexed by the buds' mothers and concatenates + data from all the buds for each mother. + + Parameters + --------- + signal: pd.Dataframe + A dataframe that includes data for both mothers and daughters. + md: dict + A dict of lineage information with each key a mother's index, + defined as (trap, cell_label), and the corresponding values are a + list of daughter indices, also defined as (trap, cell_label). + """ + md_index = signal.index + # md_index should only comprise (trap, cell_label) + if "mother_label" not in md_index.names: + # dict with daughter indices as keys and mother indices as values + bud_dict = { + bud: mother + for mother, buds in lineage_dict.items() + for bud in buds + } + # generate mother_label in Signal using the mother's cell_label + # cells with no mothers have a mother_label of 0 + signal["mother_label"] = list( + map(lambda x: bud_dict.get(x, [0])[-1], signal.index) ) - output_df.columns = signal.columns - # daughter data is indexed by mothers, which themselves have no mothers - output_df["temp_mother_label"] = 0 - output_df.set_index("temp_mother_label", append=True, inplace=True) - if len(output_df): - output_df.index.names = signal.index.names - return output_df + signal.set_index("mother_label", append=True, inplace=True) + # combine mothers and daughter indices + mothers_index = lineage_dict.keys() + daughters_index = [ + bud for buds in lineage_dict.values() for bud in buds + ] + relations = set([*mothers_index, *daughters_index]) + # keep only cells that are mother or daughters + md_index = md_index.intersection(relations) + else: + md_index = md_index.droplevel("mother_label") + # restrict signal to the cells in md_index moving mother_label to do so + md_signal = ( + signal.reset_index("mother_label") + .loc(axis=0)[md_index] + .set_index("mother_label", append=True) + ) + # restrict to daughters: cells with a mother + mother_labels = md_signal.index.get_level_values("mother_label") + daughter_df = md_signal.loc[mother_labels > 0] + # join data for daughters with the same mother + mini_df = daughter_df.groupby(["trap", "mother_label"]).apply( + combine_daughter_tracks + ) + mini_df.columns = md_signal.columns + # daughter data is indexed by mothers, which themselves have no mothers + mini_df["temp_mother_label"] = 0 + mini_df.set_index("temp_mother_label", append=True, inplace=True) + if len(mini_df): + mini_df.index.names = md_signal.index.names + # initialise as all NaNs and then update for cells with buds + bud_signal = pd.DataFrame(columns=signal.columns, index=signal.index) + bud_signal.update(mini_df) + return bud_signal def combine_daughter_tracks(tracks: pd.DataFrame): diff --git a/src/postprocessor/core/reshapers/picker.py b/src/postprocessor/core/reshapers/picker.py index e283386efcee661a0d01a22b72e866c424f5ab33..f257557d985906d520db48f2f11a1a5bfea786ba 100644 --- a/src/postprocessor/core/reshapers/picker.py +++ b/src/postprocessor/core/reshapers/picker.py @@ -6,7 +6,6 @@ import pandas as pd from agora.abc import ParametersABC from agora.io.cells import Cells from agora.utils.indexing import validate_lineage -from agora.utils.cast import _str_to_int from agora.utils.kymograph import drop_mother_label from postprocessor.core.lineageprocess import LineageProcess @@ -21,6 +20,12 @@ class PickerParameters(ParametersABC): "condition" is further specified by "present", "any_present", or "growing" and a threshold, either a number of time points or a fraction of the total duration of the experiment. + + Note that to pick most cells, particularly for short movies, use + {"picker_sequence": [["condition", "present", 3]]} + with no "lineage" condition specified. For short movies, lineage + information is only available for a small fraction of the + segmented cells. """ _defaults = { @@ -72,7 +77,10 @@ class Picker(LineageProcess): Pick indices from the index of a signal's dataframe. Typically, we first pick by lineage, then by condition. - The indices are returned as an array. + The indices are returned as a list for use in pd.loc. + Converting indices that are a mixture of strings and integers + into arrays changes integers into strings, breaking the + index convention for Signals. """ self.orig_signal = signal indices = set(signal.index) @@ -101,8 +109,8 @@ class Picker(LineageProcess): else: self._log("No lineage assignment") indices = np.array([]) - # convert to array - indices_arr = np.array([tuple(map(_str_to_int, x)) for x in indices]) + # return as list + indices_arr = [tuple(x) for x in indices] return indices_arr def pick_by_condition( @@ -153,3 +161,14 @@ def any_present(signal, threshold): np.sum(trap_array, axis=0).astype(bool), index=signal.index ) return any_present + + +def str_to_int(x: str or None): + """Cast string as int if possible.""" + if x is None: + return x + else: + try: + return int(x) + except ValueError: + return x diff --git a/src/postprocessor/grouper.py b/src/postprocessor/grouper.py index 267f6a81ab3702b3fb56809b244170e8727d7751..cc525eca6bbed3c45878f5ca58b70e4ec349b4cd 100644 --- a/src/postprocessor/grouper.py +++ b/src/postprocessor/grouper.py @@ -90,6 +90,17 @@ class Grouper(ABC): pool : int Number of threads used; if 0 or None only one core is used. mode: str + If "retained" (default), return Signal with merging, picking, and lineage + information applied but only for cells present for at least some + cutoff fraction of the movie. + If "raw", return Signal without merging, picking, lineage information, + or a cutoff applied. Each of the first three options can be + re-selected. A raw Signal with all three selected is the same as a + retained Signal with a 0 cutoff. + If "daughters", return Signal with only daughters - cells with an + identified mother. + If "families", get Signal with merging, picking, and lineage + information applied. **kwargs : key, value pairings Named arguments for concat_ind_function @@ -111,9 +122,9 @@ class Grouper(ABC): ) # check for errors errors = [ - k - for kymo, k in zip(records, self.positions.keys()) - if kymo is None + position + for record, position in zip(records, self.positions.keys()) + if record is None ] records = [record for record in records if record is not None] if len(errors): @@ -122,7 +133,8 @@ class Grouper(ABC): # combine into one dataframe concat = pd.concat(records, axis=0) if len(concat.index.names) > 4: - # reorder levels in the multi-index dataframe when mother_label is present + # reorder levels in the multi-index dataframe + # when mother_label is present concat = concat.reorder_levels( ("group", "position", "trap", "cell_label", "mother_label") )