diff --git a/src/agora/io/signal.py b/src/agora/io/signal.py index ea407f1b88cbcab220dfe37f75be7afc5a3b3946..e989ca2e9777d9b390af58e5ecb8911f64a4dc97 100644 --- a/src/agora/io/signal.py +++ b/src/agora/io/signal.py @@ -235,7 +235,6 @@ class Signal(BridgeH5): dataset: str or t.List[str], in_minutes: bool = True, lineage: bool = False, - stop_on_lineage_check: bool = True, tmax_in_mins: int = None, **kwargs, ) -> pd.DataFrame or t.List[pd.DataFrame]: @@ -258,52 +257,51 @@ class Signal(BridgeH5): Setting tmax_in_mins is a way to ignore parts of the experiment with incorrect lineages generated by clogging. """ - try: - if isinstance(dataset, str): - with h5py.File(self.filename, "r") as f: - df = self.dataset_to_df(f, dataset) - if df is not None: - df = df.sort_index() - if in_minutes: - df = self.cols_in_mins(df) - # limit data by time and discard NaNs - if ( - in_minutes - and tmax_in_mins - and type(tmax_in_mins) is int - ): - df = df[df.columns[df.columns <= tmax_in_mins]] - df = df.dropna(how="all") - # add mother label to data frame - if lineage: - mother_label = np.zeros(len(df), dtype=int) - lineage = self.lineage() - valid_lineage, valid_indices = validate_lineage( - lineage, - indices=np.array(df.index.to_list()), - how="daughters", - stop_on_lineage_check=stop_on_lineage_check, - ) - mother_label[valid_indices] = lineage[ - valid_lineage, 1 - ] - df = add_index_levels( - df, {"mother_label": mother_label} - ) - return df - elif isinstance(dataset, list): - return [ - self.get_raw( - dset, - in_minutes=in_minutes, - lineage=lineage, - tmax_in_mins=tmax_in_mins, - ) - for dset in dataset - ] - except Exception as e: - message = f"Signal could not obtain data {dataset}: {e}" - self._log(message) + if isinstance(dataset, str): + with h5py.File(self.filename, "r") as f: + df = self.dataset_to_df(f, dataset) + if df is not None: + df = df.sort_index() + if in_minutes: + df = self.cols_in_mins(df) + # limit data by time and discard NaNs + if ( + in_minutes + and tmax_in_mins + and type(tmax_in_mins) is int + ): + df = df[df.columns[df.columns <= tmax_in_mins]] + df = df.dropna(how="all") + # add mother label to data frame + if lineage: + if "mother_label" in df.index.names: + df = df.droplevel("mother_label") + mother_label = np.zeros(len(df), dtype=int) + lineage = self.lineage() + ( + valid_lineage, + valid_indices, + lineage, + ) = validate_lineage( + lineage, + indices=np.array(df.index.to_list()), + how="daughters", + ) + mother_label[valid_indices] = lineage[valid_lineage, 1] + df = add_index_levels( + df, {"mother_label": mother_label} + ) + return df + elif isinstance(dataset, list): + return [ + self.get_raw( + dset, + in_minutes=in_minutes, + lineage=lineage, + tmax_in_mins=tmax_in_mins, + ) + for dset in dataset + ] def load_merges(self): """Get merge events going up to the first level.""" diff --git a/src/agora/utils/indexing.py b/src/agora/utils/indexing.py index 61efe7a3ad717bcbbdf5daf1ae55f4291c93eb33..87b77481211a2658037f8d69dd04b9487152ba10 100644 --- a/src/agora/utils/indexing.py +++ b/src/agora/utils/indexing.py @@ -1,4 +1,5 @@ import numpy as np +import pandas as pd # data type to link together trap and cell ids i_dtype = {"names": ["trap_id", "cell_id"], "formats": [np.int64, np.int64]} @@ -8,7 +9,6 @@ def validate_lineage( lineage: np.ndarray, indices: np.ndarray, how: str = "families", - stop_on_lineage_check: bool = True, ): """ Identify mother-bud pairs both in lineage and a Signal's indices. @@ -16,6 +16,9 @@ def validate_lineage( We expect the lineage information to be unique: a bud should not have two mothers. + Lineage is returned with buds assigned only to their first mother if they + have multiple. + Parameters ---------- lineage : np.ndarray @@ -31,9 +34,6 @@ def validate_lineage( If "mothers", matches indicate mothers from mother-bud pairs; If "daughters", matches indicate daughters from mother-bud pairs; If "families", matches indicate mothers and daughters in mother-bud pairs. - stop_on_lineage_check: bool - If True, raise an Exception for any errors in the lineage assignment such - as a daughter being assigned two mothers. Returns ------- @@ -41,6 +41,9 @@ def validate_lineage( 1D array indicating matched elements in lineage. valid_indices: boolean np.ndarray 1D array indicating matched elements in indices. + lineage: np.ndarray + Any bud already having a mother that is assigned to another has that + second assignment discarded. Examples -------- @@ -50,7 +53,7 @@ def validate_lineage( >>> lineage = np.array([ [[0, 1], [0, 3]], [[0, 1], [0, 4]], [[0, 1], [0, 6]], [[0, 4], [0, 7]] ]) >>> indices = np.array([ [0, 1], [0, 2], [0, 3]]) - >>> valid_lineage, valid_indices = validate_lineage(lineage, indices) + >>> valid_lineage, valid_indices, lineage = validate_lineage(lineage, indices) >>> print(valid_lineage) array([ True, False, False, False]) @@ -61,7 +64,7 @@ def validate_lineage( >>> lineage = np.array([[[0,3], [0,1]], [[0,2], [0,4]]]) >>> indices = np.array([[0,1], [0,2], [0,3]]) - >>> valid_lineage, valid_indices = validate_lineage(lineage, indices) + >>> valid_lineage, valid_indices, lineage = validate_lineage(lineage, indices) >>> print(valid_lineage) array([ True, False]) >>> print(valid_indices) @@ -70,10 +73,16 @@ def validate_lineage( if lineage.ndim == 2: # [trap, mother, daughter] becomes [[trap, mother], [trap, daughter]] lineage = assoc_indices_to_3d(lineage) + invert_lineage = True if how == "mothers": c_index = 0 elif how == "daughters": c_index = 1 + + # if buds have two mothers, pick the first one + lineage = lineage[ + ~pd.DataFrame(lineage[:, 1, :]).duplicated().values, :, : + ] # find valid lineage valid_lineages = index_isin(lineage, indices) if how == "families": @@ -90,29 +99,10 @@ def validate_lineage( else: valid_indices = index_isin(indices, selected_lineages[:, c_index, :]) flat_valid_indices = valid_indices.flatten() - # test for mismatch - if how == "families": - test_mismatch = ( - indices[flat_valid_indices, :].size - != np.unique( - lineage[flat_valid_lineage, :].reshape(-1, 2), axis=0 - ).size - ) - else: - test_mismatch = ( - indices[flat_valid_indices, :].size - != lineage[flat_valid_lineage, c_index, :].size - ) - if test_mismatch: - # all unique indices in valid_lineages should be in valid_indices - if stop_on_lineage_check: - raise Exception( - "Error in validate_lineage: " - "lineage information is likely not unique." - ) - else: - print("Warning: error in validate_lineage.") - return flat_valid_lineage, flat_valid_indices + # put the corrected lineage in the right format + if invert_lineage: + lineage = assoc_indices_to_2d(lineage) + return flat_valid_lineage, flat_valid_indices, lineage def index_isin(x: np.ndarray, y: np.ndarray) -> np.ndarray: diff --git a/src/postprocessor/core/reshapers/picker.py b/src/postprocessor/core/reshapers/picker.py index f257557d985906d520db48f2f11a1a5bfea786ba..ccd58693de36f47badadb1db5a5a0d5a91d1c8b4 100644 --- a/src/postprocessor/core/reshapers/picker.py +++ b/src/postprocessor/core/reshapers/picker.py @@ -67,7 +67,7 @@ class Picker(LineageProcess): """ cells_present = drop_mother_label(signal.index) mothers_daughters = self.get_lineage_information(signal) - _, valid_indices = validate_lineage( + _, valid_indices, mothers_daughters = validate_lineage( mothers_daughters, cells_present, how ) return signal.index[valid_indices] diff --git a/src/postprocessor/grouper.py b/src/postprocessor/grouper.py index e31d533d218ea71097f3451056be767358c7b6a4..f9e1fa85c3402542bd2c06a6e86ba6e18b188e9c 100644 --- a/src/postprocessor/grouper.py +++ b/src/postprocessor/grouper.py @@ -337,8 +337,8 @@ def concat_one_signal( ] combined = combined.droplevel("mother_label") elif mode == "families": - # applies picking and merging via Signal.__getitem__ - combined = position[path] + # applies picking and merging + combined = position.get(path, tmax_in_mins=tmax_in_mins) else: raise Exception(f"concat_one_signal: {mode} not recognised.") if combined is not None: