diff --git a/src/agora/io/signal.py b/src/agora/io/signal.py index 3a5261f7bc7f02374f2f10980d271afbfaa2f104..4193178abb71a2ca81d03c7435725ef28ef2fc57 100644 --- a/src/agora/io/signal.py +++ b/src/agora/io/signal.py @@ -59,7 +59,7 @@ class Signal(BridgeH5): ): """Get Signal after merging and picking.""" if isinstance(dset_name, str): - dsets = self.get_raw(dset_name, tmax_in_mins) + dsets = self.get_raw(dset_name, tmax_in_mins=tmax_in_mins) if dsets is not None: picked_merged = self.apply_merging_picking(dsets) return self.add_name(picked_merged, dset_name) @@ -76,10 +76,7 @@ class Signal(BridgeH5): def cols_in_mins(self, df: pd.DataFrame): """Convert numerical columns in a data frame to minutes.""" - try: - df.columns = (df.columns * self.tinterval // 60).astype(int) - except Exception as e: - self._log(f"Unable to convert columns to minutes: {e}", "debug") + df.columns = (df.columns * self.tinterval // 60).astype(int) return df @cached_property @@ -105,7 +102,7 @@ class Signal(BridgeH5): ) return 300 - def retained(self, signal, cutoff=0, tmax_in_mins: int = None): + def retained(self, signal, cutoff: float = 0, tmax_in_mins: int = None): """Get retained cells for a Signal or list of Signals.""" if isinstance(signal, str): # get data frame @@ -284,8 +281,12 @@ class Signal(BridgeH5): if in_minutes: df = self.cols_in_mins(df) # limit data by time and discard NaNs - if tmax_in_mins and type(tmax_in_mins) is int: - df = df[df.columns[df.columns < tmax_in_mins]] + if ( + in_minutes + and tmax_in_mins + and type(tmax_in_mins) is int + ): + df = df[df.columns[df.columns <= tmax_in_mins]] df = df.dropna(how="all") # add mother label to data frame if lineage: @@ -303,7 +304,7 @@ class Signal(BridgeH5): df = add_index_levels( df, {"mother_label": mother_label} ) - return df + return df elif isinstance(dataset, list): return [ self.get_raw( @@ -315,7 +316,7 @@ class Signal(BridgeH5): for dset in dataset ] except Exception as e: - message = f"Signal could not obtain data 
{dataset}: {e}." + message = f"Signal could not obtain data {dataset}: {e}" self._log(message) def load_merges(self): diff --git a/src/postprocessor/grouper.py b/src/postprocessor/grouper.py index f964af7f58797e20d0529726d89e0e03be7c2a2c..7cf22be19fd7d3767223d88beb82204feaf39807 100644 --- a/src/postprocessor/grouper.py +++ b/src/postprocessor/grouper.py @@ -77,6 +77,7 @@ class Grouper(ABC): pool: t.Optional[int] = None, mode: str = "retained", selected_positions: t.List[str] = None, + tmax_in_mins_dict: dict = None, **kwargs, ): """ @@ -88,7 +89,7 @@ class Grouper(ABC): ---------- path : str Signal location within h5 file. - pool : int + pool : int (optional) Number of threads used; if 0 or None only one core is used. mode: str If "retained" (default), return Signal with merging, picking, and lineage @@ -102,8 +103,13 @@ class Grouper(ABC): identified mother. If "families", get Signal with merging, picking, and lineage information applied. - selected_positions: list[str] + selected_positions: list[str] (optional) If defined, get signals for only these positions. + tmax_in_mins_dict: dict (optional) + A dictionary with positions as keys and maximum times in minutes as + values. For example: { "PDR5_GFP_001": 6 * 60}. + Data will only be included up to this time point, which is a way to + avoid errors in assigning lineages because of clogging. 
**kwargs : key, value pairings Named arguments for concat_ind_function @@ -127,6 +133,7 @@ class Grouper(ABC): f=concat_one_signal, pool=pool, positions=good_positions, + tmax_in_mins_dict=tmax_in_mins_dict, **kwargs, ) # check for errors @@ -171,6 +178,7 @@ class Grouper(ABC): f: t.Callable, pool: t.Optional[int] = None, positions: t.Dict[str, Signal] = None, + tmax_in_mins_dict: dict = None, **kwargs, ): """ @@ -187,6 +195,7 @@ class Grouper(ABC): position=x[1], group=self.positions_groups[x[0]], position_name=x[0], + tmax_in_mins_dict=tmax_in_mins_dict, **kwargs, ), positions.items(), @@ -198,6 +207,7 @@ class Grouper(ABC): position=position, group=self.positions_groups[name], position_name=name, + tmax_in_mins_dict=tmax_in_mins_dict, **kwargs, ) for name, position in positions.items() @@ -280,7 +290,7 @@ def concat_one_signal( mode: str = "retained", position_name=None, tmax_in_mins_dict=None, - **kwargs, + cutoff: float = 0, ) -> pd.DataFrame: """Retrieve a signal for one position.""" if tmax_in_mins_dict and position_name in tmax_in_mins_dict: @@ -297,20 +307,24 @@ def concat_one_signal( else: print(f" Loading {path} for {position_name}.") if mode == "retained": - combined = position.retained(path, tmax_in_mins=tmax_in_mins, **kwargs) + # applies picking and merging via Signal.get + combined = position.retained( + path, tmax_in_mins=tmax_in_mins, cutoff=cutoff + ) elif mode == "raw": - combined = position.get_raw(path, tmax_in_mins=tmax_in_mins, **kwargs) - elif mode == "daughters": + # no picking and merging + combined = position.get_raw(path, tmax_in_mins=tmax_in_mins) + elif mode == "raw_daughters": combined = position.get_raw( - path, lineage=True, tmax_in_mins=tmax_in_mins, **kwargs + path, lineage=True, tmax_in_mins=tmax_in_mins ) if combined is not None: combined = combined.loc[ combined.index.get_level_values("mother_label") > 0 ] - elif mode == "mothers": + elif mode == "raw_mothers": combined = position.get_raw( - path, lineage=True, 
tmax_in_mins=tmax_in_mins, **kwargs + path, lineage=True, tmax_in_mins=tmax_in_mins ) if combined is not None: combined = combined.loc[ @@ -318,6 +332,7 @@ def concat_one_signal( ] combined = combined.droplevel("mother_label") elif mode == "families": + # applies picking and merging via Signal.__getitem__ combined = position[path] else: raise Exception(f"concat_one_signal: {mode} not recognised.")