From 90a06eeca6b8b60383be228df57207f24f4dc54c Mon Sep 17 00:00:00 2001
From: pswain <peter.swain@ed.ac.uk>
Date: Tue, 23 May 2023 15:31:22 +0100
Subject: [PATCH] some more docs focused on picker

---
 src/agora/io/signal.py                     | 17 +++++++++--------
 src/agora/utils/indexing.py                | 11 ++++++-----
 src/postprocessor/chainer.py               |  8 +++++---
 src/postprocessor/core/reshapers/picker.py |  5 +++--
 src/postprocessor/grouper.py               | 11 ++++++-----
 5 files changed, 29 insertions(+), 23 deletions(-)

diff --git a/src/agora/io/signal.py b/src/agora/io/signal.py
index ae6ae71e..4611663f 100644
--- a/src/agora/io/signal.py
+++ b/src/agora/io/signal.py
@@ -60,10 +60,10 @@ class Signal(BridgeH5):
 
     def get(self, dsets: t.Union[str, t.Collection], **kwargs):
         """Get and potentially pre-process data from h5 file and return as a dataframe."""
-        if isinstance(dsets, str):  # no pre-processing
-            df = self.get_raw(dsets, **kwargs)
+        if isinstance(dsets, str):
+            # no pre-processing
+            dsets = self.get_raw(dsets, **kwargs)
         prepost_applied = self.apply_prepost(dsets, **kwargs)
-
         return self.add_name(prepost_applied, dsets)
 
     @staticmethod
@@ -73,7 +73,7 @@ class Signal(BridgeH5):
         return df
 
     def cols_in_mins(self, df: pd.DataFrame):
-        # Convert numerical columns in a dataframe to minutes
+        """Convert numerical columns in a dataframe to minutes."""
         try:
             df.columns = (df.columns * self.tinterval // 60).astype(int)
         except Exception as e:
@@ -141,7 +141,6 @@ class Signal(BridgeH5):
             if lineage_location not in f:
                 lineage_location = "postprocessing/lineage"
             tile_mo_da = f[lineage_location]
-
             if isinstance(tile_mo_da, h5py.Dataset):
                 lineage = tile_mo_da[()]
             else:
@@ -288,15 +287,17 @@ class Signal(BridgeH5):
                     self.get_raw(dset, in_minutes=in_minutes, lineage=lineage)
                     for dset in dataset
                 ]
-            if lineage:  # assume that df is sorted
+            if lineage:
+                # assume that df is sorted
                 mother_label = np.zeros(len(df), dtype=int)
                 lineage = self.lineage()
-                a, b = validate_association(
+                valid_association, valid_indices = validate_association(
                     lineage,
                     np.array(df.index.to_list()),
+                    #: are mothers not match_column=0?
                     match_column=1,
                 )
-                mother_label[b] = lineage[a, 1]
+                mother_label[valid_indices] = lineage[valid_association, 1]
                 df = add_index_levels(df, {"mother_label": mother_label})
             return df
         except Exception as e:
diff --git a/src/agora/utils/indexing.py b/src/agora/utils/indexing.py
index a86fce66..5eedeabe 100644
--- a/src/agora/utils/indexing.py
+++ b/src/agora/utils/indexing.py
@@ -28,12 +28,13 @@ def validate_association(
     association : np.ndarray
         2D array of lineage associations where columns are
         (trap, mother, daughter) or
-        a 3D array, which is an array of 2 X 2 arrays comprising [[trap_id, mother_label], [trap_id, daughter_label]].
+        a 3D array, which is an array of 2 X 2 arrays comprising [[trap_id, mother_label], [trap_id, daughter_label]].
     indices : np.ndarray
-        a 2D array where each column is a different level, such as (trap_id, cell_label). This should not include mother_label.
+        A 2D array where each column is a different level, such as (trap_id, cell_label). This array should not include mother_label.
     match_column: int
-        If 0, match mothers; if 1 match daughters.
-        If None, match both mothers and daughters.
+        If 0, matches indicate mothers from mother-bud pairs;
+        If 1, matches indicate daughters from mother-bud pairs;
+        If None, matches indicate either mothers or daughters in mother-bud pairs.
 
     Returns
     -------
@@ -100,7 +101,7 @@ def validate_association(
     # make True comparisons have both trap_ids and cell labels matching
     valid_cell_ids_va = valid_ndassociation[valid_association].all(axis=2)
     if match_column is None:
-        # make True comparisons match at least one mother or bud in association
+        # make True comparisons match either a mother or a bud in association
         valid_indices = valid_cell_ids_va.any(axis=1)[0]
     else:
         valid_indices = valid_cell_ids_va[:, match_column][0]
diff --git a/src/postprocessor/chainer.py b/src/postprocessor/chainer.py
index b834fb5d..027e02ee 100644
--- a/src/postprocessor/chainer.py
+++ b/src/postprocessor/chainer.py
@@ -14,9 +14,10 @@ from postprocessor.core.abc import get_process
 class Chainer(Signal):
     """
     Extend Signal by applying post-processes and allowing composite signals that combine basic signals.
 
-    It "chains" multiple processes upon fetching a dataset to produce the desired datasets.
-    Instead of reading processes previously applied, it executes
+    Chainer "chains" multiple processes upon fetching a dataset.
+
+    Instead of reading processes previously applied, Chainer executes
     them when called.
     """
 
@@ -25,6 +26,7 @@ class Chainer(Signal):
     }
 
     def __init__(self, *args, **kwargs):
+        """Initialise chainer."""
         super().__init__(*args, **kwargs)
 
         def replace_path(path: str, bgsub: bool = ""):
@@ -34,7 +36,7 @@ class Chainer(Signal):
                 path = re.sub(channel, f"{channel}{suffix}", path)
             return path
 
-        # Add chain with and without bgsub for composite statistics
+        # add chain with and without bgsub for composite statistics
         self.common_chains = {
             alias
             + bgsub: lambda **kwargs: self.get(
diff --git a/src/postprocessor/core/reshapers/picker.py b/src/postprocessor/core/reshapers/picker.py
index 168b022a..f3c35852 100644
--- a/src/postprocessor/core/reshapers/picker.py
+++ b/src/postprocessor/core/reshapers/picker.py
@@ -30,7 +30,8 @@ class PickerParameters(ParametersABC):
 
 class Picker(LineageProcess):
     """
-    Picker selects cells from a signal using lineage information and by how long and by how they are retained in the data set.
+    Picker selects cells from a signal using lineage information and
+    by how and for how long they are retained in the data set.
     """
 
     def __init__(
@@ -143,7 +144,7 @@ def _as_int(threshold: t.Union[float, int], ntps: int):
 
 def any_present(signal, threshold):
     """Return pd.Series for cells where True indicates that cell was present for more than threshold time points."""
-    #: isn't full_traps all we need?
+    #: why isn't full_traps all we need?
     full_traps = (signal.notna().sum(axis=1) > threshold).groupby("trap")
     any_present = pd.Series(
         np.sum(
diff --git a/src/postprocessor/grouper.py b/src/postprocessor/grouper.py
index 27f905c3..08440093 100644
--- a/src/postprocessor/grouper.py
+++ b/src/postprocessor/grouper.py
@@ -86,8 +86,9 @@ class Grouper(ABC):
         **kwargs,
     ):
         """
-        Concatenate data for one signal from different h5 files, with
-        one h5 file per position, into a dataframe.
+        Concatenate data for one signal from different h5 files into a dataframe.
+
+        Each h5 file corresponds to one position
 
         Parameters
         ----------
@@ -267,17 +268,17 @@ class Grouper(ABC):
 
     @property
     def stages_span(self):
-        # FAILS on my example
+        # TODO: fails on my example
         return self.fsignal.stages_span
 
     @property
     def max_span(self):
-        # FAILS on my example
+        # TODO: fails on my example
         return self.fsignal.max_span
 
     @property
     def stages(self):
-        # FAILS on my example
+        # TODO: fails on my example
         return self.fsignal.stages
 
     @property
-- 
GitLab
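
Note on the signal.py change above: the renamed variables document how validate_association is used to attach a mother_label index level to a signal. Below is a minimal, self-contained sketch of that matching logic, for illustration only. match_daughters is a hypothetical stand-in for agora.utils.indexing.validate_association called with match_column=1 (match daughters), and the toy lineage array and dataframe are invented; like the patched code, it assumes the signal's index and the lineage array are consistently ordered.

import numpy as np
import pandas as pd


def match_daughters(lineage: np.ndarray, index: np.ndarray):
    """
    For each (trap, cell_label) row of index, report whether it appears as a
    daughter in lineage, whose columns are (trap, mother_label, daughter_label).

    Returns two boolean masks: one over lineage rows that matched an index row
    and one over index rows that matched a daughter, mirroring the
    (valid_association, valid_indices) pair used in the patched get_raw.
    """
    daughters = lineage[:, [0, 2]]  # (trap, daughter_label) pairs
    # pairwise comparison of every lineage row against every index row
    hits = (daughters[:, None, :] == index[None, :, :]).all(axis=2)
    return hits.any(axis=1), hits.any(axis=0)


# toy data: in trap 0, mother 1 buds daughter 3; in trap 1, mother 2 buds daughter 4
lineage = np.array([[0, 1, 3], [1, 2, 4]])
cells = pd.DataFrame(
    np.random.rand(4, 3),
    index=pd.MultiIndex.from_tuples(
        [(0, 1), (0, 3), (1, 2), (1, 4)], names=["trap", "cell_label"]
    ),
)

valid_lineage, valid_index = match_daughters(
    lineage, np.array(cells.index.to_list())
)
# daughters inherit their mother's label; non-daughters keep 0
# (as in the patch, this relies on both arrays being sorted consistently)
mother_label = np.zeros(len(cells), dtype=int)
mother_label[valid_index] = lineage[valid_lineage, 1]
cells = cells.set_index(pd.Index(mother_label, name="mother_label"), append=True)
print(cells)

Running this prints the toy dataframe with a mother_label level that is 0 for the mothers and the mother's label (1 or 2) for each daughter.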