From 90a06eeca6b8b60383be228df57207f24f4dc54c Mon Sep 17 00:00:00 2001
From: pswain <peter.swain@ed.ac.uk>
Date: Tue, 23 May 2023 15:31:22 +0100
Subject: [PATCH] some more docs focused on picker

---
 src/agora/io/signal.py                     | 17 +++++++++--------
 src/agora/utils/indexing.py                | 11 ++++++-----
 src/postprocessor/chainer.py               |  8 +++++---
 src/postprocessor/core/reshapers/picker.py |  5 +++--
 src/postprocessor/grouper.py               | 11 ++++++-----
 5 files changed, 29 insertions(+), 23 deletions(-)

diff --git a/src/agora/io/signal.py b/src/agora/io/signal.py
index ae6ae71e..4611663f 100644
--- a/src/agora/io/signal.py
+++ b/src/agora/io/signal.py
@@ -60,10 +60,10 @@ class Signal(BridgeH5):
 
     def get(self, dsets: t.Union[str, t.Collection], **kwargs):
         """Get and potentially pre-process data from h5 file and return as a dataframe."""
-        if isinstance(dsets, str):  # no pre-processing
-            df = self.get_raw(dsets, **kwargs)
+        if isinstance(dsets, str):
+            # no pre-processing
+            dsets = self.get_raw(dsets, **kwargs)
             prepost_applied = self.apply_prepost(dsets, **kwargs)
-
             return self.add_name(prepost_applied, dsets)
 
     @staticmethod
@@ -73,7 +73,7 @@ class Signal(BridgeH5):
         return df
 
     def cols_in_mins(self, df: pd.DataFrame):
-        # Convert numerical columns in a dataframe to minutes
+        """Convert numerical columns in a dataframe to minutes."""
         try:
             df.columns = (df.columns * self.tinterval // 60).astype(int)
         except Exception as e:
@@ -141,7 +141,6 @@ class Signal(BridgeH5):
             if lineage_location not in f:
                 lineage_location = "postprocessing/lineage"
             tile_mo_da = f[lineage_location]
-
             if isinstance(tile_mo_da, h5py.Dataset):
                 lineage = tile_mo_da[()]
             else:
@@ -288,15 +287,17 @@ class Signal(BridgeH5):
                     self.get_raw(dset, in_minutes=in_minutes, lineage=lineage)
                     for dset in dataset
                 ]
-            if lineage:  # assume that df is sorted
+            if lineage:
+                # assume that df is sorted
                 mother_label = np.zeros(len(df), dtype=int)
                 lineage = self.lineage()
-                a, b = validate_association(
+                valid_association, valid_indices = validate_association(
                     lineage,
                     np.array(df.index.to_list()),
+                    #: are mothers not match_column=0?
                     match_column=1,
                 )
-                mother_label[b] = lineage[a, 1]
+                mother_label[valid_indices] = lineage[valid_association, 1]
                 df = add_index_levels(df, {"mother_label": mother_label})
             return df
         except Exception as e:
diff --git a/src/agora/utils/indexing.py b/src/agora/utils/indexing.py
index a86fce66..5eedeabe 100644
--- a/src/agora/utils/indexing.py
+++ b/src/agora/utils/indexing.py
@@ -28,12 +28,13 @@ def validate_association(
     association : np.ndarray
         2D array of lineage associations where columns are (trap, mother, daughter)
         or
-        a 3D array, which is an array of 2 X 2 arrays comprising  [[trap_id, mother_label], [trap_id, daughter_label]].
+        a 3D array, which is an array of 2 X 2 arrays comprising [[trap_id, mother_label], [trap_id, daughter_label]].
     indices : np.ndarray
-        a 2D array where each column is a different level, such as (trap_id, cell_label). This should not include mother_label.
+        A 2D array where each column is a different level, such as (trap_id, cell_label). This array should not include mother_label.
     match_column: int
-        If 0, match mothers; if 1 match daughters.
-        If None, match both mothers and daughters.
+        If 0, matches indicate mothers from mother-bud pairs;
+        If 1, matches indicate daughters from mother-bud pairs;
+        If None, matches indicate either mothers or daughters in mother-bud pairs.
 
     Returns
     -------
@@ -100,7 +101,7 @@ def validate_association(
     # make True comparisons have both trap_ids and cell labels matching
     valid_cell_ids_va = valid_ndassociation[valid_association].all(axis=2)
     if match_column is None:
-        # make True comparisons match at least one mother or bud in association
+        # make True comparisons match either a mother or a bud in association
         valid_indices = valid_cell_ids_va.any(axis=1)[0]
     else:
         valid_indices = valid_cell_ids_va[:, match_column][0]
diff --git a/src/postprocessor/chainer.py b/src/postprocessor/chainer.py
index b834fb5d..027e02ee 100644
--- a/src/postprocessor/chainer.py
+++ b/src/postprocessor/chainer.py
@@ -14,9 +14,10 @@ from postprocessor.core.abc import get_process
 class Chainer(Signal):
     """
     Extend Signal by applying post-processes and allowing composite signals that combine basic signals.
-    It "chains" multiple processes upon fetching a dataset to produce the desired datasets.
 
-    Instead of reading processes previously applied, it executes
+    Chainer "chains" multiple processes upon fetching a dataset.
+
+    Instead of reading processes previously applied, Chainer executes
     them when called.
     """
 
@@ -25,6 +26,7 @@ class Chainer(Signal):
     }
 
     def __init__(self, *args, **kwargs):
+        """Initialise chainer."""
         super().__init__(*args, **kwargs)
 
         def replace_path(path: str, bgsub: bool = ""):
@@ -34,7 +36,7 @@ class Chainer(Signal):
             path = re.sub(channel, f"{channel}{suffix}", path)
             return path
 
-        # Add chain with and without bgsub for composite statistics
+        # add chain with and without bgsub for composite statistics
         self.common_chains = {
             alias
             + bgsub: lambda **kwargs: self.get(
diff --git a/src/postprocessor/core/reshapers/picker.py b/src/postprocessor/core/reshapers/picker.py
index 168b022a..f3c35852 100644
--- a/src/postprocessor/core/reshapers/picker.py
+++ b/src/postprocessor/core/reshapers/picker.py
@@ -30,7 +30,8 @@ class PickerParameters(ParametersABC):
 
 class Picker(LineageProcess):
     """
-    Picker selects cells from a signal using lineage information and by how long and by how they are retained in the data set.
+    Picker selects cells from a signal using lineage information and
+    by how and for how long they are retained in the data set.
     """
 
     def __init__(
@@ -143,7 +144,7 @@ def _as_int(threshold: t.Union[float, int], ntps: int):
 
 def any_present(signal, threshold):
     """Return pd.Series for cells where True indicates that cell was present for more than threshold time points."""
-    #: isn't full_traps all we need?
+    #: why isn't full_traps all we need?
     full_traps = (signal.notna().sum(axis=1) > threshold).groupby("trap")
     any_present = pd.Series(
         np.sum(
diff --git a/src/postprocessor/grouper.py b/src/postprocessor/grouper.py
index 27f905c3..08440093 100644
--- a/src/postprocessor/grouper.py
+++ b/src/postprocessor/grouper.py
@@ -86,8 +86,9 @@ class Grouper(ABC):
         **kwargs,
     ):
         """
-        Concatenate data for one signal from different h5 files, with
-        one h5 file per position, into a dataframe.
+        Concatenate data for one signal from different h5 files into a dataframe.
+
+        Each h5 file corresponds to one position
 
         Parameters
         ----------
@@ -267,17 +268,17 @@ class Grouper(ABC):
 
     @property
     def stages_span(self):
-        # FAILS on my example
+        # TODO: fails on my example
         return self.fsignal.stages_span
 
     @property
     def max_span(self):
-        # FAILS on my example
+        # TODO: fails on my example
         return self.fsignal.max_span
 
     @property
     def stages(self):
-        # FAILS on my example
+        # TODO: fails on my example
         return self.fsignal.stages
 
     @property
-- 
GitLab