From 5c9e7adcb9a836f6daec8ce4d703382cc11c5695 Mon Sep 17 00:00:00 2001
From: pswain <peter.swain@ed.ac.uk>
Date: Tue, 14 Nov 2023 14:25:50 +0000
Subject: [PATCH] feat(Signal): return None rather than raise an error if
 data is missing

---
 src/agora/io/signal.py                   | 87 +++++++++++++-----------
 src/postprocessor/core/postprocessing.py |  4 +-
 src/postprocessor/grouper.py             |  2 +-
 3 files changed, 53 insertions(+), 40 deletions(-)

diff --git a/src/agora/io/signal.py b/src/agora/io/signal.py
index 3f0d66d..dac2ec6 100644
--- a/src/agora/io/signal.py
+++ b/src/agora/io/signal.py
@@ -15,7 +15,6 @@ from agora.io.decorators import _first_arg_str_to_raw_df
 from agora.utils.indexing import validate_lineage
 from agora.utils.kymograph import add_index_levels
 from agora.utils.merge import apply_merges
-from postprocessor.core.reshapers.picker import Picker, PickerParameters
 
 
 class Signal(BridgeH5):
@@ -57,8 +56,11 @@ class Signal(BridgeH5):
         """Get Signal after merging and picking."""
         if isinstance(dset_name, str):
             dsets = self.get_raw(dset_name, **kwargs)
-            picked_merged = self.apply_merging_picking(dsets, **kwargs)
-            return self.add_name(picked_merged, dset_name)
+            if dsets is not None:
+                picked_merged = self.apply_merging_picking(dsets, **kwargs)
+                return self.add_name(picked_merged, dset_name)
+            else:
+                return None
         else:
             raise Exception("Error in Signal.get")
 
@@ -266,32 +268,37 @@ class Signal(BridgeH5):
         try:
             if isinstance(dataset, str):
                 with h5py.File(self.filename, "r") as f:
-                    df = self.dataset_to_df(f, dataset).sort_index()
-                    if in_minutes:
-                        df = self.cols_in_mins(df)
+                    df = self.dataset_to_df(f, dataset)
+                    if df is not None:
+                        df = df.sort_index()
+                        if in_minutes:
+                            df = self.cols_in_mins(df)
+                        # apply merging or picking or both or neither
+                        df = self.apply_merging_picking(df, merges, picks)
+                        # add mother label to data frame
+                        if lineage:
+                            mother_label = np.zeros(len(df), dtype=int)
+                            lineage = self.lineage()
+                            valid_lineage, valid_indices = validate_lineage(
+                                lineage,
+                                indices=np.array(df.index.to_list()),
+                                how="daughters",
+                            )
+                            mother_label[valid_indices] = lineage[
+                                valid_lineage, 1
+                            ]
+                            df = add_index_levels(
+                                df, {"mother_label": mother_label}
+                            )
+                    return df
             elif isinstance(dataset, list):
                 return [
                     self.get_raw(dset, in_minutes=in_minutes, lineage=lineage)
                     for dset in dataset
                 ]
-            # apply merging or picking or both or neither
-            df = self.apply_merging_picking(df, merges, picks)
-            # add mother label to data frame
-            if lineage:
-                mother_label = np.zeros(len(df), dtype=int)
-                lineage = self.lineage()
-
-                valid_lineage, valid_indices = validate_lineage(
-                    lineage,
-                    indices=np.array(df.index.to_list()),
-                    how="daughters",
-                )
-                mother_label[valid_indices] = lineage[valid_lineage, 1]
-                df = add_index_levels(df, {"mother_label": mother_label})
-            return df
         except Exception as e:
-            self._log(f"Could not fetch dataset {dataset}: {e}", "error")
-            raise e
+            message = f"Signal could not find data {dataset}: {e}."
+            self._log(message)
 
     def load_merges(self):
         """Get merge events going up to the first level."""
@@ -318,21 +325,25 @@ class Signal(BridgeH5):
 
     def dataset_to_df(self, f: h5py.File, path: str) -> pd.DataFrame:
         """Get data from h5 file as a dataframe."""
-        assert path in f, f"{path} not in {f}"
-        dset = f[path]
-        values, index, columns = [], [], []
-        index_names = copy(self.index_names)
-        valid_names = [lbl for lbl in index_names if lbl in dset.keys()]
-        if valid_names:
-            index = pd.MultiIndex.from_arrays(
-                [dset[lbl] for lbl in valid_names], names=valid_names
-            )
-            columns = dset.attrs.get("columns", None)
-            if "timepoint" in dset:
-                columns = f[path + "/timepoint"][()]
-            values = f[path + "/values"][()]
-        df = pd.DataFrame(values, index=index, columns=columns)
-        return df
+        if path not in f:
+            message = f"{path} not in {f}."
+            self._log(message)
+            return None
+        else:
+            dset = f[path]
+            values, index, columns = [], [], []
+            index_names = copy(self.index_names)
+            valid_names = [lbl for lbl in index_names if lbl in dset.keys()]
+            if valid_names:
+                index = pd.MultiIndex.from_arrays(
+                    [dset[lbl] for lbl in valid_names], names=valid_names
+                )
+                columns = dset.attrs.get("columns", None)
+                if "timepoint" in dset:
+                    columns = f[path + "/timepoint"][()]
+                values = f[path + "/values"][()]
+            df = pd.DataFrame(values, index=index, columns=columns)
+            return df
 
     @property
     def stem(self):
diff --git a/src/postprocessor/core/postprocessing.py b/src/postprocessor/core/postprocessing.py
index 301891d..f7e0811 100644
--- a/src/postprocessor/core/postprocessing.py
+++ b/src/postprocessor/core/postprocessing.py
@@ -192,7 +192,9 @@ class PostProcessor(ProcessABC):
         else:
             raise ("Incorrect dataset")
         # run process on signal
-        if len(signal) and (
+        if signal is None:
+            return None
+        elif len(signal) and (
             not isinstance(loaded_process, LineageProcess)
             or len(loaded_process.lineage)
         ):
diff --git a/src/postprocessor/grouper.py b/src/postprocessor/grouper.py
index cc525ec..4903957 100644
--- a/src/postprocessor/grouper.py
+++ b/src/postprocessor/grouper.py
@@ -128,7 +128,7 @@ class Grouper(ABC):
             ]
             records = [record for record in records if record is not None]
             if len(errors):
-                print("Warning: Positions contain errors {errors}")
+                print(f"Warning: Positions ({errors}) contain errors.")
             assert len(records), "All data sets contain errors"
             # combine into one dataframe
             concat = pd.concat(records, axis=0)
-- 
GitLab