From 6a4cdb30026e6e81823f5749d820a23f2dcaf2bb Mon Sep 17 00:00:00 2001
From: pswain <peter.swain@ed.ac.uk>
Date: Fri, 15 Dec 2023 17:18:34 +0000
Subject: [PATCH] feature(dataloader): get sub_df to return a smaller data
 frame

---
 dataloader.py | 24 ++++++++++++++++++++++++
 1 file changed, 24 insertions(+)

diff --git a/dataloader.py b/dataloader.py
index b3c4525..2b789f7 100644
--- a/dataloader.py
+++ b/dataloader.py
@@ -509,6 +509,30 @@ class dataloader:
         """
         return self.df.pivot(y, x, signal)
 
+    def sub_df(self, signal, duration_threshold):
+        """
+        Return the rows of self.df for cells present for long enough.
+
+        Prints a message and returns None if the threshold is not in [0, 1].
+
+        Parameters
+        ----------
+        signal:
+            Passed to self.wide_df; non-NaN entries there count as presence.
+        duration_threshold: float
+            Minimal fraction of the time-lapse experiment's duration (the
+            columns of the wide data frame) for which a cell must have data.
+        """
+        if duration_threshold < 0 or duration_threshold > 1:
+            print(
+                f"The threshold must be a fraction, not {duration_threshold}."
+            )
+            return None
+        wdf = self.wide_df(signal)
+        # >= rather than > so that a threshold of 1 keeps fully present cells
+        keep = wdf.notna().sum(axis=1) >= duration_threshold * wdf.columns.size
+        return self.df[self.df.id.isin(wdf.index[keep.values])]
+
     def get_time_series(self, signal, group=None):
         """
         Extract a signal as a 2D array with each row a time series.
-- 
GitLab