# Extracted from a git patch whose line breaks had been lost:
#   From 6a4cdb30026e6e81823f5749d820a23f2dcaf2bb Mon Sep 17 00:00:00 2001
#   From: pswain <peter.swain@ed.ac.uk>
#   Date: Fri, 15 Dec 2023 17:18:34 +0000
#   Subject: [PATCH] feature(dataloader): get sub_df to return a smaller
#            data frame
#   File: dataloader.py, hunk "@@ -509,6 +509,30 @@ class dataloader:"
# The patch adds `sub_df` as a method of class `dataloader`, placed between
# a method ending in `return self.df.pivot(y, x, signal)` and
# `get_time_series`.
def sub_df(self, signal, duration_threshold):
    """
    Find a sub dataframe of dataloader's main dataframe.

    Rows of ``self.df`` are kept only if their ``id`` belongs to a row of
    ``self.wide_df(signal)`` with sufficiently many non-missing entries.

    Parameters
    ----------
    signal:
        Passed unchanged to ``self.wide_df``; presumably the name of a
        signal column in ``self.df`` (confirm against ``wide_df``).
    duration_threshold: float
        Specifies the fraction of the total duration of the time-lapse
        experiment for which a cell must be present in a trap. Must lie
        in [0, 1]. The comparison is strict: a cell is kept only when its
        number of non-missing values is greater than
        ``duration_threshold`` times the number of columns of the wide
        dataframe, so a threshold of 1 selects no cells.

    Returns
    -------
    sdf: pandas.DataFrame or None
        The rows of ``self.df`` belonging to the selected ids, or None
        (after printing a message) if ``duration_threshold`` is not a
        fraction between 0 and 1.
    """
    # The chained comparison is False for NaN as well as for values
    # outside [0, 1], so NaN is reported instead of silently producing an
    # empty dataframe.
    if not 0 <= duration_threshold <= 1:
        print(
            f"The threshold must be a fraction, not {duration_threshold}."
        )
        return None
    wdf = self.wide_df(signal)
    # number of non-missing measurements in each row of the wide dataframe
    n_present = wdf.notna().sum(axis=1)
    keep = n_present > duration_threshold * wdf.columns.size
    ids_to_keep = wdf.index[keep]
    return self.df[self.df["id"].isin(ids_to_keep)]