diff --git a/correct_buds.py b/correct_buds.py
index 53ffa1f472976919d16dfe859802f702da4fc919..16aa6b6772d51637945b022fb2d7a04c84e53a5f 100644
--- a/correct_buds.py
+++ b/correct_buds.py
@@ -7,7 +7,8 @@ min_cell_cycle_in_tps = 3
 
 def drop_label(df, label="mother_label"):
     """Drop label from index of data frame."""
-    df = df.reset_index(label).drop(columns=[label])
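+    # guard: reset_index raises a KeyError when label is not an index level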
+    if label in df.index.names:
+        df = df.reset_index(label).drop(columns=[label])
     return df
 
 
@@ -65,10 +66,14 @@ def interp_bud_volume(ind, buddings, bud_volume, figs=True):
                 # find last non-NaN data point
                 ib_real = np.max(np.where(~np.isnan(bud_vol))[0])
                 t_bv = bud_t[: ib_real + 1][~np.isnan(bud_vol[: ib_real + 1])]
-                v_bv = bud_vol[: ib_real + 1][~np.isnan(bud_vol[: ib_real + 1])]
+                v_bv = bud_vol[: ib_real + 1][
+                    ~np.isnan(bud_vol[: ib_real + 1])
+                ]
                 int_bud = interp1d(t_bv, v_bv)
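+                # note: interp1d raises a ValueError for points outside
+                # [t_bv[0], t_bv[-1]] when no fill_value is given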
                 # interpolate missing NaN within bud time series
-                new_bv[ib : ib + ib_real + 1] = int_bud(t[ib : ib + ib_real + 1])
+                new_bv[ib : ib + ib_real + 1] = int_bud(
+                    t[ib : ib + ib_real + 1]
+                )
         # plot
         if figs:
             plot_buds(ind, buddings, bud_volume, new_bv)
diff --git a/dataloader.py b/dataloader.py
index e666538e60625792ba18da25a78639697c4d521e..caff02186f51588eed6044cb07c4665adb2b31d6 100644
--- a/dataloader.py
+++ b/dataloader.py
@@ -4,6 +4,8 @@ from pathlib import Path
 import numpy as np
 import pandas as pd
 
+from wela.correct_buds import correct_buds
+
 try:
     from postprocessor.grouper import NameGrouper
 except ModuleNotFoundError:
@@ -14,6 +16,8 @@ class dataloader:
     """
     Compile aliby data sets into a single extensible data frame.
 
+    Buddings and bud_volume are always loaded first.
+
     Parameters
     ----------
     h5dir: string
@@ -66,7 +70,7 @@ class dataloader:
     """
 
     def __init__(self, h5dir=None, wdir=".", ls=True):
-        # from grouper.siglist to abbrevations
+        # from grouper.signals to abbreviations
         self.g2a_dict = {
             "extraction/cy5/max/median": "cy5",
             "extraction/GFP/max/median": "median_GFP",
@@ -79,11 +83,15 @@ class dataloader:
             "extraction/mCherry/max/max5px_med": "max5px_med_mCherry",
             "extraction/general/None/area": "area",
             "extraction/general/None/volume": "volume",
-            "postprocessing/buddings/extraction_general_None_volume": "buddings",
-            "postprocessing/bud_metric/extraction_general_None_volume": "bud_volume",
             "extraction/Flavin_bgsub/max/mean": "flavin",
             "extraction/GFP/max/nuc_est_conv": "GFP_nuc_est_conv",
         }
+        self.buddings_path = (
+            "postprocessing/buddings/extraction_general_None_volume"
+        )
+        self.bud_volume_path = (
+            "postprocessing/bud_metric/extraction_general_None_volume"
+        )
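+        # these two signals are kept out of g2a_dict because they are
+        # always loaded, and corrected, before any other signal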
         # from abbreviations to grouper.siglist
         self.a2g_dict = {v: k for (k, v) in self.g2a_dict.items()}
         # establish paths
@@ -144,6 +152,8 @@ class dataloader:
     def load(
         self,
         dataname,
+        key_index="buddings",
+        cutoff=0.8,
         extra_g2a_dict=None,
         pxsize=0.182,
         use_tsv=False,
@@ -162,15 +172,20 @@ class dataloader:
         ----------
         dataname: string
             Either the name of the directory for an experiment or a
-            file name for a dataset
+            file name for a dataset.
+        key_index: string
+            Short name of the key record used to select cells.
+        cutoff: float
+            Select cells whose key record covers at least this fraction
+            of the experiment's total duration.
-        g2a_dict: dictionary (optional)
+        extra_g2a_dict: dictionary (optional)
             A dictionary of extra parameters to extract, which relates the
             aliby name to an abbreviation
-        pxsize: float (default 0.182)
+        pxsize: float
             Pixel size in microns, which is used to convert volumes.
-        use_tsv: boolean (default: False)
+        use_tsv: boolean
             If True, always load the data from a tsv file.
-        over_write: boolean (default: False)
+        over_write: boolean
             If True, overwrite the internal dictionary with extra_g2a_dict.
 
         Returns
@@ -181,14 +196,13 @@ class dataloader:
         Example
         -------
         >>> dl.load(dataname, {"extraction/cy5/max/median" : "cy5"})
+        >>> dl.load(dataname, key_index="flavin", cutoff=0.9)
         """
         if dataname[-4:] == ".tsv":
             dataname = dataname[:-4]
             use_tsv = True
         # root name
         self.dataname = dataname
-
-        # load
         if use_tsv:
             self.load_tsv(dataname)
         else:
@@ -201,6 +215,12 @@ class dataloader:
             grouper = self.get_grouper(dataname)
             # find time interval between images
             self.dt = grouper.tintervals
+            # get key index for choosing cells
+            if key_index == "buddings":
+                key_index_path = self.buddings_path
+            else:
+                key_index_path = self.a2g_dict[key_index]
+            key_index = self.get_key_index(grouper, key_index_path, cutoff)
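+            # key_index is now a pandas index of the cells to keep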
             # load data from h5 files
             print(dataname + "\n---")
             print("signals available:")
@@ -208,20 +228,22 @@ class dataloader:
             for signal in signals:
                 print(" ", signal)
             print("\nloading...")
-            first = True
+            # load and correct buddings and bud_volume
+            r_df = self.load_buddings_bud_volume(
+                grouper, cutoff, figs=False, key_index=key_index
+            )
+            # load other signals
             for i, char in enumerate(self.g2a_dict):
                 if char in signals:
                     print(" " + char)
-                    record = grouper.concat_signal(char)
-                    # convert to long dataframe
-                    tdf = self._long_df_with_id(record, self.g2a_dict[char])
-                    # drop redundant columns
-                    tdf = tdf.drop(["position", "trap", "cell_label"], axis=1)
-                    if first:
-                        r_df = tdf.copy()
-                        first = False
-                    else:
-                        r_df = pd.merge(r_df, tdf, how="left")
+                    # load all cells
+                    record = grouper.concat_signal(char, cutoff=0)
+                    # keep cells only in key_index
+                    new_record = self.get_key_cells(record, key_index)
+                    # convert to long data frame
+                    tdf = self.long_df_with_id(new_record, self.g2a_dict[char])
+                    # merge into one data set
+                    r_df = pd.merge(r_df, tdf, how="left")
                 else:
                     print(" Warning: " + char, "not found")
             if "r_df" in locals():
@@ -244,8 +266,46 @@ class dataloader:
             # return grouper
             return grouper
 
+    def get_key_index(self, grouper, key_index_path, cutoff):
+        """Find index of cells that have sufficient measurements of the
+        key record."""
+        record = grouper.concat_signal(key_index_path, cutoff=cutoff)
+        return record.index
+
+    def get_key_cells(self, df, key_index):
+        """Find a smaller data frame with only cells present in the key record."""
+        sdf = df.loc[df.index.intersection(key_index)]
+        return sdf
+
+    def load_buddings_bud_volume(self, grouper, cutoff, figs, key_index):
+        """Load buddings and bud_volume, interpolate over NaNs, and
+        drop adjacent buddings."""
+        # load buddings and all bud_volumes
+        buddings = grouper.concat_signal(self.buddings_path, cutoff=cutoff)
+        bud_volume = grouper.concat_signal(self.bud_volume_path, cutoff=0)
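+        # cutoff=0 loads all cells; selection happens below via key_index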
+        # perform correction
+        new_buddings, new_bud_volume = correct_buds(buddings, bud_volume, figs)
+        # keep cells only in key_index
+        new_buddings = self.get_key_cells(new_buddings, key_index)
+        new_bud_volume = self.get_key_cells(new_bud_volume, key_index)
+        # convert to long df and merge
+        df = self.long_df_with_id(new_buddings, "buddings")
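+        # merging aligns rows on the shared "id" and "time" columns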
+        df = pd.merge(
+            df,
+            self.long_df_with_id(new_bud_volume, "bud_volume"),
+            how="left",
+        )
+        return df
+
     def load_tsv(self, dataname):
-        # load data from tsv files
+        """
+        Load data from tsv files.
+
+        Parameters
+        ----------
+        dataname: string
+            Name of the file without its ".tsv" extension.
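+
+        Example
+        -------
+        Assuming a data set was previously saved as "dataset.tsv" in
+        the working directory:
+
+        >>> dl.load_tsv("dataset")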
+        """
         r_df = pd.read_csv(str(self.wdirpath / (dataname + ".tsv")), sep="\t")
         print("\nloaded", dataname)
         if hasattr(self, "r"):
@@ -272,7 +332,7 @@ class dataloader:
         )
         print("Saved", dataname)
 
-    def _long_df_with_id(self, df, char_name):
+    def long_df_with_id(self, df, char_name):
         """Convert an aliby multi-index dataframe into a long dataframe."""
         df = self.long_df(df, char_name)
         # add unique id for each cell
@@ -290,12 +350,14 @@ class dataloader:
                 + ";"
                 + df["trap"].astype(str),
             )
+        # drop redundant columns
+        df = df.drop(["position", "trap", "cell_label"], axis=1)
         return df
 
     @property
     def revert_df(self):
         """
-        Convert the 'id' column into the standard three columns used by aliby.
+        Convert the 'id' column into the standard columns used by aliby.
 
         These columns are 'position', 'trap', and 'cell_label' - and vice
         versa, either adding three columns to the .df dataframe or removing
@@ -318,7 +380,7 @@ class dataloader:
             # drop columns redundant with "id"
             self.df = self.df.drop(["position", "trap", "cell_label"], axis=1)
         else:
-            print("Cannot revert")
+            print("Cannot revert.")
 
     def wide_df(self, signal, x="time", y="id"):
         """
@@ -392,7 +454,7 @@ class dataloader:
         # create a dataframe using the structure of a pivot
         df = pd.DataFrame(values, index=wdf.index, columns=wdf.columns)
         # convert to long dataframe
-        tdf = self._long_df_with_id(df, signal)
+        tdf = self.long_df_with_id(df, signal)
         # add to r
         self.df = pd.merge(self.df, tdf, how="left")
 
@@ -423,13 +485,13 @@ class dataloader:
 
         Examples
         --------
-        To sort URA7 cells by the sum of their number of births after 10 hours
+        To sort URA7 cells by the sum of their number of births after 10 hours:
         >>> df = dl.get_sorted_df("births", tmax=10, strain="URA7")
 
-        To sort cells by their median fluorescence
+        To sort cells by their median fluorescence:
         >>> df = dl.get_sorted_df("median_GFP")
 
-        To convert "id" back from a categorical data type
+        To convert "id" back from a categorical data type:
         >>> df.id = df.id.astype("object")
         """
         if strain:
@@ -459,9 +521,7 @@ class dataloader:
 
     @property
     def strains(self):
-        """
-        List all strains in df.group
-        """
+        """List all strains in df.group."""
         for strain in list(self.df.group.unique()):
             print(" " + strain)
 
@@ -486,7 +546,7 @@ class dataloader:
 
     @staticmethod
     def long_df(df, char_name, var_name="time"):
-        """Convert an aliby multi-index dataframe into a long dataframe."""
+        """Convert a multi-index dataframe into a long dataframe."""
         # flatten multi-index dataframe
         df = df.copy()
         if isinstance(df, pd.Series):