reorganised, with provisional functions in limbo

27f58b18 · pswain · d28fc285 · 27f58b18 · 27f58b18 · 27f58b18
Commit 27f58b18 authored 2 years ago by pswain
--- a/dataloader.py
+++ b/dataloader.py
@@ -6,13 +6,7 @@ import pprint
 try:
    from postprocessor.grouper import NameGrouper
 except ModuleNotFoundError:
-    try:
-        import sys
-
-        sys.path.append("/Users/pswain/wip/aliby/src")
-        from postprocessor.grouper import NameGrouper
-    except ModuleNotFoundError:
-        print("Can only load tsv files - cannot find postprocessor")
+    print("Can only load tsv files - cannot find postprocessor")


 class dataloader:
@@ -70,7 +64,7 @@ class dataloader:
            kind="line", data=dl.df)
    """

-    def __init__(self, indir=".", outdir=".", ls=True):
+    def __init__(self, indir=None, outdir=".", ls=True):
        # from grouper.siglist to abbrevations
        self.g2a_dict = {
            "extraction/GFP/max/median": "median_GFP",
@@ -87,7 +81,10 @@ class dataloader:
        self.a2g_dict = {v: k for (k, v) in self.g2a_dict.items()}
        # establish paths
        self.outdirpath = Path(outdir)
-        self.indirpath = Path(indir)
+        if indir is None:
+            self.indirpath = None
+        else:
+            self.indirpath = Path(indir)
        if ls:
            self.ls

@@ -104,13 +101,14 @@ class dataloader:
        .datasets dictionary.
        """
        pp = pprint.PrettyPrinter()
-        # find raw data
-        print("\nData directory is", str(self.indirpath.resolve()))
-        print("Experiments available:")
-        dirs = [f.name for f in self.indirpath.glob("*") if f.is_dir()]
-        # directories of data are stored in experiments
-        self.experiments = {i: name for i, name in enumerate(dirs)}
-        pp.pprint(self.experiments)
+        if self.indirpath is not None:
+            # find h5 data
+            print("\nData directory is", str(self.indirpath.resolve()))
+            print("Experiments available:")
+            dirs = [f.name for f in self.indirpath.glob("*") if f.is_dir()]
+            # directories of data are stored in experiments
+            self.experiments = {i: name for i, name in enumerate(dirs)}
+            pp.pprint(self.experiments)
        # find processed data
        print("\nWorking directory is", str(self.outdirpath.resolve()))
        print("Datasets available:")

--- a/figs2pdf.py
+++ b/figs2pdf.py
+from matplotlib.backends.backend_pdf import PdfPages
+import matplotlib.pyplot as plt
+
+
+def figs2pdf(savename):
+    """
+    Save all open figures to a pdf file.
+
+    Parameters
+    ----------
+    savename: str or path-like
+        Name of the pdf file. If the final path component has no
+        extension, ".pdf" is appended.
+    """
+    import os
+
+    savename = os.fspath(savename)
+    # test only the extension, so that a dot in a directory name, such as
+    # "./figs" or "v1.2/figs", does not stop ".pdf" being appended
+    if not os.path.splitext(savename)[1]:
+        savename += ".pdf"
+    with PdfPages(savename) as pp:
+        # PdfPages.savefig with no argument saves the current figure, so
+        # make each open figure current in turn
+        for i in plt.get_fignums():
+            plt.figure(i)
+            pp.savefig()
--- a/growth_rate.py
+++ b/growth_rate.py
@@ -6,6 +6,71 @@ import numpy as np
 import pandas as pd
 import gaussianprocessderivatives as gp

+from dataloader import dataloader
+from figs2pdf import figs2pdf
+
+
+def find_all_grs(
+    dataname,
+    max_no_cells=None,
+    indir=None,
+    outdir=".",
+    use_tsv=True,
+):
+    """
+    Find mother and bud growth rates.
+
+    Parameters
+    ----------
+    dataname: str
+        Name of experiment, passed to dataloader.
+    max_no_cells: None or int
+        The maximum number of cells to process. Results are only saved
+        if None, when all cells are processed.
+    indir: str (optional)
+        The directory containing the h5 files.
+    outdir: str (optional)
+        The directory containing any tsv files to load and where tsv
+        files will be saved.
+    use_tsv: boolean (optional)
+        If True, load initial data from a tsv file.
+
+    Example
+    -------
+    >>> find_all_grs("Pdr5", max_no_cells=2, outdir="tsv_files")
+    """
+    # only save if all cells are processed
+    save = max_no_cells is None
+    # get data
+    dl = dataloader(indir=indir, outdir=outdir)
+    dl.load(dataname, use_tsv=use_tsv)
+    b_vol = dl.wide_df("bud_volume")
+    m_vol = dl.wide_df("volume")
+    buddings = dl.wide_df("buddings")
+    # find bud growth rate
+    res_b = find_gr(
+        b_vol,
+        buddings,
+        max_no_cells=max_no_cells,
+        bounds={0: (0, 8)},
+    )
+    if save:
+        with open(dataname + "_res_b.pkl", "wb") as f:
+            pickle.dump(res_b, f)
+        figs2pdf(dataname + "_b.pdf")
+        plt.close("all")
+    # find mother growth rate
+    res_m = find_gr(m_vol, max_no_cells=max_no_cells)
+    if save:
+        with open(dataname + "_res_m.pkl", "wb") as f:
+            pickle.dump(res_m, f)
+        figs2pdf(dataname + "_m.pdf")
+        plt.close("all")
+    # add to dataframe and save
+    if save:
+        addgrfrompkl(dl, ".")
+        dl.save(dataname)
+

 def find_gr(
    volume_df,
@@ -15,9 +80,7 @@ def find_gr(
    verbose=True,
    max_no_cells=None,
 ):
-    """
-    Use a Gaussian process to estimate the single-cell growth rates.
-    """
+    """Use a Gaussian process to find single-cell growth rates."""
    # default bounds on hyperparameters
    # Julian_bounds = {0: (-2, 3), 1: (-2, 0), 2: (0, 3)}
    if buddings_df is None:
@@ -112,12 +175,10 @@ def find_gr(
    return res_df


-###
-
-
 def runGP(x, y, bounds, noruns, signals, verbose, title, maxnofigs=70):
    """
    Run GP on a single time series.
+
    Results returned as a dictionary.
    """
    # drop NaNs
@@ -164,12 +225,10 @@ def runGP(x, y, bounds, noruns, signals, verbose, title, maxnofigs=70):
    return res


-###
-
-
 def reNaN(oldlist, i_nan):
    """
    Add back NaN dropped from an array.
+
    Takes a list of arrays and puts back NaN for each array
    at indices i_nan.
    """
@@ -184,13 +243,11 @@ def reNaN(oldlist, i_nan):
        return oldlist


-###
-
-
 def addgrfrompkl(dl, pkldir):
    """
-    Load growth rate from pickle files and add to and save dataframe
-    in a dataloader instance.
+    Load growth rate from pickle files.
+
+    Add to and save dataframe in a dataloader instance.

    Arguments
    ---------
@@ -207,5 +264,5 @@ def addgrfrompkl(dl, pkldir):
    # add to dataframe
    for res in [res_b, res_m]:
        for signal in res:
-            tdf = dl._long_df(res[signal], signal)
+            tdf = dataloader.long_df(res[signal], signal)
            dl.df = pd.merge(dl.df, tdf, on=["id", "time"], how="left")
--- a/entropy.py
+++ b/entropy.py
--- a/find_cytokinesis.py
+++ b/find_cytokinesis.py
--- a/find_cytokinesis_diane.py
+++ b/find_cytokinesis_diane.py
--- a/search_OMERO.py
+++ b/search_OMERO.py
--- a/tsa_inprogress.py
+++ b/tsa_inprogress.py
--- a/plotting.py
+++ b/plotting.py
@@ -18,6 +18,11 @@ def kymograph(
    title=None,
    returnfig=False,
 ):
+    """
+    Plot a heatmap.
+
+    Typically each row is a single cell and the x-axis shows time.
+    """
    if hue == "births":
        cmap = "Greys"
    elif "growth_rate" in hue:
@@ -62,13 +67,8 @@ def kymograph(
        return fig, ax


-###
-
-
 def plot_random_time_series(time, values, signalname=None, number=5):
-    """
-    Plots random time series on mouse click and terminates on a key press
-    """
+    """Plot random time series on mouse click and terminate on a key press."""
    fig = plt.figure()
    go = True

@@ -97,9 +97,6 @@ def plot_random_time_series(time, values, signalname=None, number=5):
        print(".")


-###
-
-
 def plot_lineage(
    idx,
    df,
@@ -111,7 +108,8 @@ def plot_lineage(
    plot_G1=False,
 ):
    """
-    Plots the signals for one cell lineage.
+    Plot the signals for one cell lineage.
+
    If "growth_rate" or "volume" is a signal, plots the signal for the
    mother and the different buds.

@@ -138,17 +136,17 @@ def plot_lineage(
        raise Exception("idx not part of dataframe")
    signals = gu.makelist(signals)
    nosubplots = len(signals)
-    # show buddingss if possible
+    # show buddings if possible
    if "buddings" in df.columns:
        buddings = df[df.id == idx]["buddings"].to_numpy()
        b_pts = np.where(buddings)[0]
    if "births" in df.columns:
        buddings = df[df.id == idx]["births"].to_numpy()
        b_pts = np.where(buddings)[0]
-        if len(b_pts) == 1:
-            nb_pts = np.concatenate((b_pts, [len(buddings) - 1]))
-        else:
-            nb_pts = b_pts
+    if len(b_pts) == 1:
+        nb_pts = np.concatenate((b_pts, [len(buddings) - 1]))
+    else:
+        nb_pts = b_pts
    # show cytokinesis point if possible
    if cyto_pts_signal and cyto_pts_signal in df.columns:
        cyto = df[df.id == idx][cyto_pts_signal].to_numpy()
@@ -232,9 +230,6 @@ def plot_lineage(
        plt.show()


-###
-
-
 def plot_replicate_array(
    data,
    t=None,
@@ -247,7 +242,7 @@ def plot_replicate_array(
    show=True,
 ):
    """
-    Plots summary statistics versus axis 1 for an array of replicates.
+    Plot summary statistics versus axis 1 (time) for an array of replicates.

    Parameters
    ----------

--- a/run_gr.py
+++ b/run_gr.py
-import matplotlib.pylab as plt
-import pickle
-import genutils as gu
-from mywela.dataloader import dataloader
-from mywela.growth_rate import find_gr, addgrfrompkl
-
-
-datadir = "/Users/pswain/ECDF/Swainlab/aliby_datasets/Arin"
-dataname = "26643_2022_05_23_flavin_htb2_glucose_20gpL_01_00"
-# dataname = "25681_2022_04_30_flavin_htb2_glucose_10mgpL_01_00"
-
-max_no_cells = None
-use_tsv = True
-
-pxsize = 0.182
-if max_no_cells is None:
-    save = True
-else:
-    save = False
-
-# get data
-dl = dataloader(
-    datadir, outdir="/Users/pswain/Dropbox/wip/uscope_analyses/tsv_data"
-)
-dl.load(dataname, use_tsv=use_tsv)
-
-# data
-b_vol = dl.wide_df("bud_volume")
-m_vol = dl.wide_df("volume")
-births = dl.wide_df("births")
-
-# bud growth rate
-res_b = find_gr(b_vol, births, max_no_cells=max_no_cells, bounds={0: (0, 8)})
-if save:
-    pickle.dump(res_b, open(dataname + "_res_b.pkl", "wb"))
-    gu.figs2pdf(dataname + "_b.pdf")
-    plt.close("all")
-
-# mother growth rate
-res_m = find_gr(m_vol, max_no_cells=max_no_cells)
-if save:
-    pickle.dump(res_m, open(dataname + "_res_m.pkl", "wb"))
-    gu.figs2pdf(dataname + "_m.pdf")
-    plt.close("all")
-
-# add to dataframe
-if save:
-    addgrfrompkl(dl, ".")
-    dl.save(dataname)