From 27f58b18dce9d2e767ef6474da55f08871a43da7 Mon Sep 17 00:00:00 2001
From: Peter Swain <peter.swain@ed.ac.uk>
Date: Sat, 18 Feb 2023 18:16:03 +0000
Subject: [PATCH] reorganised, with provisional functions in limbo

---
 dataloader.py                                 | 30 +++----
 figs2pdf.py                                   | 12 +++
 growth_rate.py                                | 87 +++++++++++++++----
 entropy.py => limbo/entropy.py                |  0
 .../find_cytokinesis.py                       |  0
 .../find_cytokinesis_diane.py                 |  0
 search_OMERO.py => limbo/search_OMERO.py      |  0
 tsa_inprogress.py => limbo/tsa_inprogress.py  |  0
 plotting.py                                   | 33 +++----
 run_gr.py                                     | 49 -----------
 10 files changed, 112 insertions(+), 99 deletions(-)
 create mode 100644 figs2pdf.py
 rename entropy.py => limbo/entropy.py (100%)
 rename find_cytokinesis.py => limbo/find_cytokinesis.py (100%)
 rename find_cytokinesis_diane.py => limbo/find_cytokinesis_diane.py (100%)
 rename search_OMERO.py => limbo/search_OMERO.py (100%)
 rename tsa_inprogress.py => limbo/tsa_inprogress.py (100%)
 delete mode 100644 run_gr.py

diff --git a/dataloader.py b/dataloader.py
index 7127279..4f2d320 100644
--- a/dataloader.py
+++ b/dataloader.py
@@ -6,13 +6,7 @@ import pprint
 try:
     from postprocessor.grouper import NameGrouper
 except ModuleNotFoundError:
-    try:
-        import sys
-
-        sys.path.append("/Users/pswain/wip/aliby/src")
-        from postprocessor.grouper import NameGrouper
-    except ModuleNotFoundError:
-        print("Can only load tsv files - cannot find postprocessor")
+    print("Can only load tsv files - cannot find postprocessor")
 
 
 class dataloader:
@@ -70,7 +64,7 @@ class dataloader:
             kind="line", data=dl.df)
     """
 
-    def __init__(self, indir=".", outdir=".", ls=True):
+    def __init__(self, indir=None, outdir=".", ls=True):
         # from grouper.siglist to abbrevations
         self.g2a_dict = {
             "extraction/GFP/max/median": "median_GFP",
@@ -87,7 +81,10 @@ class dataloader:
         self.a2g_dict = {v: k for (k, v) in self.g2a_dict.items()}
         # establish paths
         self.outdirpath = Path(outdir)
-        self.indirpath = Path(indir)
+        if indir is None:
+            self.indirpath = None
+        else:
+            self.indirpath = Path(indir)
         if ls:
             self.ls
 
@@ -104,13 +101,14 @@ class dataloader:
         .datasets dictionary.
         """
         pp = pprint.PrettyPrinter()
-        # find raw data
-        print("\nData directory is", str(self.indirpath.resolve()))
-        print("Experiments available:")
-        dirs = [f.name for f in self.indirpath.glob("*") if f.is_dir()]
-        # directories of data are stored in experiments
-        self.experiments = {i: name for i, name in enumerate(dirs)}
-        pp.pprint(self.experiments)
+        if self.indirpath is not None:
+            # find h5 data
+            print("\nData directory is", str(self.indirpath.resolve()))
+            print("Experiments available:")
+            dirs = [f.name for f in self.indirpath.glob("*") if f.is_dir()]
+            # directories of data are stored in experiments
+            self.experiments = {i: name for i, name in enumerate(dirs)}
+            pp.pprint(self.experiments)
         # find processed data
         print("\nWorking directory is", str(self.outdirpath.resolve()))
         print("Datasets available:")
diff --git a/figs2pdf.py b/figs2pdf.py
new file mode 100644
index 0000000..fa269ed
--- /dev/null
+++ b/figs2pdf.py
@@ -0,0 +1,12 @@
+from matplotlib.backends.backend_pdf import PdfPages
+import matplotlib.pyplot as plt
+
+
+def figs2pdf(savename):
+    """Save all open figures to a pdf file, appending ".pdf" if needed."""
+    import os.path
+    if not os.path.splitext(savename)[1]:  # "./figs" has "." but no suffix
+        savename += ".pdf"
+    with PdfPages(savename) as pp:
+        for i in plt.get_fignums():
+            pp.savefig(i)  # accepts a figure number as well as a Figure
diff --git a/growth_rate.py b/growth_rate.py
index cae9ee1..d3dd568 100644
--- a/growth_rate.py
+++ b/growth_rate.py
@@ -6,6 +6,71 @@ import numpy as np
 import pandas as pd
 import gaussianprocessderivatives as gp
 
+from dataloader import dataloader
+from figs2pdf import figs2pdf
+
+
+def find_all_grs(
+    dataname,
+    max_no_cells=None,
+    indir=None,
+    outdir=".",
+    use_tsv=True,
+):
+    """
+    Find mother and bud growth rates.
+
+    Parameters
+    ----------
+    dataname: str
+        Name of experiment, passed to dataloader.
+    max_no_cells: None or int
+        Maximum number of cells to process; results are saved only if None.
+    indir: str (optional)
+        The directory containing the h5 files.
+    outdir: str (optional)
+        The directory containing any tsv files to load and where tsv
+        files will be saved.
+    use_tsv: boolean (optional)
+        If True, load initial data from a tsv file.
+
+    Example
+    -------
+    >>> find_all_grs("Pdr5", max_no_cells=2, outdir="tsv_files")
+    """
+    # only save if all cells are processed; pickle and pdf names are built
+    # from dataname alone and "." is the pkldir handed to addgrfrompkl
+    save = max_no_cells is None
+    # get data
+    dl = dataloader(indir=indir, outdir=outdir)
+    dl.load(dataname, use_tsv=use_tsv)
+    b_vol = dl.wide_df("bud_volume")
+    m_vol = dl.wide_df("volume")
+    buddings = dl.wide_df("buddings")
+    # find bud growth rate
+    res_b = find_gr(
+        b_vol,
+        buddings,
+        max_no_cells=max_no_cells,
+        bounds={0: (0, 8)},
+    )
+    if save:
+        with open(dataname + "_res_b.pkl", "wb") as pklfile:
+            pickle.dump(res_b, pklfile)
+        figs2pdf(dataname + "_b.pdf")
+        plt.close("all")
+    # find mother growth rate
+    res_m = find_gr(m_vol, max_no_cells=max_no_cells)
+    if save:
+        with open(dataname + "_res_m.pkl", "wb") as pklfile:
+            pickle.dump(res_m, pklfile)
+        figs2pdf(dataname + "_m.pdf")
+        plt.close("all")
+    # add to dataframe and save
+    if save:
+        addgrfrompkl(dl, ".")
+        dl.save(dataname)
+
 
 def find_gr(
     volume_df,
@@ -15,9 +80,7 @@ def find_gr(
     verbose=True,
     max_no_cells=None,
 ):
-    """
-    Use a Gaussian process to estimate the single-cell growth rates.
-    """
+    """Use a Gaussian process to find single-cell growth rates."""
     # default bounds on hyperparameters
     # Julian_bounds = {0: (-2, 3), 1: (-2, 0), 2: (0, 3)}
     if buddings_df is None:
@@ -112,12 +175,10 @@ def find_gr(
     return res_df
 
 
-###
-
-
 def runGP(x, y, bounds, noruns, signals, verbose, title, maxnofigs=70):
     """
     Run GP on a single time series.
+
     Results returned as a dictionary.
     """
     # drop NaNs
@@ -164,12 +225,10 @@ def runGP(x, y, bounds, noruns, signals, verbose, title, maxnofigs=70):
     return res
 
 
-###
-
-
 def reNaN(oldlist, i_nan):
     """
     Add back NaN dropped from an array.
+
     Takes a list of arrays and puts back NaN for each array
     at indices i_nan.
     """
@@ -184,13 +243,11 @@ def reNaN(oldlist, i_nan):
         return oldlist
 
 
-###
-
-
 def addgrfrompkl(dl, pkldir):
     """
-    Load growth rate from pickle files and add to and save dataframe
-    in a dataloader instance.
+    Load growth rate from pickle files.
+
+    Add to and save dataframe in a dataloader instance.
 
     Arguments
     ---------
@@ -207,5 +264,5 @@ def addgrfrompkl(dl, pkldir):
     # add to dataframe
     for res in [res_b, res_m]:
         for signal in res:
-            tdf = dl._long_df(res[signal], signal)
+            tdf = dataloader.long_df(res[signal], signal)
             dl.df = pd.merge(dl.df, tdf, on=["id", "time"], how="left")
diff --git a/entropy.py b/limbo/entropy.py
similarity index 100%
rename from entropy.py
rename to limbo/entropy.py
diff --git a/find_cytokinesis.py b/limbo/find_cytokinesis.py
similarity index 100%
rename from find_cytokinesis.py
rename to limbo/find_cytokinesis.py
diff --git a/find_cytokinesis_diane.py b/limbo/find_cytokinesis_diane.py
similarity index 100%
rename from find_cytokinesis_diane.py
rename to limbo/find_cytokinesis_diane.py
diff --git a/search_OMERO.py b/limbo/search_OMERO.py
similarity index 100%
rename from search_OMERO.py
rename to limbo/search_OMERO.py
diff --git a/tsa_inprogress.py b/limbo/tsa_inprogress.py
similarity index 100%
rename from tsa_inprogress.py
rename to limbo/tsa_inprogress.py
diff --git a/plotting.py b/plotting.py
index c4f616e..9676054 100644
--- a/plotting.py
+++ b/plotting.py
@@ -18,6 +18,11 @@ def kymograph(
     title=None,
     returnfig=False,
 ):
+    """
+    Plot a heatmap.
+
+    Typically each row is a single cell and the x-axis shows time.
+    """
     if hue == "births":
         cmap = "Greys"
     elif "growth_rate" in hue:
@@ -62,13 +67,8 @@ def kymograph(
         return fig, ax
 
 
-###
-
-
 def plot_random_time_series(time, values, signalname=None, number=5):
-    """
-    Plots random time series on mouse click and terminates on a key press
-    """
+    """Plot random time series on mouse click and terminate on a key press."""
     fig = plt.figure()
     go = True
 
@@ -97,9 +97,6 @@ def plot_random_time_series(time, values, signalname=None, number=5):
         print(".")
 
 
-###
-
-
 def plot_lineage(
     idx,
     df,
@@ -111,7 +108,8 @@ def plot_lineage(
     plot_G1=False,
 ):
     """
-    Plots the signals for one cell lineage.
+    Plot the signals for one cell lineage.
+
     If "growth_rate" or "volume" is a signal, plots the signal for the
     mother and the different buds.
 
@@ -138,17 +136,17 @@ def plot_lineage(
         raise Exception("idx not part of dataframe")
     signals = gu.makelist(signals)
     nosubplots = len(signals)
-    # show buddingss if possible
+    # show buddings if possible
     if "buddings" in df.columns:
         buddings = df[df.id == idx]["buddings"].to_numpy()
         b_pts = np.where(buddings)[0]
     if "births" in df.columns:
         buddings = df[df.id == idx]["births"].to_numpy()
         b_pts = np.where(buddings)[0]
-        if len(b_pts) == 1:
-            nb_pts = np.concatenate((b_pts, [len(buddings) - 1]))
-        else:
-            nb_pts = b_pts
+    if len(b_pts) == 1:
+        nb_pts = np.concatenate((b_pts, [len(buddings) - 1]))
+    else:
+        nb_pts = b_pts
     # show cytokinesis point if possible
     if cyto_pts_signal and cyto_pts_signal in df.columns:
         cyto = df[df.id == idx][cyto_pts_signal].to_numpy()
@@ -232,9 +230,6 @@ def plot_lineage(
         plt.show()
 
 
-###
-
-
 def plot_replicate_array(
     data,
     t=None,
@@ -247,7 +242,7 @@ def plot_replicate_array(
     show=True,
 ):
     """
-    Plots summary statistics versus axis 1 for an array of replicates.
+    Plot summary statistics versus axis 1 (time) for an array of replicates.
 
     Parameters
     ----------
diff --git a/run_gr.py b/run_gr.py
deleted file mode 100644
index 3d76025..0000000
--- a/run_gr.py
+++ /dev/null
@@ -1,49 +0,0 @@
-import matplotlib.pylab as plt
-import pickle
-import genutils as gu
-from mywela.dataloader import dataloader
-from mywela.growth_rate import find_gr, addgrfrompkl
-
-
-datadir = "/Users/pswain/ECDF/Swainlab/aliby_datasets/Arin"
-dataname = "26643_2022_05_23_flavin_htb2_glucose_20gpL_01_00"
-# dataname = "25681_2022_04_30_flavin_htb2_glucose_10mgpL_01_00"
-
-max_no_cells = None
-use_tsv = True
-
-pxsize = 0.182
-if max_no_cells is None:
-    save = True
-else:
-    save = False
-
-# get data
-dl = dataloader(
-    datadir, outdir="/Users/pswain/Dropbox/wip/uscope_analyses/tsv_data"
-)
-dl.load(dataname, use_tsv=use_tsv)
-
-# data
-b_vol = dl.wide_df("bud_volume")
-m_vol = dl.wide_df("volume")
-births = dl.wide_df("births")
-
-# bud growth rate
-res_b = find_gr(b_vol, births, max_no_cells=max_no_cells, bounds={0: (0, 8)})
-if save:
-    pickle.dump(res_b, open(dataname + "_res_b.pkl", "wb"))
-    gu.figs2pdf(dataname + "_b.pdf")
-    plt.close("all")
-
-# mother growth rate
-res_m = find_gr(m_vol, max_no_cells=max_no_cells)
-if save:
-    pickle.dump(res_m, open(dataname + "_res_m.pkl", "wb"))
-    gu.figs2pdf(dataname + "_m.pdf")
-    plt.close("all")
-
-# add to dataframe
-if save:
-    addgrfrompkl(dl, ".")
-    dl.save(dataname)
-- 
GitLab