From 27f58b18dce9d2e767ef6474da55f08871a43da7 Mon Sep 17 00:00:00 2001 From: Peter Swain <peter.swain@ed.ac.uk> Date: Sat, 18 Feb 2023 18:16:03 +0000 Subject: [PATCH] reorganised, with provisional functions in limbo --- dataloader.py | 30 +++---- figs2pdf.py | 12 +++ growth_rate.py | 87 +++++++++++++++---- entropy.py => limbo/entropy.py | 0 .../find_cytokinesis.py | 0 .../find_cytokinesis_diane.py | 0 search_OMERO.py => limbo/search_OMERO.py | 0 tsa_inprogress.py => limbo/tsa_inprogress.py | 0 plotting.py | 33 +++---- run_gr.py | 49 ----------- 10 files changed, 112 insertions(+), 99 deletions(-) create mode 100644 figs2pdf.py rename entropy.py => limbo/entropy.py (100%) rename find_cytokinesis.py => limbo/find_cytokinesis.py (100%) rename find_cytokinesis_diane.py => limbo/find_cytokinesis_diane.py (100%) rename search_OMERO.py => limbo/search_OMERO.py (100%) rename tsa_inprogress.py => limbo/tsa_inprogress.py (100%) delete mode 100644 run_gr.py diff --git a/dataloader.py b/dataloader.py index 7127279..4f2d320 100644 --- a/dataloader.py +++ b/dataloader.py @@ -6,13 +6,7 @@ import pprint try: from postprocessor.grouper import NameGrouper except ModuleNotFoundError: - try: - import sys - - sys.path.append("/Users/pswain/wip/aliby/src") - from postprocessor.grouper import NameGrouper - except ModuleNotFoundError: - print("Can only load tsv files - cannot find postprocessor") + print("Can only load tsv files - cannot find postprocessor") class dataloader: @@ -70,7 +64,7 @@ class dataloader: kind="line", data=dl.df) """ - def __init__(self, indir=".", outdir=".", ls=True): + def __init__(self, indir=None, outdir=".", ls=True): # from grouper.siglist to abbrevations self.g2a_dict = { "extraction/GFP/max/median": "median_GFP", @@ -87,7 +81,10 @@ class dataloader: self.a2g_dict = {v: k for (k, v) in self.g2a_dict.items()} # establish paths self.outdirpath = Path(outdir) - self.indirpath = Path(indir) + if indir is None: + self.indirpath = None + else: + self.indirpath = Path(indir) if ls: self.ls @@ -104,13 +101,14 @@ class dataloader: .datasets dictionary. """ pp = pprint.PrettyPrinter() - # find raw data - print("\nData directory is", str(self.indirpath.resolve())) - print("Experiments available:") - dirs = [f.name for f in self.indirpath.glob("*") if f.is_dir()] - # directories of data are stored in experiments - self.experiments = {i: name for i, name in enumerate(dirs)} - pp.pprint(self.experiments) + if self.indirpath is not None: + # find h5 data + print("\nData directory is", str(self.indirpath.resolve())) + print("Experiments available:") + dirs = [f.name for f in self.indirpath.glob("*") if f.is_dir()] + # directories of data are stored in experiments + self.experiments = {i: name for i, name in enumerate(dirs)} + pp.pprint(self.experiments) # find processed data print("\nWorking directory is", str(self.outdirpath.resolve())) print("Datasets available:") diff --git a/figs2pdf.py b/figs2pdf.py new file mode 100644 index 0000000..fa269ed --- /dev/null +++ b/figs2pdf.py @@ -0,0 +1,12 @@ +from matplotlib.backends.backend_pdf import PdfPages +import matplotlib.pyplot as plt + + +def figs2pdf(savename): + """Save all open figures to a pdf file.""" + if "." not in savename: + savename += ".pdf" + with PdfPages(savename) as pp: + for i in plt.get_fignums(): + plt.figure(i) + pp.savefig() diff --git a/growth_rate.py b/growth_rate.py index cae9ee1..d3dd568 100644 --- a/growth_rate.py +++ b/growth_rate.py @@ -6,6 +6,71 @@ import numpy as np import pandas as pd import gaussianprocessderivatives as gp +from dataloader import dataloader +from figs2pdf import figs2pdf + + +def find_all_grs( + dataname, + max_no_cells=None, + indir=None, + outdir=".", + use_tsv=True, +): + """ + Find mother and bud growth rates. + + Parameters + ---------- + dataname: str + Name of experiment, passed to dataloader. + max_no_cells: None or int + The maximum number of cells to process. + indir: str (optional) + The directory containing the h5 files. + outdir: str (optional) + The directory containing any tsv files to load and where tsv + files will be saved. + use_tsv: boolean (optional) + If True, load initial data from a tsv file. + + Example + ------- + >>> find_all_grs("Pdr5", max_no_cells=2, outdir="tsv_files") + """ + # only save if all cells are processed + if max_no_cells is None: + save = True + else: + save = False + # get data + dl = dataloader(indir=indir, outdir=outdir) + dl.load(dataname, use_tsv=use_tsv) + b_vol = dl.wide_df("bud_volume") + m_vol = dl.wide_df("volume") + buddings = dl.wide_df("buddings") + # find bud growth rate + res_b = find_gr( + b_vol, + buddings, + max_no_cells=max_no_cells, + bounds={0: (0, 8)}, + ) + if save: + pickle.dump(res_b, open(dataname + "_res_b.pkl", "wb")) + figs2pdf(dataname + "_b.pdf") + plt.close("all") + # find mother growth rate + res_m = find_gr(m_vol, max_no_cells=max_no_cells) + if save: + pickle.dump(res_m, open(dataname + "_res_m.pkl", "wb")) + figs2pdf(dataname + "_m.pdf") + plt.close("all") + # add to dataframe and save + if save: + addgrfrompkl(dl, ".") + dl.save(dataname) + def find_gr( volume_df, @@ -15,9 +80,7 @@ def find_gr( verbose=True, max_no_cells=None, ): - """ - Use a Gaussian process to estimate the single-cell growth rates. - """ + """Use a Gaussian process to find single-cell growth rates.""" # default bounds on hyperparameters # Julian_bounds = {0: (-2, 3), 1: (-2, 0), 2: (0, 3)} if buddings_df is None: @@ -112,12 +175,10 @@ def find_gr( return res_df -### - - def runGP(x, y, bounds, noruns, signals, verbose, title, maxnofigs=70): """ Run GP on a single time series. + Results returned as a dictionary. """ # drop NaNs @@ -164,12 +225,10 @@ def runGP(x, y, bounds, noruns, signals, verbose, title, maxnofigs=70): return res -### - - def reNaN(oldlist, i_nan): """ Add back NaN dropped from an array. + Takes a list of arrays and puts back NaN for each array at indices i_nan. """ @@ -184,13 +243,11 @@ def reNaN(oldlist, i_nan): return oldlist -### - - def addgrfrompkl(dl, pkldir): """ - Load growth rate from pickle files and add to and save dataframe - in a dataloader instance. + Load growth rate from pickle files. + + Add to and save dataframe in a dataloader instance. Arguments --------- @@ -207,5 +264,5 @@ def addgrfrompkl(dl, pkldir): # add to dataframe for res in [res_b, res_m]: for signal in res: - tdf = dl._long_df(res[signal], signal) + tdf = dataloader.long_df(res[signal], signal) dl.df = pd.merge(dl.df, tdf, on=["id", "time"], how="left") diff --git a/entropy.py b/limbo/entropy.py similarity index 100% rename from entropy.py rename to limbo/entropy.py diff --git a/find_cytokinesis.py b/limbo/find_cytokinesis.py similarity index 100% rename from find_cytokinesis.py rename to limbo/find_cytokinesis.py diff --git a/find_cytokinesis_diane.py b/limbo/find_cytokinesis_diane.py similarity index 100% rename from find_cytokinesis_diane.py rename to limbo/find_cytokinesis_diane.py diff --git a/search_OMERO.py b/limbo/search_OMERO.py similarity index 100% rename from search_OMERO.py rename to limbo/search_OMERO.py diff --git a/tsa_inprogress.py b/limbo/tsa_inprogress.py similarity index 100% rename from tsa_inprogress.py rename to limbo/tsa_inprogress.py diff --git a/plotting.py b/plotting.py index c4f616e..9676054 100644 --- a/plotting.py +++ b/plotting.py @@ -18,6 +18,11 @@ def kymograph( title=None, returnfig=False, ): + """ + Plot a heatmap. + + Typically each row is a single cell and the x-axis shows time. + """ if hue == "births": cmap = "Greys" elif "growth_rate" in hue: @@ -62,13 +67,8 @@ def kymograph( return fig, ax -### - - def plot_random_time_series(time, values, signalname=None, number=5): - """ - Plots random time series on mouse click and terminates on a key press - """ + """Plot random time series on mouse click and terminates on a key press.""" fig = plt.figure() go = True @@ -97,9 +97,6 @@ def plot_random_time_series(time, values, signalname=None, number=5): print(".") -### - - def plot_lineage( idx, df, @@ -111,7 +108,8 @@ def plot_lineage( plot_G1=False, ): """ - Plots the signals for one cell lineage. + Plot the signals for one cell lineage. + If "growth_rate" or "volume" is a signal, plots the signal for the mother and the different buds. @@ -138,17 +136,17 @@ def plot_lineage( raise Exception("idx not part of dataframe") signals = gu.makelist(signals) nosubplots = len(signals) - # show buddingss if possible + # show buddings if possible if "buddings" in df.columns: buddings = df[df.id == idx]["buddings"].to_numpy() b_pts = np.where(buddings)[0] if "births" in df.columns: buddings = df[df.id == idx]["births"].to_numpy() b_pts = np.where(buddings)[0] - if len(b_pts) == 1: - nb_pts = np.concatenate((b_pts, [len(buddings) - 1])) - else: - nb_pts = b_pts + if len(b_pts) == 1: + nb_pts = np.concatenate((b_pts, [len(buddings) - 1])) + else: + nb_pts = b_pts # show cytokinesis point if possible if cyto_pts_signal and cyto_pts_signal in df.columns: cyto = df[df.id == idx][cyto_pts_signal].to_numpy() @@ -232,9 +230,6 @@ def plot_lineage( plt.show() -### - - def plot_replicate_array( data, t=None, @@ -247,7 +242,7 @@ def plot_replicate_array( show=True, ): """ - Plots summary statistics versus axis 1 for an array of replicates. + Plot summary statistics versus axis 1 (time) for an array of replicates. Parameters ---------- diff --git a/run_gr.py b/run_gr.py deleted file mode 100644 index 3d76025..0000000 --- a/run_gr.py +++ /dev/null @@ -1,49 +0,0 @@ -import matplotlib.pylab as plt -import pickle -import genutils as gu -from mywela.dataloader import dataloader -from mywela.growth_rate import find_gr, addgrfrompkl - - -datadir = "/Users/pswain/ECDF/Swainlab/aliby_datasets/Arin" -dataname = "26643_2022_05_23_flavin_htb2_glucose_20gpL_01_00" -# dataname = "25681_2022_04_30_flavin_htb2_glucose_10mgpL_01_00" - -max_no_cells = None -use_tsv = True - -pxsize = 0.182 -if max_no_cells is None: - save = True -else: - save = False - -# get data -dl = dataloader( - datadir, outdir="/Users/pswain/Dropbox/wip/uscope_analyses/tsv_data" -) -dl.load(dataname, use_tsv=use_tsv) - -# data -b_vol = dl.wide_df("bud_volume") -m_vol = dl.wide_df("volume") -births = dl.wide_df("births") - -# bud growth rate -res_b = find_gr(b_vol, births, max_no_cells=max_no_cells, bounds={0: (0, 8)}) -if save: - pickle.dump(res_b, open(dataname + "_res_b.pkl", "wb")) - gu.figs2pdf(dataname + "_b.pdf") - plt.close("all") - -# mother growth rate -res_m = find_gr(m_vol, max_no_cells=max_no_cells) -if save: - pickle.dump(res_m, open(dataname + "_res_m.pkl", "wb")) - gu.figs2pdf(dataname + "_m.pdf") - plt.close("all") - -# add to dataframe -if save: - addgrfrompkl(dl, ".") - dl.save(dataname) -- GitLab