Skip to content
Snippets Groups Projects
Commit 27f58b18 authored by pswain's avatar pswain
Browse files

reorganised, with provisional functions in limbo

parent d28fc285
No related branches found
No related tags found
No related merge requests found
...@@ -6,13 +6,7 @@ import pprint ...@@ -6,13 +6,7 @@ import pprint
try: try:
from postprocessor.grouper import NameGrouper from postprocessor.grouper import NameGrouper
except ModuleNotFoundError: except ModuleNotFoundError:
try: print("Can only load tsv files - cannot find postprocessor")
import sys
sys.path.append("/Users/pswain/wip/aliby/src")
from postprocessor.grouper import NameGrouper
except ModuleNotFoundError:
print("Can only load tsv files - cannot find postprocessor")
class dataloader: class dataloader:
...@@ -70,7 +64,7 @@ class dataloader: ...@@ -70,7 +64,7 @@ class dataloader:
kind="line", data=dl.df) kind="line", data=dl.df)
""" """
def __init__(self, indir=".", outdir=".", ls=True): def __init__(self, indir=None, outdir=".", ls=True):
# from grouper.siglist to abbrevations # from grouper.siglist to abbrevations
self.g2a_dict = { self.g2a_dict = {
"extraction/GFP/max/median": "median_GFP", "extraction/GFP/max/median": "median_GFP",
...@@ -87,7 +81,10 @@ class dataloader: ...@@ -87,7 +81,10 @@ class dataloader:
self.a2g_dict = {v: k for (k, v) in self.g2a_dict.items()} self.a2g_dict = {v: k for (k, v) in self.g2a_dict.items()}
# establish paths # establish paths
self.outdirpath = Path(outdir) self.outdirpath = Path(outdir)
self.indirpath = Path(indir) if indir is None:
self.indirpath = None
else:
self.indirpath = Path(indir)
if ls: if ls:
self.ls self.ls
...@@ -104,13 +101,14 @@ class dataloader: ...@@ -104,13 +101,14 @@ class dataloader:
.datasets dictionary. .datasets dictionary.
""" """
pp = pprint.PrettyPrinter() pp = pprint.PrettyPrinter()
# find raw data if self.indirpath is not None:
print("\nData directory is", str(self.indirpath.resolve())) # find h5 data
print("Experiments available:") print("\nData directory is", str(self.indirpath.resolve()))
dirs = [f.name for f in self.indirpath.glob("*") if f.is_dir()] print("Experiments available:")
# directories of data are stored in experiments dirs = [f.name for f in self.indirpath.glob("*") if f.is_dir()]
self.experiments = {i: name for i, name in enumerate(dirs)} # directories of data are stored in experiments
pp.pprint(self.experiments) self.experiments = {i: name for i, name in enumerate(dirs)}
pp.pprint(self.experiments)
# find processed data # find processed data
print("\nWorking directory is", str(self.outdirpath.resolve())) print("\nWorking directory is", str(self.outdirpath.resolve()))
print("Datasets available:") print("Datasets available:")
......
from matplotlib.backends.backend_pdf import PdfPages
import matplotlib.pyplot as plt
def figs2pdf(savename):
    """
    Save all open figures to a pdf file.

    Parameters
    ----------
    savename: str or path-like
        Name of the file to write. If the final component of the path
        has no extension, ".pdf" is appended.

    Example
    -------
    >>> figs2pdf("results/myfigs")
    """
    import os

    savename = os.fspath(savename)
    # test only the final path component for an extension, so that a dot
    # elsewhere in the path (e.g. "./figs/out") does not suppress ".pdf"
    if not os.path.splitext(savename)[1]:
        savename += ".pdf"
    with PdfPages(savename) as pp:
        for i in plt.get_fignums():
            # make figure i current so that savefig writes that figure
            plt.figure(i)
            pp.savefig()
...@@ -6,6 +6,71 @@ import numpy as np ...@@ -6,6 +6,71 @@ import numpy as np
import pandas as pd import pandas as pd
import gaussianprocessderivatives as gp import gaussianprocessderivatives as gp
from dataloader import dataloader
from figs2pdf import figs2pdf
def find_all_grs(
    dataname,
    max_no_cells=None,
    indir=None,
    outdir=".",
    use_tsv=True,
):
    """
    Find mother and bud growth rates.

    Growth rates are found with find_gr from the "bud_volume" and
    "volume" signals of the loaded data set. Results are only saved when
    all cells are processed, i.e. when max_no_cells is None. In that case
    the results are pickled to <dataname>_res_b.pkl and
    <dataname>_res_m.pkl and the open figures are written to
    <dataname>_b.pdf and <dataname>_m.pdf, all in the current working
    directory; the growth rates are then added to the dataloader's
    dataframe, which is saved.

    Parameters
    ----------
    dataname: str
        Name of experiment, passed to dataloader.
    max_no_cells: None or int
        The maximum number of cells to process. If not None, nothing
        is saved.
    indir: str (optional)
        The directory containing the h5 files.
    outdir: str (optional)
        The directory containing any tsv files to load and where tsv
        files will be saved.
    use_tsv: boolean (optional)
        If True, load initial data from a tsv file.

    Example
    -------
    >>> find_all_grs("Pdr5", max_no_cells=2, outdir="tsv_files")
    """
    # only save if all cells are processed
    save = max_no_cells is None
    # get data
    dl = dataloader(indir=indir, outdir=outdir)
    dl.load(dataname, use_tsv=use_tsv)
    b_vol = dl.wide_df("bud_volume")
    m_vol = dl.wide_df("volume")
    buddings = dl.wide_df("buddings")
    # find bud growth rate
    res_b = find_gr(
        b_vol,
        buddings,
        max_no_cells=max_no_cells,
        bounds={0: (0, 8)},
    )
    if save:
        # close the file explicitly so that the pickle is fully written
        # before addgrfrompkl reads it back
        with open(dataname + "_res_b.pkl", "wb") as f:
            pickle.dump(res_b, f)
        figs2pdf(dataname + "_b.pdf")
        # figs2pdf saves every open figure: close the bud figures so that
        # the mother pdf holds only the mother figures
        plt.close("all")
    # find mother growth rate
    res_m = find_gr(m_vol, max_no_cells=max_no_cells)
    if save:
        with open(dataname + "_res_m.pkl", "wb") as f:
            pickle.dump(res_m, f)
        figs2pdf(dataname + "_m.pdf")
        plt.close("all")
    # add to dataframe and save
    if save:
        # the pickle files were written to the current working directory
        addgrfrompkl(dl, ".")
        dl.save(dataname)
def find_gr( def find_gr(
volume_df, volume_df,
...@@ -15,9 +80,7 @@ def find_gr( ...@@ -15,9 +80,7 @@ def find_gr(
verbose=True, verbose=True,
max_no_cells=None, max_no_cells=None,
): ):
""" """Use a Gaussian process to find single-cell growth rates."""
Use a Gaussian process to estimate the single-cell growth rates.
"""
# default bounds on hyperparameters # default bounds on hyperparameters
# Julian_bounds = {0: (-2, 3), 1: (-2, 0), 2: (0, 3)} # Julian_bounds = {0: (-2, 3), 1: (-2, 0), 2: (0, 3)}
if buddings_df is None: if buddings_df is None:
...@@ -112,12 +175,10 @@ def find_gr( ...@@ -112,12 +175,10 @@ def find_gr(
return res_df return res_df
###
def runGP(x, y, bounds, noruns, signals, verbose, title, maxnofigs=70): def runGP(x, y, bounds, noruns, signals, verbose, title, maxnofigs=70):
""" """
Run GP on a single time series. Run GP on a single time series.
Results returned as a dictionary. Results returned as a dictionary.
""" """
# drop NaNs # drop NaNs
...@@ -164,12 +225,10 @@ def runGP(x, y, bounds, noruns, signals, verbose, title, maxnofigs=70): ...@@ -164,12 +225,10 @@ def runGP(x, y, bounds, noruns, signals, verbose, title, maxnofigs=70):
return res return res
###
def reNaN(oldlist, i_nan): def reNaN(oldlist, i_nan):
""" """
Add back NaN dropped from an array. Add back NaN dropped from an array.
Takes a list of arrays and puts back NaN for each array Takes a list of arrays and puts back NaN for each array
at indices i_nan. at indices i_nan.
""" """
...@@ -184,13 +243,11 @@ def reNaN(oldlist, i_nan): ...@@ -184,13 +243,11 @@ def reNaN(oldlist, i_nan):
return oldlist return oldlist
###
def addgrfrompkl(dl, pkldir): def addgrfrompkl(dl, pkldir):
""" """
Load growth rate from pickle files and add to and save dataframe Load growth rate from pickle files.
in a dataloader instance.
Add to and save dataframe in a dataloader instance.
Arguments Arguments
--------- ---------
...@@ -207,5 +264,5 @@ def addgrfrompkl(dl, pkldir): ...@@ -207,5 +264,5 @@ def addgrfrompkl(dl, pkldir):
# add to dataframe # add to dataframe
for res in [res_b, res_m]: for res in [res_b, res_m]:
for signal in res: for signal in res:
tdf = dl._long_df(res[signal], signal) tdf = dataloader.long_df(res[signal], signal)
dl.df = pd.merge(dl.df, tdf, on=["id", "time"], how="left") dl.df = pd.merge(dl.df, tdf, on=["id", "time"], how="left")
File moved
File moved
File moved
File moved
File moved
...@@ -18,6 +18,11 @@ def kymograph( ...@@ -18,6 +18,11 @@ def kymograph(
title=None, title=None,
returnfig=False, returnfig=False,
): ):
"""
Plot a heatmap.
Typically each row is a single cell and the x-axis shows time.
"""
if hue == "births": if hue == "births":
cmap = "Greys" cmap = "Greys"
elif "growth_rate" in hue: elif "growth_rate" in hue:
...@@ -62,13 +67,8 @@ def kymograph( ...@@ -62,13 +67,8 @@ def kymograph(
return fig, ax return fig, ax
###
def plot_random_time_series(time, values, signalname=None, number=5): def plot_random_time_series(time, values, signalname=None, number=5):
""" """Plot random time series on mouse click and terminates on a key press."""
Plots random time series on mouse click and terminates on a key press
"""
fig = plt.figure() fig = plt.figure()
go = True go = True
...@@ -97,9 +97,6 @@ def plot_random_time_series(time, values, signalname=None, number=5): ...@@ -97,9 +97,6 @@ def plot_random_time_series(time, values, signalname=None, number=5):
print(".") print(".")
###
def plot_lineage( def plot_lineage(
idx, idx,
df, df,
...@@ -111,7 +108,8 @@ def plot_lineage( ...@@ -111,7 +108,8 @@ def plot_lineage(
plot_G1=False, plot_G1=False,
): ):
""" """
Plots the signals for one cell lineage. Plot the signals for one cell lineage.
If "growth_rate" or "volume" is a signal, plots the signal for the If "growth_rate" or "volume" is a signal, plots the signal for the
mother and the different buds. mother and the different buds.
...@@ -138,17 +136,17 @@ def plot_lineage( ...@@ -138,17 +136,17 @@ def plot_lineage(
raise Exception("idx not part of dataframe") raise Exception("idx not part of dataframe")
signals = gu.makelist(signals) signals = gu.makelist(signals)
nosubplots = len(signals) nosubplots = len(signals)
# show buddingss if possible # show buddings if possible
if "buddings" in df.columns: if "buddings" in df.columns:
buddings = df[df.id == idx]["buddings"].to_numpy() buddings = df[df.id == idx]["buddings"].to_numpy()
b_pts = np.where(buddings)[0] b_pts = np.where(buddings)[0]
if "births" in df.columns: if "births" in df.columns:
buddings = df[df.id == idx]["births"].to_numpy() buddings = df[df.id == idx]["births"].to_numpy()
b_pts = np.where(buddings)[0] b_pts = np.where(buddings)[0]
if len(b_pts) == 1: if len(b_pts) == 1:
nb_pts = np.concatenate((b_pts, [len(buddings) - 1])) nb_pts = np.concatenate((b_pts, [len(buddings) - 1]))
else: else:
nb_pts = b_pts nb_pts = b_pts
# show cytokinesis point if possible # show cytokinesis point if possible
if cyto_pts_signal and cyto_pts_signal in df.columns: if cyto_pts_signal and cyto_pts_signal in df.columns:
cyto = df[df.id == idx][cyto_pts_signal].to_numpy() cyto = df[df.id == idx][cyto_pts_signal].to_numpy()
...@@ -232,9 +230,6 @@ def plot_lineage( ...@@ -232,9 +230,6 @@ def plot_lineage(
plt.show() plt.show()
###
def plot_replicate_array( def plot_replicate_array(
data, data,
t=None, t=None,
...@@ -247,7 +242,7 @@ def plot_replicate_array( ...@@ -247,7 +242,7 @@ def plot_replicate_array(
show=True, show=True,
): ):
""" """
Plots summary statistics versus axis 1 for an array of replicates. Plot summary statistics versus axis 1 (time) for an array of replicates.
Parameters Parameters
---------- ----------
......
import matplotlib.pylab as plt
import pickle
import genutils as gu
from mywela.dataloader import dataloader
from mywela.growth_rate import find_gr, addgrfrompkl
# directory of the experiments and the name of the experiment to analyse
datadir = "/Users/pswain/ECDF/Swainlab/aliby_datasets/Arin"
dataname = "26643_2022_05_23_flavin_htb2_glucose_20gpL_01_00"
# dataname = "25681_2022_04_30_flavin_htb2_glucose_10mgpL_01_00"
# maximum number of cells to process; None processes all cells
max_no_cells = None
use_tsv = True
# NOTE(review): pxsize is not used below - confirm whether it is needed
pxsize = 0.182

# only save if all cells are processed
save = max_no_cells is None

# get data
dl = dataloader(
    datadir, outdir="/Users/pswain/Dropbox/wip/uscope_analyses/tsv_data"
)
dl.load(dataname, use_tsv=use_tsv)

# data
b_vol = dl.wide_df("bud_volume")
m_vol = dl.wide_df("volume")
births = dl.wide_df("births")

# bud growth rate
res_b = find_gr(b_vol, births, max_no_cells=max_no_cells, bounds={0: (0, 8)})
if save:
    # close the file explicitly so that the pickle is fully written
    # before addgrfrompkl reads it back
    with open(dataname + "_res_b.pkl", "wb") as f:
        pickle.dump(res_b, f)
    gu.figs2pdf(dataname + "_b.pdf")
    # close the bud figures before the mother figures are generated
    plt.close("all")

# mother growth rate
res_m = find_gr(m_vol, max_no_cells=max_no_cells)
if save:
    with open(dataname + "_res_m.pkl", "wb") as f:
        pickle.dump(res_m, f)
    gu.figs2pdf(dataname + "_m.pdf")
    plt.close("all")

# add to dataframe
if save:
    # the pickle files were written to the current working directory
    addgrfrompkl(dl, ".")
    dl.save(dataname)
0% Loading or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment