Skip to content
Snippets Groups Projects
Commit 27f58b18 authored by pswain's avatar pswain
Browse files

reorganised, with provisional functions in limbo

parent d28fc285
No related branches found
No related tags found
No related merge requests found
......@@ -6,13 +6,7 @@ import pprint
try:
from postprocessor.grouper import NameGrouper
except ModuleNotFoundError:
try:
import sys
sys.path.append("/Users/pswain/wip/aliby/src")
from postprocessor.grouper import NameGrouper
except ModuleNotFoundError:
print("Can only load tsv files - cannot find postprocessor")
print("Can only load tsv files - cannot find postprocessor")
class dataloader:
......@@ -70,7 +64,7 @@ class dataloader:
kind="line", data=dl.df)
"""
def __init__(self, indir=".", outdir=".", ls=True):
def __init__(self, indir=None, outdir=".", ls=True):
# from grouper.siglist to abbreviations
self.g2a_dict = {
"extraction/GFP/max/median": "median_GFP",
......@@ -87,7 +81,10 @@ class dataloader:
self.a2g_dict = {v: k for (k, v) in self.g2a_dict.items()}
# establish paths
self.outdirpath = Path(outdir)
self.indirpath = Path(indir)
if indir is None:
self.indirpath = None
else:
self.indirpath = Path(indir)
if ls:
self.ls
......@@ -104,13 +101,14 @@ class dataloader:
.datasets dictionary.
"""
pp = pprint.PrettyPrinter()
# find raw data
print("\nData directory is", str(self.indirpath.resolve()))
print("Experiments available:")
dirs = [f.name for f in self.indirpath.glob("*") if f.is_dir()]
# directories of data are stored in experiments
self.experiments = {i: name for i, name in enumerate(dirs)}
pp.pprint(self.experiments)
if self.indirpath is not None:
# find h5 data
print("\nData directory is", str(self.indirpath.resolve()))
print("Experiments available:")
dirs = [f.name for f in self.indirpath.glob("*") if f.is_dir()]
# directories of data are stored in experiments
self.experiments = {i: name for i, name in enumerate(dirs)}
pp.pprint(self.experiments)
# find processed data
print("\nWorking directory is", str(self.outdirpath.resolve()))
print("Datasets available:")
......
from matplotlib.backends.backend_pdf import PdfPages
import matplotlib.pyplot as plt
def figs2pdf(savename):
    """
    Save all open matplotlib figures to a single pdf file.

    Each open figure, in the order returned by plt.get_fignums(), is
    written as one page.

    Parameters
    ----------
    savename: str
        Name of the output file. If the file name has no extension,
        ".pdf" is appended.
    """
    # imported locally so the function does not rely on a module-level import
    import os

    # test only the extension of the file name: a "." elsewhere in the
    # path, as in "./figs" or "run.1/figs", must not suppress ".pdf"
    if not os.path.splitext(savename)[1]:
        savename += ".pdf"
    with PdfPages(savename) as pp:
        for i in plt.get_fignums():
            # make figure i the current figure so that savefig writes it
            plt.figure(i)
            pp.savefig()
......@@ -6,6 +6,71 @@ import numpy as np
import pandas as pd
import gaussianprocessderivatives as gp
from dataloader import dataloader
from figs2pdf import figs2pdf
def find_all_grs(
    dataname,
    max_no_cells=None,
    indir=None,
    outdir=".",
    use_tsv=True,
):
    """
    Find mother and bud growth rates.

    Loads the data with a dataloader, runs find_gr on the bud volumes
    (using the buddings signal) and on the mother volumes, and, if all
    cells are processed, writes the results to pickle and pdf files and
    adds them to the dataloader's dataframe before saving it.

    The pickle and pdf files are named dataname + "_res_b.pkl",
    "_res_m.pkl", "_b.pdf", and "_m.pdf", and so are written relative
    to the current working directory.

    Parameters
    ----------
    dataname: str
        Name of experiment, passed to dataloader.
    max_no_cells: None or int
        The maximum number of cells to process. Results are only saved
        if this is None.
    indir: str (optional)
        The directory containing the h5 files.
    outdir: str (optional)
        The directory containing any tsv files to load and where tsv
        files will be saved.
    use_tsv: boolean (optional)
        If True, load initial data from a tsv file.

    Example
    -------
    >>> find_all_grs("Pdr5", max_no_cells=2, outdir="tsv_files")
    """
    # only save if all cells are processed
    save = max_no_cells is None
    # get data
    dl = dataloader(indir=indir, outdir=outdir)
    dl.load(dataname, use_tsv=use_tsv)
    b_vol = dl.wide_df("bud_volume")
    m_vol = dl.wide_df("volume")
    buddings = dl.wide_df("buddings")
    # find bud growth rate
    res_b = find_gr(
        b_vol,
        buddings,
        max_no_cells=max_no_cells,
        bounds={0: (0, 8)},
    )
    if save:
        # use a context manager so the pickle file is flushed and closed
        with open(dataname + "_res_b.pkl", "wb") as pkl_file:
            pickle.dump(res_b, pkl_file)
        figs2pdf(dataname + "_b.pdf")
        # NOTE(review): assumes figures are closed only once saved,
        # so that they remain open for inspection otherwise - confirm
        plt.close("all")
    # find mother growth rate
    res_m = find_gr(m_vol, max_no_cells=max_no_cells)
    if save:
        with open(dataname + "_res_m.pkl", "wb") as pkl_file:
            pickle.dump(res_m, pkl_file)
        figs2pdf(dataname + "_m.pdf")
        plt.close("all")
    # add to dataframe and save
    if save:
        # the pickle files above were written to the working directory
        addgrfrompkl(dl, ".")
        dl.save(dataname)
def find_gr(
volume_df,
......@@ -15,9 +80,7 @@ def find_gr(
verbose=True,
max_no_cells=None,
):
"""
Use a Gaussian process to estimate the single-cell growth rates.
"""
"""Use a Gaussian process to find single-cell growth rates."""
# default bounds on hyperparameters
# Julian_bounds = {0: (-2, 3), 1: (-2, 0), 2: (0, 3)}
if buddings_df is None:
......@@ -112,12 +175,10 @@ def find_gr(
return res_df
###
def runGP(x, y, bounds, noruns, signals, verbose, title, maxnofigs=70):
"""
Run GP on a single time series.
Results returned as a dictionary.
"""
# drop NaNs
......@@ -164,12 +225,10 @@ def runGP(x, y, bounds, noruns, signals, verbose, title, maxnofigs=70):
return res
###
def reNaN(oldlist, i_nan):
"""
Add back NaN dropped from an array.
Takes a list of arrays and puts back NaN for each array
at indices i_nan.
"""
......@@ -184,13 +243,11 @@ def reNaN(oldlist, i_nan):
return oldlist
###
def addgrfrompkl(dl, pkldir):
"""
Load growth rate from pickle files and add to and save dataframe
in a dataloader instance.
Load growth rate from pickle files.
Add to and save dataframe in a dataloader instance.
Arguments
---------
......@@ -207,5 +264,5 @@ def addgrfrompkl(dl, pkldir):
# add to dataframe
for res in [res_b, res_m]:
for signal in res:
tdf = dl._long_df(res[signal], signal)
tdf = dataloader.long_df(res[signal], signal)
dl.df = pd.merge(dl.df, tdf, on=["id", "time"], how="left")
File moved
File moved
File moved
File moved
File moved
......@@ -18,6 +18,11 @@ def kymograph(
title=None,
returnfig=False,
):
"""
Plot a heatmap.
Typically each row is a single cell and the x-axis shows time.
"""
if hue == "births":
cmap = "Greys"
elif "growth_rate" in hue:
......@@ -62,13 +67,8 @@ def kymograph(
return fig, ax
###
def plot_random_time_series(time, values, signalname=None, number=5):
"""
Plots random time series on mouse click and terminates on a key press
"""
"""Plot random time series on mouse click and terminate on a key press."""
fig = plt.figure()
go = True
......@@ -97,9 +97,6 @@ def plot_random_time_series(time, values, signalname=None, number=5):
print(".")
###
def plot_lineage(
idx,
df,
......@@ -111,7 +108,8 @@ def plot_lineage(
plot_G1=False,
):
"""
Plots the signals for one cell lineage.
Plot the signals for one cell lineage.
If "growth_rate" or "volume" is a signal, plots the signal for the
mother and the different buds.
......@@ -138,17 +136,17 @@ def plot_lineage(
raise Exception("idx not part of dataframe")
signals = gu.makelist(signals)
nosubplots = len(signals)
# show buddingss if possible
# show buddings if possible
if "buddings" in df.columns:
buddings = df[df.id == idx]["buddings"].to_numpy()
b_pts = np.where(buddings)[0]
if "births" in df.columns:
buddings = df[df.id == idx]["births"].to_numpy()
b_pts = np.where(buddings)[0]
if len(b_pts) == 1:
nb_pts = np.concatenate((b_pts, [len(buddings) - 1]))
else:
nb_pts = b_pts
if len(b_pts) == 1:
nb_pts = np.concatenate((b_pts, [len(buddings) - 1]))
else:
nb_pts = b_pts
# show cytokinesis point if possible
if cyto_pts_signal and cyto_pts_signal in df.columns:
cyto = df[df.id == idx][cyto_pts_signal].to_numpy()
......@@ -232,9 +230,6 @@ def plot_lineage(
plt.show()
###
def plot_replicate_array(
data,
t=None,
......@@ -247,7 +242,7 @@ def plot_replicate_array(
show=True,
):
"""
Plots summary statistics versus axis 1 for an array of replicates.
Plot summary statistics versus axis 1 (time) for an array of replicates.
Parameters
----------
......
import matplotlib.pylab as plt
import pickle
import genutils as gu
from mywela.dataloader import dataloader
from mywela.growth_rate import find_gr, addgrfrompkl
# Script to find bud and mother growth rates for one experiment and,
# when all cells are processed, add them to the saved dataframe.

# directory of raw data and name of the experiment to analyse
datadir = "/Users/pswain/ECDF/Swainlab/aliby_datasets/Arin"
dataname = "26643_2022_05_23_flavin_htb2_glucose_20gpL_01_00"
# dataname = "25681_2022_04_30_flavin_htb2_glucose_10mgpL_01_00"
# set to an int to process only a subset of cells
max_no_cells = None
use_tsv = True
pxsize = 0.182

# only save if all cells are processed
save = max_no_cells is None

# get data
dl = dataloader(
    datadir, outdir="/Users/pswain/Dropbox/wip/uscope_analyses/tsv_data"
)
dl.load(dataname, use_tsv=use_tsv)

# data
b_vol = dl.wide_df("bud_volume")
m_vol = dl.wide_df("volume")
births = dl.wide_df("births")

# bud growth rate
res_b = find_gr(b_vol, births, max_no_cells=max_no_cells, bounds={0: (0, 8)})
if save:
    # use a context manager so the pickle file is flushed and closed
    with open(dataname + "_res_b.pkl", "wb") as pkl_file:
        pickle.dump(res_b, pkl_file)
    gu.figs2pdf(dataname + "_b.pdf")
    # NOTE(review): assumes figures are closed only once saved - confirm
    plt.close("all")

# mother growth rate
res_m = find_gr(m_vol, max_no_cells=max_no_cells)
if save:
    with open(dataname + "_res_m.pkl", "wb") as pkl_file:
        pickle.dump(res_m, pkl_file)
    gu.figs2pdf(dataname + "_m.pdf")
    plt.close("all")

# add to dataframe
if save:
    # the pickle files above were written to the working directory
    addgrfrompkl(dl, ".")
    dl.save(dataname)
0% Loading or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment