Commit 723ca2c7 authored by Alán Muñoz

style(blackify): cleanup and use pyproject limit

parent b688da7a
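The commit message points at a line-length limit configured in pyproject.toml. For reference, a Black section of that kind typically looks like the sketch below; the exact values in this repository are not visible in the diff, so the limit and target version here are assumptions (the re-wrapping in the hunks suggests a limit around 79 rather than Black's default of 88).

# pyproject.toml (illustrative sketch, values assumed)
[tool.black]
line-length = 79
target-version = ["py38"]

With the limit defined in pyproject.toml, running black from the repository root picks it up automatically, which is what produces the line-wrapping changes in the hunks below.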
@@ -12,7 +12,6 @@ import pandas as pd
 from utils_find_1st import find_1st, cmp_larger
 import more_itertools as mit
-from scipy.signal import savgol_filter
 # from scipy.optimize import linear_sum_assignment
 # from scipy.optimize import curve_fit
@@ -42,12 +41,13 @@ def max_ntps(track: pd.Series) -> int:
 def max_nonstop_ntps(track: pd.Series) -> int:
     nona_tracks = track.notna()
     consecutive_nonas_grouped = [
-        len(list(x)) for x in mit.consecutive_groups(np.flatnonzero(nona_tracks))
+        len(list(x))
+        for x in mit.consecutive_groups(np.flatnonzero(nona_tracks))
     ]
     return max(consecutive_nonas_grouped)

-def get_tracks_ntps(tracks: pd.DataFrame) -> pd.Series:
+def get_tracks_ntps(tracks: pd.DataFrame) -> pd.FrameorSeriesUnion:
     return tracks.apply(max_ntps, axis=1)
@@ -73,7 +73,9 @@ def get_avg_grs(tracks: pd.DataFrame) -> pd.DataFrame:
     return tracks.apply(get_avg_gr, axis=1)

-def clean_tracks(tracks, min_len: int = 15, min_gr: float = 1.0) -> pd.DataFrame:
+def clean_tracks(
+    tracks, min_len: int = 15, min_gr: float = 1.0
+) -> pd.DataFrame:
     """
     Clean small non-growing tracks and return the reduced dataframe
@@ -227,7 +229,9 @@ def get_joinable(tracks, smooth=False, tol=0.1, window=5, degree=3) -> dict:
         clean = clean_tracks(
             tracks, min_len=window + 1, min_gr=0.9
         ) # get useful tracks
-        savgol_on_srs = lambda x: non_uniform_savgol(x.index, x.values, window, degree)
+        savgol_on_srs = lambda x: non_uniform_savgol(
+            x.index, x.values, window, degree
+        )
         contig = clean.groupby(["trap"]).apply(get_contiguous_pairs)
         contig = contig.loc[contig.apply(len) > 0]
         flat = set([k for v in contig.values for i in v for j in i for k in j])
@@ -236,7 +240,9 @@ def get_joinable(tracks, smooth=False, tol=0.1, window=5, degree=3) -> dict:
         contig = tracks.groupby(["trap"]).apply(get_contiguous_pairs)
         contig = contig.loc[contig.apply(len) > 0]
         flat = set([k for v in contig.values for i in v for j in i for k in j])
-        smoothed_tracks = tracks.loc[flat].apply(lambda x: np.array(x.values), axis=1)
+        smoothed_tracks = tracks.loc[flat].apply(
+            lambda x: np.array(x.values), axis=1
+        )
     # fetch edges from ids TODO (IF necessary, here we can compare growth rates)
     idx_to_edge = lambda preposts: [
@@ -263,7 +269,9 @@ def get_joinable(tracks, smooth=False, tol=0.1, window=5, degree=3) -> dict:
             pre_res.append(
                 np.poly1d(np.polyfit(range(len(y)), y, 1))(len(y) + 1),
             )
-        pos_res = [get_means(smoothed_tracks.loc[post], window) for post in posts]
+        pos_res = [
+            get_means(smoothed_tracks.loc[post], window) for post in posts
+        ]
         result.append([pre_res, pos_res])
     return result
@@ -296,7 +304,8 @@ def get_joinable(tracks, smooth=False, tol=0.1, window=5, degree=3) -> dict:
     # match local with global ids
     joinable_ids = [
-        localid_to_idx(closest_pairs.loc[i], contig.loc[i]) for i in closest_pairs.index
+        localid_to_idx(closest_pairs.loc[i], contig.loc[i])
+        for i in closest_pairs.index
     ]
     return [pair for pairset in joinable_ids for pair in pairset]
@@ -341,7 +350,9 @@ def localid_to_idx(local_ids, contig_trap):
     for i, pairs in enumerate(local_ids):
         if len(pairs):
             for left, right in pairs:
-                lin_pairs.append((contig_trap[i][0][left], contig_trap[i][1][right]))
+                lin_pairs.append(
+                    (contig_trap[i][0][left], contig_trap[i][1][right])
+                )
     return lin_pairs
@@ -355,11 +366,14 @@ def get_dMetric_wrap(lst: List, **kwargs):
 def solve_matrices_wrap(dMetric: List, edges: List, **kwargs):
     return [
-        solve_matrices(mat, edgeset, **kwargs) for mat, edgeset in zip(dMetric, edges)
+        solve_matrices(mat, edgeset, **kwargs)
+        for mat, edgeset in zip(dMetric, edges)
     ]

-def get_dMetric(pre: List[float], post: List[float], tol: Union[float, int] = 1):
+def get_dMetric(
+    pre: List[float], post: List[float], tol: Union[float, int] = 1
+):
     """Calculate a cost matrix
     input
@@ -376,13 +390,18 @@ def get_dMetric(pre: List[float], post: List[float], tol: Union[float, int] = 1)
     else:
         dMetric = np.abs(np.subtract.outer(pre, post))
-    dMetric[np.isnan(dMetric)] = tol + 1 + np.nanmax(dMetric) # nans will be filtered
+    dMetric[np.isnan(dMetric)] = (
+        tol + 1 + np.nanmax(dMetric)
+    ) # nans will be filtered
     return dMetric

-def solve_matrices(dMetric: np.ndarray, prepost: List, tol: Union[float, int] = 1):
+def solve_matrices(
+    dMetric: np.ndarray, prepost: List, tol: Union[float, int] = 1
+):
     """
-    Solve the distance matrices obtained in get_dMetric and/or merged from independent dMetric matrices
+    Solve the distance matrices obtained in get_dMetric and/or merged from
+    independent dMetric matrices
     """
     ids = solve_matrix(dMetric)
@@ -399,7 +418,9 @@ def solve_matrices(dMetric: np.ndarray, prepost: List, tol: Union[float, int] =
     return [idx for idx, res in zip(zip(*ids), result) if res <= tol]

-def get_closest_pairs(pre: List[float], post: List[float], tol: Union[float, int] = 1):
+def get_closest_pairs(
+    pre: List[float], post: List[float], tol: Union[float, int] = 1
+):
     """Calculate a cost matrix the Hungarian algorithm to pick the best set of
     options
@@ -479,12 +500,13 @@ def get_contiguous_pairs(tracks: pd.DataFrame) -> list:
     :param tracks: (m x n) dataframe where rows are cell tracks and
     columns are timepoints
-    :param min_dgr: float minimum difference in growth rate from the interpolation
+    :param min_dgr: float minimum difference in growth rate from
+    the interpolation
     """
-    # indices = np.where(tracks.notna())
     mins, maxes = [
-        tracks.notna().apply(np.where, axis=1).apply(fn) for fn in (np.min, np.max)
+        tracks.notna().apply(np.where, axis=1).apply(fn)
+        for fn in (np.min, np.max)
     ]
     mins_d = mins.groupby(mins).apply(lambda x: x.index.tolist())
@@ -492,7 +514,9 @@ def get_contiguous_pairs(tracks: pd.DataFrame) -> list:
     # TODO add support for skipping time points
     maxes_d = maxes.groupby(maxes).apply(lambda x: x.index.tolist())
-    common = sorted(set(mins_d.index).intersection(maxes_d.index), reverse=True)
+    common = sorted(
+        set(mins_d.index).intersection(maxes_d.index), reverse=True
+    )
     return [(maxes_d[t], mins_d[t]) for t in common]
......
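The functions touched above (get_dMetric, solve_matrices, get_closest_pairs) follow a cost-matrix-plus-assignment pattern: build |pre - post| distances, push NaNs beyond the tolerance, solve the assignment, and keep only pairs within tol. A self-contained sketch of that idea using SciPy's Hungarian solver (an editor's illustration, not the module's own solve_matrix):

import numpy as np
from scipy.optimize import linear_sum_assignment


def closest_pairs_sketch(pre, post, tol=1.0):
    # cost matrix of absolute differences, as in get_dMetric
    cost = np.abs(np.subtract.outer(np.asarray(pre, float), np.asarray(post, float)))
    # push NaNs above the tolerance so they are never kept
    cost[np.isnan(cost)] = tol + 1 + np.nanmax(cost)
    rows, cols = linear_sum_assignment(cost)  # Hungarian algorithm
    # keep only assignments whose cost is within the tolerance
    return [(r, c) for r, c in zip(rows, cols) if cost[r, c] <= tol]


print(closest_pairs_sketch([10.1, 5.0], [5.2, 30.0], tol=1.0))  # -> [(1, 0)]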
@@ -121,8 +121,12 @@ class align(PostProcessABC):
         # Remove bits of traces before first event
         if self.slice_before_first_event:
             # minus sign in front of shift_list to shift to the left
-            mask_aligned = df_shift(mask_aligned, common_index.to_list(), -shift_list)
-            trace_aligned = df_shift(trace_aligned, common_index.to_list(), -shift_list)
+            mask_aligned = df_shift(
+                mask_aligned, common_index.to_list(), -shift_list
+            )
+            trace_aligned = df_shift(
+                trace_aligned, common_index.to_list(), -shift_list
+            )
         # Do not remove bits of traces before first event
         else:
             # Add columns to left, filled with NaNs
@@ -130,7 +134,11 @@ class align(PostProcessABC):
             mask_aligned = df_extend_nan(mask_aligned, max_shift)
             trace_aligned = df_extend_nan(trace_aligned, max_shift)
             # shift each
-            mask_aligned = df_shift(mask_aligned, common_index.to_list(), -shift_list)
-            trace_aligned = df_shift(trace_aligned, common_index.to_list(), -shift_list)
+            mask_aligned = df_shift(
+                mask_aligned, common_index.to_list(), -shift_list
+            )
+            trace_aligned = df_shift(
+                trace_aligned, common_index.to_list(), -shift_list
+            )
         return trace_aligned, mask_aligned
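The df_shift calls re-wrapped above shift every trace left by its own offset so that first events line up in the first column. A minimal sketch of that operation (shift_rows_left is a hypothetical helper, not the package's df_shift):

import numpy as np
import pandas as pd


def shift_rows_left(df: pd.DataFrame, shifts) -> pd.DataFrame:
    # shift each row left by its own offset, padding the right with NaN
    out = pd.DataFrame(np.nan, index=df.index, columns=df.columns)
    for (idx, row), s in zip(df.iterrows(), shifts):
        values = row.to_numpy()[s:]  # drop the first s time points
        out.loc[idx, out.columns[: len(values)]] = values
    return out


traces = pd.DataFrame([[np.nan, np.nan, 1.0, 2.0], [np.nan, 3.0, 4.0, 5.0]])
print(shift_rows_left(traces, shifts=[2, 1]))  # events now start in column 0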
@@ -63,12 +63,16 @@ class crosscorr(PostProcessABC):
         # deviation from mean at each time point
         dmean_A = trace_A - np.nanmean(trace_A, axis=0).reshape((1, n_tps))
         # standard deviation over time for each replicate
-        stdA = np.sqrt(np.nanmean(dmean_A ** 2, axis=1).reshape((n_replicates, 1)))
+        stdA = np.sqrt(
+            np.nanmean(dmean_A**2, axis=1).reshape((n_replicates, 1))
+        )
         if trace_dfB is not None:
             trace_B = trace_dfB.to_numpy()
             # cross correlation
             dmean_B = trace_B - np.nanmean(trace_B, axis=0).reshape((1, n_tps))
-            stdB = np.sqrt(np.nanmean(dmean_B ** 2, axis=1).reshape((n_replicates, 1)))
+            stdB = np.sqrt(
+                np.nanmean(dmean_B**2, axis=1).reshape((n_replicates, 1))
+            )
         else:
             # auto correlation
             dmean_B = dmean_A
......
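The stdA/stdB lines above are the per-replicate normalisation terms of a Pearson-style (cross-)correlation. A compact sketch of that normalisation at lag zero (an illustration, not the class's full run method):

import numpy as np


def normalised_corr_lag0(trace_A, trace_B):
    # correlation at lag 0 for each replicate (row), mirroring the dmean/std terms above
    n_replicates, n_tps = trace_A.shape
    dmean_A = trace_A - np.nanmean(trace_A, axis=0).reshape((1, n_tps))
    dmean_B = trace_B - np.nanmean(trace_B, axis=0).reshape((1, n_tps))
    stdA = np.sqrt(np.nanmean(dmean_A**2, axis=1).reshape((n_replicates, 1)))
    stdB = np.sqrt(np.nanmean(dmean_B**2, axis=1).reshape((n_replicates, 1)))
    return np.nanmean(dmean_A * dmean_B, axis=1, keepdims=True) / (stdA * stdB)


x = np.random.default_rng(0).normal(size=(3, 50))
print(normalised_corr_lag0(x, x))  # autocorrelation at lag 0 is 1 for every replicate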
@@ -133,7 +133,10 @@ class mi(PostProcessABC):
         n_classes = len(data)
         Xo = np.vstack([timeseries for timeseries in data])
         y = np.hstack(
-            [i * np.ones(timeseries.shape[0]) for i, timeseries in enumerate(data)]
+            [
+                i * np.ones(timeseries.shape[0])
+                for i, timeseries in enumerate(data)
+            ]
         )
         if self.overtime:
@@ -152,10 +155,15 @@ class mi(PostProcessABC):
             X = Xo[:, :duration]
             # initialise sself.cikit-learn routines
-            nPCArange = range(1, X.shape[1] + 1) if X.shape[1] < 7 else [3, 4, 5, 6, 7]
+            nPCArange = (
+                range(1, X.shape[1] + 1) if X.shape[1] < 7 else [3, 4, 5, 6, 7]
+            )
             params = [
                 {"project__n_components": nPCArange},
-                {"classifier__kernel": ["linear"], "classifier__C": self.Crange},
+                {
+                    "classifier__kernel": ["linear"],
+                    "classifier__C": self.Crange,
+                },
                 {
                     "classifier__kernel": ["rbf"],
                     "classifier__C": self.Crange,
@@ -196,9 +204,13 @@ class mi(PostProcessABC):
                 y_predict = pipe.predict(X_test)
                 # estimate mutual information
                 p_y = 1 / n_classes
-                p_yhat_given_y = confusion_matrix(y_test, y_predict, normalize="true")
+                p_yhat_given_y = confusion_matrix(
+                    y_test, y_predict, normalize="true"
+                )
                 p_yhat = np.sum(p_y * p_yhat_given_y, 0)
-                h_yhat = -np.sum(p_yhat[p_yhat > 0] * np.log2(p_yhat[p_yhat > 0]))
+                h_yhat = -np.sum(
+                    p_yhat[p_yhat > 0] * np.log2(p_yhat[p_yhat > 0])
+                )
                 log2_p_yhat_given_y = np.ma.log2(p_yhat_given_y).filled(0)
                 h_yhat_given_y = -np.sum(
                     p_y * np.sum(p_yhat_given_y * log2_p_yhat_given_y, 1)
......
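The mi hunk estimates mutual information between the true label and the classifier's prediction as H(yhat) - H(yhat | y), both computed from a row-normalised confusion matrix with uniform class priors. A small worked version of that calculation (the 2x2 confusion matrices are made up for illustration):

import numpy as np


def mi_from_confusion(p_yhat_given_y):
    # mutual information I(y; yhat) in bits, assuming uniform class priors
    n_classes = p_yhat_given_y.shape[0]
    p_y = 1 / n_classes
    p_yhat = np.sum(p_y * p_yhat_given_y, 0)
    h_yhat = -np.sum(p_yhat[p_yhat > 0] * np.log2(p_yhat[p_yhat > 0]))
    log2_p = np.ma.log2(p_yhat_given_y).filled(0)  # log2(0) terms contribute nothing
    h_yhat_given_y = -np.sum(p_y * np.sum(p_yhat_given_y * log2_p, 1))
    return h_yhat - h_yhat_given_y


print(mi_from_confusion(np.eye(2)))                           # perfect classifier: 1.0 bit
print(mi_from_confusion(np.array([[0.9, 0.1], [0.1, 0.9]])))  # 90% accurate: ~0.53 bits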
"""
Main data processing functions. The inputs for this functions must be data series.
"""
import numpy as np
from pandas import Series, DataFrames
def bg_sub(data:Series, background:float):
return data - background
def ratio(data1:Series, data2:Series):
return data1 / data2
def norm_tps(data:Series, tps:list, fun=np.nanmean):
return data / fun(data.loc(axis=1)[tps])
def growth_rate(data:Series, alg=None, filt = 'savgol'):
if alg is None:
alg='standard'
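Despite the Series annotations, norm_tps indexes along axis=1, so in practice these helpers are applied to one-row DataFrames or whole extraction DataFrames. A short usage sketch with invented numbers, assuming the functions above are in scope:

import pandas as pd

trace = pd.DataFrame([[110.0, 120.0, 130.0, 125.0, 140.0]], columns=[0, 1, 2, 3, 4])
corrected = bg_sub(trace, background=10.0)   # subtract a scalar background
rat = ratio(corrected, corrected * 2)        # element-wise ratio of two signals
normed = norm_tps(corrected, tps=[0, 1])     # normalise by the mean over time points 0-1
print(normed.iloc[0, -1])                    # last time point relative to the early mean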
from typing import Dict, List, Union
import re
import numpy as np
import pandas as pd
from pandas import Series, DataFrame
from sklearn.cluster import KMeans
from matplotlib import pyplot as plt
import seaborn as sns
import compress_pickle
from postprocessor.core.postprocessor import PostProcessor
from postprocessor.core.tracks import get_avg_grs, non_uniform_savgol
from postprocessor.core.ph import *
def filter_by_gfp(dfs):
gfps = pd.concat([t[("GFPFast_bgsub", np.maximum, "median")] for t in dfs])
avgs_gfp = gfps.mean(axis=1)
high_gfp = get_high_k2(avgs_gfp)
# high_gfp = avgs_gfp[avgs_gfp > 200]
return high_gfp
def filter_by_area(dfs, min=50):
areas = pd.concat([t[("general", None, "area")] for t in dfs])
avgs_areas = areas[(areas.notna().sum(axis=1) > areas.shape[1] // (1.25))].mean(
axis=1
)
avgs_areas = avgs_areas[(avgs_areas > min)]
return avgs_areas
def get_high_k2(df):
kmeans = KMeans(n_clusters=2)
vals = df.values.reshape(-1, 1)
kmeans.fit(vals)
high_clust_id = kmeans.cluster_centers_.argmax()
return df.loc[kmeans.predict(vals) == high_clust_id]
def get_concats(dfs, keys):
return pd.concat([t.get((keys), pd.DataFrame()) for t in dfs])
def get_dfs(pp):
dfs = [pp.extraction[pp.expt.positions[i]] for i in range(len(pp.expt.positions))]
return dfs
combine_dfs = lambda dfs: {k: get_concats(dfs, k) for k in dfs[0].keys()}
def merge_channels(pp, min_area=50):
dfs = get_dfs(pp)
# rats = get_concats(dfs, ("em_ratio_bgsub", np.maximum, "median"))
# gfps = filter_by_gfp(dfs)
    avgs_area = filter_by_area(dfs, min=min_area)
# ids = [x for x in set(gfps.index).intersection(avgs_area.index)]
ids = avgs_area.index
new_dfs = combine_dfs(dfs)
h = pd.DataFrame(
{
k[0] + "_" + k[2]: v.loc[ids].mean(axis=1)
for k, v in new_dfs.items()
if k[-1] != "imBackground"
}
)
return h
def process_phs(pp, min_area=200):
h = merge_channels(pp, min_area)
h.index.names = ["pos", "trap", "cell"]
ids = h.index
h = h.reset_index()
h["ph"] = h["pos"].apply(lambda x: float(x[3:7].replace("_", ".")))
h["max5_d_med"] = h["mCherry_max2p5pc"] / h["mCherry_median"]
h = h.set_index(ids)
h = h.drop(["pos", "trap", "cell"], axis=1)
return h
def growth_rate(
    data: Series, alg=None, filt={"kind": "savgol", "window": 5, "degree": 3}
):
    window = filt["window"]
    degree = filt["degree"]
    if alg is None:
        alg = "standard"
    if filt:  # TODO add support for multiple algorithms
        data = Series(
            non_uniform_savgol(
                data.dropna().index, data.dropna().values, window, degree
            ),
            index=data.dropna().index,
        )
    diff_kernel = np.array([1, -1])
    return Series(
        np.convolve(data, diff_kernel, "same"), index=data.dropna().index
    )
# import numpy as np
# diff_kernel = np.array([1, -1])
# gr = clean.apply(growth_rate, axis=1)
# from postprocessor.core.tracks import non_uniform_savgol, clean_tracks
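A brief usage sketch for the helper above; it assumes this script's imports (in particular non_uniform_savgol) and uses a synthetic trace invented for illustration:

import numpy as np
from pandas import Series

trace = Series(np.linspace(50.0, 80.0, 31))   # synthetic single-cell area trace
gr = growth_rate(trace)                       # Savitzky-Golay smoothing, then first differences via [1, -1]
# for a whole tracks DataFrame (rows = cells), apply row-wise as in the comments above:
# gr_df = clean.apply(growth_rate, axis=1)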
def sort_df(df, by="first", rev=True):
nona = df.notna()
if by == "len":
idx = nona.sum(axis=1)
elif by == "first":
idx = nona.idxmax(axis=1)
idx = idx.sort_values().index
if rev:
idx = idx[::-1]
return df.loc[idx]
# test = tmp[("GFPFast", np.maximum, "median")]
# test2 = tmp[("pHluorin405", np.maximum, "median")]
# ph = test / test2
# ph = ph.stack().reset_index(1)
# ph.columns = ["tp", "fl"]
def m2p5_med(ext, ch, red=np.maximum):
m2p5pc = ext[(ch, red, "max2p5pc")]
med = ext[(ch, red, "median")]
result = m2p5pc / med
return result
def plot_avg(df):
df = df.stack().reset_index(1)
df.columns = ["tp", "val"]
sns.relplot(x=df["tp"], y=df["val"], kind="line")
plt.show()
def split_data(df: DataFrame, splits: List[int]):
    splits = tuple(splits)  # accept a list or tuple of split points
    dfs = [
        df.iloc[:, i:j]
        for i, j in zip((0,) + splits, splits + (df.shape[1],))
    ]
    return dfs
def growth_rate(
data: Series, alg=None, filt={"kind": "savgol", "window": 7, "degree": 3}
):
if alg is None:
alg = "standard"
if filt: # TODO add support for multiple algorithms
window = filt["window"]
degree = filt["degree"]
data = Series(
non_uniform_savgol(
data.dropna().index, data.dropna().values, window, degree
),
index=data.dropna().index,
)
diff_kernel = np.array([1, -1])
return Series(np.convolve(data, diff_kernel, "same"), index=data.dropna().index)
# pp = PostProcessor(source=19831)
# pp.load_tiler_cells()
# f = "/home/alan/Documents/sync_docs/libs/postproc/gluStarv_2_0_x2_dual_phl_ura8_00/extraction"
# pp.load_extraction(
# "/home/alan/Documents/sync_docs/libs/postproc/postprocessor/"
# + pp.expt.name
# + "/extraction/"
# )
# tmp = pp.extraction["phl_ura8_002"]
def _check_bg(data):
for k in list(pp.extraction.values())[0].keys():
for p in pp.expt.positions:
if k not in pp.extraction[p]:
print(p, k)
# data = {
# k: pd.concat([pp.extraction[pos][k] for pos in pp.expt.positions[:-3]])
# for k in list(pp.extraction.values())[0].keys()
# }
def hmap(df, **kwargs):
g = sns.heatmap(sort_df(df), robust=True, cmap="mako_r", **kwargs)
plt.xlabel("")
return g
# from random import randint
# x = randint(0, len(smooth))
# plt.plot(clean.iloc[x], 'b')
# plt.plot(smooth.iloc[x], 'r')
# plt.show()
# data = tmp
# df = data[("general", None, "area")]
# clean = clean_tracks(df, min_len=160)
# clean = clean.loc[clean.notna().sum(axis=1) > 9]
# gr = clean.apply(growth_rate, axis=1)
# splits = (72, 108, 180)
# gr_sp = split_data(gr, splits)
# idx = gr.index
# bg = get_bg(data)
# test = data[("GFPFast", np.maximum, "median")]
# test2 = data[("pHluorin405", np.maximum, "median")]
# ph = (test / test2).loc[idx]
# c = pd.concat((ph.mean(1), gr.max(1)), axis=1)
# c.columns = ["ph", "gr_max"]
# # ph = ph.stack().reset_index(1)
# # ph.columns = ['tp', 'fl']
# ph_sp = split_data(gr, splits)
def get_bg(data):
bg = {}
fl_subkeys = [
x
for x in data.keys()
if x[0] in ["GFP", "GFPFast", "mCherry", "pHluorin405"]
and x[-1] != "imBackground"
]
for k in fl_subkeys:
nk = list(k)
bk = tuple(nk[:-1] + ["imBackground"])
nk = tuple(nk[:-1] + [nk[-1] + "_BgSub"])
tmp = []
for i, v in data[bk].iterrows():
if i in data[k].index:
newdf = data[k].loc[i] / v
newdf.index = pd.MultiIndex.from_tuples([(*i, c) for c in newdf.index])
tmp.append(newdf)
bg[nk] = pd.concat(tmp)
return bg
def calc_ph(bg):
    fl_subkeys = [x for x in bg.keys() if x[0] in ["GFP", "GFPFast", "pHluorin405"]]
    chs = list(set([x[0] for x in fl_subkeys]))
    assert len(chs) == 2, "Expected exactly two fluorescence channels"
    ch1 = [x[1:] for x in fl_subkeys if x[0] == chs[0]]
    ch2 = [x[1:] for x in fl_subkeys if x[0] == chs[1]]
    inter = list(set(ch1).intersection(ch2))
    ph = {}
    for red_fld in inter:
        ph[tuple(("ph",) + red_fld)] = (
            bg[tuple((chs[0],) + red_fld)] / bg[tuple((chs[1],) + red_fld)]
        )
    return ph
def get_traps(pp):
t0 = {}
for pos in pp.tiler.positions:
pp.tiler.current_position = pos
t0[pos] = pp.tiler.get_traps_timepoint(
0, channels=[0, pp.tiler.channels.index("mCherry")], z=[0, 1, 2, 3, 4]
)
return t0
def get_pos_ph(pp):
pat = re.compile(r"ph_([0-9]_[0-9][0-9])")
return {
pos: float(pat.findall(pos)[0].replace("_", ".")) for pos in pp.tiler.positions
}
def plot_sample_bf_mch(pp):
bf_mch = get_traps(pp)
ts = [{i: v[:, j, ...] for i, v in bf_mch.items()} for j in [0, 1]]
tsbf = {i: v[:, 0, ...] for i, v in bf_mch.items()}
posdict = {k: v for k, v in get_pos_ph(pp).items()}
posdict = {v: k for k, v in posdict.items()}
posdict = {v: k for k, v in posdict.items()}
ph = np.unique(list(posdict.values())).tolist()
counters = {ph: 0 for ph in ph}
n = [np.random.randint(ts[0][k].shape[0]) for k in posdict.keys()]
fig, axes = plt.subplots(2, 5)
for k, (t, name) in enumerate(zip(ts, ["Bright field", "mCherry"])):
for i, (pos, ph) in enumerate(posdict.items()):
# i = ph.index(posdict[pos])
axes[k, i].grid(False)
axes[k, i].set(
xticklabels=[],
yticklabels=[],
)
axes[k, i].set_xlabel(posdict[pos] if k else None, fontsize=28)
axes[k, i].imshow(
np.maximum.reduce(t[pos][n[i], 0], axis=2),
cmap="gist_gray" if not k else None,
)
# counters[posdict[pos]] += 1
plt.tick_params(
axis="x", # changes apply to the x-axis
which="both", # both major and minor ticks are affected
bottom=False, # ticks along the bottom edge are off
top=False, # ticks along the top edge are off
labelbottom=False,
) # labels along the bottom edge are off
axes[k, 0].set_ylabel(name, fontsize=28)
plt.tight_layout()
plt.show()
# Plotting calibration curve
from scipy.optimize import curve_fit
def fit_calibration(h):
ycols = [x for x in h.columns if "em_ratio" in x]
xcol = "ph"
def objective(x, a, b):
return a * x + b
# fig, axes = plt.subplots(1, len(ycols))
# for i, ycol in enumerate(ycols):
# d = h[[xcol, ycol]]
# params, _ = curve_fit(objective, *[d[col].values for col in d.columns])
# sns.lineplot(x=xcol, y=ycol, data=h, alpha=0.5, err_style="bars", ax=axes[i])
# # sns.lineplot(d[xcol], objective(d[xcol].values, *params), ax=axes[i])
# plt.show()
ycol = "em_ratio_mean"
d = h[[xcol, *ycols]]
tmp = d.groupby("ph").mean()
calibs = {ycol: curve_fit(objective, tmp.index, tmp[ycol])[0] for ycol in ycols}
# sns.lineplot(x=xcol, y=ycol, data=d, alpha=0.5, err_style="bars")
# plt.xlabel("pH")
# plt.ylabel("pHluorin emission ratio")
# sns.lineplot(d[xcol], objective(d[xcol], *params))
return calibs
import pkg_resources
from pathlib import Path, PosixPath
from typing import Union, List, Dict
from datetime import datetime
from compress_pickle import load, dump
from core.experiment import Experiment
from core.cells import Cells
from core.segment import Tiler
from core.io.matlab import matObject
class PostProcessor:
    '''
    Base class to perform feature extraction.
    :param parameters: Parameters class with channels, reduction functions and
        extraction functions to use.
    :param source: Origin of the experiment; an int is treated as an OMERO
        experiment id, a str or Path is passed to Experiment.from_source as a
        local source.
    '''
    def __init__(self, parameters=None, source: Union[int, str, Path] = None):
        # self.params = parameters
        if source is not None:
            if isinstance(source, int):
                self.expt_id = source
                self.load_expt(source, omero=True)
            elif isinstance(source, (str, Path)):
                self.load_expt(source, omero=False)
@property
def channels(self):
if not hasattr(self, '_channels'):
if type(self.params.tree) is dict:
self._channels = tuple(self.params.tree.keys())
return self._channels
@property
def current_position(self):
assert hasattr(self, 'expt'), 'No experiment loaded.'
if hasattr(self, 'tiler'):
assert self.expt.current_position.name == self.tiler.current_position
return self.expt.current_position
def load_expt(self, source: Union[int, str], omero: bool = False) -> None:
if omero:
self.expt = Experiment.from_source(
self.expt_id, #Experiment ID on OMERO
'upload', #OMERO Username
'gothamc1ty', #OMERO Password
'islay.bio.ed.ac.uk', #OMERO host
port=4064 #This is default
)
else:
self.expt = Experiment.from_source(source)
self.expt_id = self.expt.exptID
def load_tiler_cells(self) -> None:
self.tiler = Tiler(self.expt)
self.cells = Cells()
self.cells = self.cells.from_source(
self.expt.current_position.annotation)
def get_pos_mo_bud(self):
annot = self.expt._get_position_annotation(
self.expt.current_position.name)
matob = matObject(annot)
m = matob["timelapseTrapsOmero"].get("cellMothers", None)
if m is not None:
ids = np.nonzero(m.todense())
d = {(self.expt.current_position.name, i, int(m[i, j])): []
for i, j in zip(*ids)}
for i, j, k in zip(*ids, d.keys()):
d[k].append((self.expt.current_position.name, i, j + 1))
else:
print("Pos {} has no mother matrix".format(
self.expt.current_position.name))
d = {}
return d
def get_exp_mo_bud(self):
d = {}
for pos in self.expt.positions:
self.expt.current_position = pos
d = {**d, **self.get_pos_mo_bud()}
self.expt.current_position = self.expt.positions[0]
return d
def load_extraction(self, folder=None) -> None:
if folder is None:
folder = Path(self.expt.name + '/extraction')
self.extraction = {}
for pos in self.expt.positions:
try:
self.extraction[pos] = load(folder / Path(pos + '.gz'))
            except FileNotFoundError:
                print(pos, ' not found')
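A hedged usage sketch for this class, reconstructed from the commented-out example earlier in this file set; the experiment id and folder are placeholders rather than real values:

from pathlib import Path

pp = PostProcessor(source=19831)                   # int source: fetch the experiment from OMERO
pp.load_tiler_cells()                              # set up Tiler and Cells for the experiment
pp.load_extraction(Path("experiment/extraction"))  # per-position extraction pickles
mother_bud = pp.get_exp_mo_bud()                   # mother/daughter pairings across positions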
@@ -121,7 +121,9 @@ class autoreg(PostProcessABC):
         # ar_coeffs: 1D array of coefficients (i.e. phi values)
         sample_acfs_toeplitz = linalg.toeplitz(sample_acfs[0:ar_order])
         # phi vector goes from 1 to P in Jia & Grima (2020)
-        ar_coeffs = linalg.inv(sample_acfs_toeplitz).dot(sample_acfs[1 : ar_order + 1])
+        ar_coeffs = linalg.inv(sample_acfs_toeplitz).dot(
+            sample_acfs[1 : ar_order + 1]
+        )
         # defines a dummy phi_0 as 1. This is so that the indices I use in
         # get_noise_param are consistent with Jia & Grima (2020)
         ar_coeffs = np.insert(ar_coeffs, 0, 1.0, axis=0)
@@ -307,12 +309,18 @@ class autoreg(PostProcessABC):
                     freq_npoints=self.freq_npoints,
                     ar_order=optimal_ar_order,
                 ),
-                optimal_ar_order
+                optimal_ar_order,
             ),
         )
-        freqs_df = pd.DataFrame([element.freqs for element in axes], index=signal.index)
-        power_df = pd.DataFrame([element.power for element in axes], index=signal.index)
-        order_df = pd.DataFrame([element.order for element in axes], index=signal.index)
+        freqs_df = pd.DataFrame(
+            [element.freqs for element in axes], index=signal.index
+        )
+        power_df = pd.DataFrame(
+            [element.power for element in axes], index=signal.index
+        )
+        order_df = pd.DataFrame(
+            [element.order for element in axes], index=signal.index
+        )
         return freqs_df, power_df, order_df
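The re-wrapped lines above are the Yule-Walker step: AR coefficients come from inverting the Toeplitz matrix of sample autocorrelations, following Jia & Grima (2020). A standalone sketch of that step with a synthetic AR(2) series (an illustration, not the class's exact estimator):

import numpy as np
from scipy import linalg


def yule_walker_coeffs(timeseries, ar_order):
    # estimate phi_1..phi_P from sample autocorrelations
    x = timeseries - np.mean(timeseries)
    acfs = np.array(
        [np.sum(x[: len(x) - k] * x[k:]) / np.sum(x * x) for k in range(ar_order + 1)]
    )
    toeplitz = linalg.toeplitz(acfs[0:ar_order])              # lags 0..P-1
    return linalg.inv(toeplitz).dot(acfs[1 : ar_order + 1])   # lags 1..P


rng = np.random.default_rng(1)
x = np.zeros(2000)
for t in range(2, 2000):   # AR(2): x_t = 0.5 x_{t-1} - 0.3 x_{t-2} + noise
    x[t] = 0.5 * x[t - 1] - 0.3 * x[t - 2] + rng.normal()
print(yule_walker_coeffs(x, ar_order=2))   # approximately [0.5, -0.3]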
@@ -41,15 +41,14 @@ class births(LineageProcess):
     def load_lineage(self, lineage):
         self.lineage = lineage

-    def run(self, signal: pd.DataFrame, lineage: np.ndarray = None) -> pd.DataFrame:
+    def run(
+        self, signal: pd.DataFrame, lineage: np.ndarray = None
+    ) -> pd.DataFrame:
         if lineage is None:
             lineage = self.lineage

-        get_mothers = lambda trap: lineage[:, 1][lineage[:, 0] == trap]
-        # get_daughters = lambda trap: lineage[:, 2][lineage[:, 0] == trap]
-        # birth_events = signal.groupby("trap").apply(lambda x: x.first_valid_index())
-        fvi = signal.apply(lambda x: x.first_valid_index(), axis=1)
+        def fvi(signal):
+            return signal.apply(lambda x: x.first_valid_index(), axis=1)

         traps_mothers = {
             tuple(mo): [] for mo in lineage[:, :2] if tuple(mo) in signal.index
@@ -58,7 +57,9 @@ class births(LineageProcess):
             if (trap, mother) in traps_mothers.keys():
                 traps_mothers[(trap, mother)].append(daughter)

-        mothers = signal.loc[set(signal.index).intersection(traps_mothers.keys())]
+        mothers = signal.loc[
+            set(signal.index).intersection(traps_mothers.keys())
+        ]
         births = pd.DataFrame(
             np.zeros((mothers.shape[0], signal.shape[1])).astype(bool),
             index=mothers.index,
@@ -68,7 +69,9 @@ class births(LineageProcess):
         for mother_id, daughters in traps_mothers.items():
             daughters_idx = set(
                 fvi.loc[
-                    fvi.index.intersection(list(product((mother_id[0],), daughters)))
+                    fvi.index.intersection(
+                        list(product((mother_id[0],), daughters))
+                    )
                 ].values
             ).difference({0})
             births.loc[
......
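The births hunk above marks, for each mother, the time points at which its daughters first appear (their first valid index in the signal). A simplified sketch of that bookkeeping; real indices are (trap, cell) tuples rather than the plain labels used here:

import numpy as np
import pandas as pd

signal = pd.DataFrame(
    {
        "m1": [2.0, 2.1, 2.3, 2.4, 2.6, 2.8],
        "d1": [np.nan, np.nan, 1.0, 1.1, 1.2, 1.3],        # first seen at t=2
        "d2": [np.nan, np.nan, np.nan, np.nan, 1.0, 1.1],  # first seen at t=4
    }
).T

fvi = signal.apply(lambda x: x.first_valid_index(), axis=1)
births = pd.DataFrame(False, index=["m1"], columns=signal.columns)
births.loc["m1", fvi.loc[["d1", "d2"]].values] = True
print(births)   # True at time points 2 and 4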
@@ -12,7 +12,10 @@ poses = [
 gr = Group(
     GroupParameters(
-        signals=["/extraction/general/None/area", "/extraction/mCherry/np_max/median"]
+        signals=[
+            "/extraction/general/None/area",
+            "/extraction/mCherry/np_max/median",
+        ]
     )
 )
 gr.run(
......
@@ -59,7 +59,6 @@ class Grouper(ABC):
     @property
     def siglist_grouped(self) -> None:
         if not hasattr(self, "_siglist_grouped"):
             self._siglist_grouped = Counter(
                 [x for s in self.signals.values() for x in s.siglist]
@@ -117,7 +116,8 @@ class Grouper(ABC):
         nsignals_dif = len(self.signals) - len(sitems)
         if nsignals_dif:
             print(
-                f"Grouper:Warning: {nsignals_dif} signals do not contain channel {path}"
+                f"Grouper:Warning: {nsignals_dif} signals do not contain"
+                f" channel {path}"
             )
         if pool or pool is None:
@@ -195,7 +195,6 @@ class NameGrouper(Grouper):
         return self._group_names

     def aggregate_multisignals(self, paths=None, **kwargs):
         aggregated = pd.concat(
             [
                 self.concat_signal(path, reduce_cols=np.nanmean, **kwargs)
......