From 723ca2c79140e195a575ba206e60699ca2a16b3d Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Al=C3=A1n=20Mu=C3=B1oz?= <amuoz@ed.ac.uk>
Date: Wed, 13 Jul 2022 19:41:02 +0100
Subject: [PATCH] style(blackify): cleanup and use pyproject limit

---
 core/functions/tracks.py      |  62 ++++--
 core/multisignal/align.py     |  16 +-
 core/multisignal/crosscorr.py |   8 +-
 core/multisignal/mi.py        |  22 +-
 core/old/cell.py              |  23 ---
 core/old/ph.py                | 364 ----------------------------------
 core/old/postprocessor.py     | 106 ----------
 core/processes/autoreg.py     |  18 +-
 core/processes/births.py      |  19 +-
 examples/group.py             |   5 +-
 grouper.py                    |   5 +-
 11 files changed, 108 insertions(+), 540 deletions(-)
 delete mode 100644 core/old/cell.py
 delete mode 100644 core/old/ph.py
 delete mode 100644 core/old/postprocessor.py

diff --git a/core/functions/tracks.py b/core/functions/tracks.py
index 5fecedd9..b5481329 100644
--- a/core/functions/tracks.py
+++ b/core/functions/tracks.py
@@ -12,7 +12,6 @@ import pandas as pd
 from utils_find_1st import find_1st, cmp_larger
 
 import more_itertools as mit
-from scipy.signal import savgol_filter
 
 # from scipy.optimize import linear_sum_assignment
 # from scipy.optimize import curve_fit
@@ -42,12 +41,13 @@ def max_ntps(track: pd.Series) -> int:
 def max_nonstop_ntps(track: pd.Series) -> int:
     nona_tracks = track.notna()
     consecutive_nonas_grouped = [
-        len(list(x)) for x in mit.consecutive_groups(np.flatnonzero(nona_tracks))
+        len(list(x))
+        for x in mit.consecutive_groups(np.flatnonzero(nona_tracks))
     ]
     return max(consecutive_nonas_grouped)
 
 
-def get_tracks_ntps(tracks: pd.DataFrame) -> pd.Series:
+def get_tracks_ntps(tracks: pd.DataFrame) -> Union[pd.DataFrame, pd.Series]:
     return tracks.apply(max_ntps, axis=1)
 
 
@@ -73,7 +73,9 @@ def get_avg_grs(tracks: pd.DataFrame) -> pd.DataFrame:
     return tracks.apply(get_avg_gr, axis=1)
 
 
-def clean_tracks(tracks, min_len: int = 15, min_gr: float = 1.0) -> pd.DataFrame:
+def clean_tracks(
+    tracks, min_len: int = 15, min_gr: float = 1.0
+) -> pd.DataFrame:
     """
     Clean small non-growing tracks and return the reduced dataframe
 
@@ -227,7 +229,9 @@ def get_joinable(tracks, smooth=False, tol=0.1, window=5, degree=3) -> dict:
     clean = clean_tracks(
         tracks, min_len=window + 1, min_gr=0.9
     )  # get useful tracks
-    savgol_on_srs = lambda x: non_uniform_savgol(x.index, x.values, window, degree)
+    savgol_on_srs = lambda x: non_uniform_savgol(
+        x.index, x.values, window, degree
+    )
     contig = clean.groupby(["trap"]).apply(get_contiguous_pairs)
     contig = contig.loc[contig.apply(len) > 0]
     flat = set([k for v in contig.values for i in v for j in i for k in j])
@@ -236,7 +240,9 @@ def get_joinable(tracks, smooth=False, tol=0.1, window=5, degree=3) -> dict:
     contig = tracks.groupby(["trap"]).apply(get_contiguous_pairs)
     contig = contig.loc[contig.apply(len) > 0]
     flat = set([k for v in contig.values for i in v for j in i for k in j])
-    smoothed_tracks = tracks.loc[flat].apply(lambda x: np.array(x.values), axis=1)
+    smoothed_tracks = tracks.loc[flat].apply(
+        lambda x: np.array(x.values), axis=1
+    )
 
     # fetch edges from ids TODO (IF necessary, here we can compare growth rates)
     idx_to_edge = lambda preposts: [
@@ -263,7 +269,9 @@ def get_joinable(tracks, smooth=False, tol=0.1, window=5, degree=3) -> dict:
             pre_res.append(
                 np.poly1d(np.polyfit(range(len(y)), y, 1))(len(y) + 1),
             )
-        pos_res = [get_means(smoothed_tracks.loc[post], window) for post in posts]
+        pos_res = [
+            get_means(smoothed_tracks.loc[post], window) for post in posts
+        ]
         result.append([pre_res, pos_res])
 
     return result
@@ -296,7 +304,8 @@ def get_joinable(tracks, smooth=False, tol=0.1, window=5, degree=3) -> dict:
 
     # match local with global ids
     joinable_ids = [
-        localid_to_idx(closest_pairs.loc[i], contig.loc[i]) for i in closest_pairs.index
+        localid_to_idx(closest_pairs.loc[i], contig.loc[i])
+        for i in closest_pairs.index
     ]
 
     return [pair for pairset in joinable_ids for pair in pairset]
 
@@ -341,7 +350,9 @@ def localid_to_idx(local_ids, contig_trap):
     for i, pairs in enumerate(local_ids):
         if len(pairs):
             for left, right in pairs:
-                lin_pairs.append((contig_trap[i][0][left], contig_trap[i][1][right]))
+                lin_pairs.append(
+                    (contig_trap[i][0][left], contig_trap[i][1][right])
+                )
     return lin_pairs
 
 
@@ -355,11 +366,14 @@ def get_dMetric_wrap(lst: List, **kwargs):
 
 def solve_matrices_wrap(dMetric: List, edges: List, **kwargs):
     return [
-        solve_matrices(mat, edgeset, **kwargs) for mat, edgeset in zip(dMetric, edges)
+        solve_matrices(mat, edgeset, **kwargs)
+        for mat, edgeset in zip(dMetric, edges)
     ]
 
 
-def get_dMetric(pre: List[float], post: List[float], tol: Union[float, int] = 1):
+def get_dMetric(
+    pre: List[float], post: List[float], tol: Union[float, int] = 1
+):
     """Calculate a cost matrix
 
     input
@@ -376,13 +390,18 @@ def get_dMetric(pre: List[float], post: List[float], tol: Union[float, int] = 1)
     else:
         dMetric = np.abs(np.subtract.outer(pre, post))
 
-    dMetric[np.isnan(dMetric)] = tol + 1 + np.nanmax(dMetric)  # nans will be filtered
+    dMetric[np.isnan(dMetric)] = (
+        tol + 1 + np.nanmax(dMetric)
+    )  # nans will be filtered
     return dMetric
 
 
-def solve_matrices(dMetric: np.ndarray, prepost: List, tol: Union[float, int] = 1):
+def solve_matrices(
+    dMetric: np.ndarray, prepost: List, tol: Union[float, int] = 1
+):
     """
-    Solve the distance matrices obtained in get_dMetric and/or merged from independent dMetric matrices
+    Solve the distance matrices obtained in get_dMetric and/or merged from
+    independent dMetric matrices
     """
     ids = solve_matrix(dMetric)
@@ -399,7 +418,9 @@ def solve_matrices(dMetric: np.ndarray, prepost: List, tol: Union[float, int] =
     return [idx for idx, res in zip(zip(*ids), result) if res <= tol]
 
 
-def get_closest_pairs(pre: List[float], post: List[float], tol: Union[float, int] = 1):
+def get_closest_pairs(
+    pre: List[float], post: List[float], tol: Union[float, int] = 1
+):
     """Calculate a cost matrix the Hungarian algorithm to pick the best set of options
@@ -479,12 +500,13 @@ def get_contiguous_pairs(tracks: pd.DataFrame) -> list:
     :param tracks: (m x n) dataframe where rows are cell tracks and
                    columns are timepoints
-    :param min_dgr: float minimum difference in growth rate from the interpolation
+    :param min_dgr: float minimum difference in growth rate from
+        the interpolation
 
     """
-    # indices = np.where(tracks.notna())
 
     mins, maxes = [
-        tracks.notna().apply(np.where, axis=1).apply(fn) for fn in (np.min, np.max)
+        tracks.notna().apply(np.where, axis=1).apply(fn)
+        for fn in (np.min, np.max)
     ]
 
     mins_d = mins.groupby(mins).apply(lambda x: x.index.tolist())
@@ -492,7 +514,9 @@ def get_contiguous_pairs(tracks: pd.DataFrame) -> list:
 
     # TODO add support for skipping time points
     maxes_d = maxes.groupby(maxes).apply(lambda x: x.index.tolist())
 
-    common = sorted(set(mins_d.index).intersection(maxes_d.index), reverse=True)
+    common = sorted(
+        set(mins_d.index).intersection(maxes_d.index), reverse=True
+    )
 
     return [(maxes_d[t], mins_d[t]) for t in common]
 
diff --git a/core/multisignal/align.py b/core/multisignal/align.py
index 11bada8a..5baa4fa8 100644
--- a/core/multisignal/align.py
+++ b/core/multisignal/align.py
@@ -121,8 +121,12 @@ class align(PostProcessABC):
         # Remove bits of traces before first event
         if self.slice_before_first_event:
             # minus sign in front of shift_list to shift to the left
-            mask_aligned = df_shift(mask_aligned, common_index.to_list(), -shift_list)
-            trace_aligned = df_shift(trace_aligned, common_index.to_list(), -shift_list)
+            mask_aligned = df_shift(
+                mask_aligned, common_index.to_list(), -shift_list
+            )
+            trace_aligned = df_shift(
+                trace_aligned, common_index.to_list(), -shift_list
+            )
         # Do not remove bits of traces before first event
         else:
             # Add columns to left, filled with NaNs
@@ -130,7 +134,11 @@ class align(PostProcessABC):
             mask_aligned = df_extend_nan(mask_aligned, max_shift)
             trace_aligned = df_extend_nan(trace_aligned, max_shift)
             # shift each
-            mask_aligned = df_shift(mask_aligned, common_index.to_list(), -shift_list)
-            trace_aligned = df_shift(trace_aligned, common_index.to_list(), -shift_list)
+            mask_aligned = df_shift(
+                mask_aligned, common_index.to_list(), -shift_list
+            )
+            trace_aligned = df_shift(
+                trace_aligned, common_index.to_list(), -shift_list
+            )
 
         return trace_aligned, mask_aligned
diff --git a/core/multisignal/crosscorr.py b/core/multisignal/crosscorr.py
index 52998514..79f8b4dc 100644
--- a/core/multisignal/crosscorr.py
+++ b/core/multisignal/crosscorr.py
@@ -63,12 +63,16 @@ class crosscorr(PostProcessABC):
         # deviation from mean at each time point
         dmean_A = trace_A - np.nanmean(trace_A, axis=0).reshape((1, n_tps))
         # standard deviation over time for each replicate
-        stdA = np.sqrt(np.nanmean(dmean_A ** 2, axis=1).reshape((n_replicates, 1)))
+        stdA = np.sqrt(
+            np.nanmean(dmean_A**2, axis=1).reshape((n_replicates, 1))
+        )
         if trace_dfB is not None:
             trace_B = trace_dfB.to_numpy()
             # cross correlation
             dmean_B = trace_B - np.nanmean(trace_B, axis=0).reshape((1, n_tps))
-            stdB = np.sqrt(np.nanmean(dmean_B ** 2, axis=1).reshape((n_replicates, 1)))
+            stdB = np.sqrt(
+                np.nanmean(dmean_B**2, axis=1).reshape((n_replicates, 1))
+            )
         else:
             # auto correlation
             dmean_B = dmean_A
diff --git a/core/multisignal/mi.py b/core/multisignal/mi.py
index c7a69e02..cddcbaf5 100644
--- a/core/multisignal/mi.py
+++ b/core/multisignal/mi.py
@@ -133,7 +133,10 @@ class mi(PostProcessABC):
         n_classes = len(data)
         Xo = np.vstack([timeseries for timeseries in data])
         y = np.hstack(
-            [i * np.ones(timeseries.shape[0]) for i, timeseries in enumerate(data)]
+            [
+                i * np.ones(timeseries.shape[0])
+                for i, timeseries in enumerate(data)
+            ]
         )
 
         if self.overtime:
@@ -152,10 +155,15 @@ class mi(PostProcessABC):
             X = Xo[:, :duration]
 
             # initialise sself.cikit-learn routines
-            nPCArange = range(1, X.shape[1] + 1) if X.shape[1] < 7 else [3, 4, 5, 6, 7]
+            nPCArange = (
+                range(1, X.shape[1] + 1) if X.shape[1] < 7 else [3, 4, 5, 6, 7]
+            )
             params = [
                 {"project__n_components": nPCArange},
-                {"classifier__kernel": ["linear"], "classifier__C": self.Crange},
+                {
+                    "classifier__kernel": ["linear"],
+                    "classifier__C": self.Crange,
+                },
                 {
                     "classifier__kernel": ["rbf"],
                     "classifier__C": self.Crange,
@@ -196,9 +204,13 @@ class mi(PostProcessABC):
                 y_predict = pipe.predict(X_test)
                 # estimate mutual information
                 p_y = 1 / n_classes
-                p_yhat_given_y = confusion_matrix(y_test, y_predict, normalize="true")
+                p_yhat_given_y = confusion_matrix(
+                    y_test, y_predict, normalize="true"
+                )
                 p_yhat = np.sum(p_y * p_yhat_given_y, 0)
-                h_yhat = -np.sum(p_yhat[p_yhat > 0] * np.log2(p_yhat[p_yhat > 0]))
+                h_yhat = -np.sum(
+                    p_yhat[p_yhat > 0] * np.log2(p_yhat[p_yhat > 0])
+                )
                 log2_p_yhat_given_y = np.ma.log2(p_yhat_given_y).filled(0)
                 h_yhat_given_y = -np.sum(
                     p_y * np.sum(p_yhat_given_y * log2_p_yhat_given_y, 1)
diff --git a/core/old/cell.py b/core/old/cell.py
deleted file mode 100644
index f96f30d5..00000000
--- a/core/old/cell.py
+++ /dev/null
@@ -1,23 +0,0 @@
-"""
-Main data processing functions. The inputs for this functions must be data series.
-"""
-
-import numpy as np
-from pandas import Series, DataFrames
-
-def bg_sub(data:Series, background:float):
-    return data - background
-
-def ratio(data1:Series, data2:Series):
-    return data1 / data2
-
-def norm_tps(data:Series, tps:list, fun=np.nanmean):
-    return data / fun(data.loc(axis=1)[tps])
-
-def growth_rate(data:Series, alg=None, filt = 'savgol'):
-    if alg is None:
-        alg='standard'
-
-
-
-
diff --git a/core/old/ph.py b/core/old/ph.py
deleted file mode 100644
index a28a8055..00000000
--- a/core/old/ph.py
+++ /dev/null
@@ -1,364 +0,0 @@
-from typing import Dict, List, Union
-import re
-
-import numpy as np
-import pandas as pd
-from pandas import Series, DataFrame
-from sklearn.cluster import KMeans
-from matplotlib import pyplot as plt
-import seaborn as sns
-
-import compress_pickle
-
-from postprocessor.core.postprocessor import PostProcessor
-from postprocessor.core.tracks import get_avg_grs, non_uniform_savgol
-from postprocessor.core.ph import *
-
-
-def filter_by_gfp(dfs):
-    gfps = pd.concat([t[("GFPFast_bgsub", np.maximum, "median")] for t in dfs])
-    avgs_gfp = gfps.mean(axis=1)
-    high_gfp = get_high_k2(avgs_gfp)
-    # high_gfp = avgs_gfp[avgs_gfp > 200]
-
-    return high_gfp
-
-
-def filter_by_area(dfs, min=50):
-    areas = pd.concat([t[("general", None, "area")] for t in dfs])
-    avgs_areas = areas[(areas.notna().sum(axis=1) > areas.shape[1] // (1.25))].mean(
-        axis=1
-    )
-    avgs_areas = avgs_areas[(avgs_areas > min)]
-
-    return avgs_areas
-
-
-def get_high_k2(df):
-    kmeans = KMeans(n_clusters=2)
-    vals = df.values.reshape(-1, 1)
-    kmeans.fit(vals)
-    high_clust_id = kmeans.cluster_centers_.argmax()
-
-    return df.loc[kmeans.predict(vals) == high_clust_id]
-
-
-def get_concats(dfs, keys):
-    return pd.concat([t.get((keys), pd.DataFrame()) for t in dfs])
-
-
-def get_dfs(pp):
-    dfs = [pp.extraction[pp.expt.positions[i]] for i in range(len(pp.expt.positions))]
-    return dfs
-
-
-combine_dfs = lambda dfs: {k: get_concats(dfs, k) for k in dfs[0].keys()}
-
-
-def merge_channels(pp, min_area=50):
-    dfs = get_dfs(pp)
-    # rats = get_concats(dfs, ("em_ratio_bgsub", np.maximum, "median"))
-    # gfps = filter_by_gfp(dfs)
-
-    avgs_area = filter_by_area(dfs, min=50)
-    # ids = [x for x in set(gfps.index).intersection(avgs_area.index)]
-    ids = avgs_area.index
-
-    new_dfs = combine_dfs(dfs)
-
-    h = pd.DataFrame(
-        {
-            k[0] + "_" + k[2]: v.loc[ids].mean(axis=1)
-            for k, v in new_dfs.items()
-            if k[-1] != "imBackground"
-        }
-    )
-    return h
-
-
-def process_phs(pp, min_area=200):
-    h = merge_channels(pp, min_area)
-    h.index.names = ["pos", "trap", "cell"]
-    ids = h.index
-    h = h.reset_index()
-
-    h["ph"] = h["pos"].apply(lambda x: float(x[3:7].replace("_", ".")))
-    h["max5_d_med"] = h["mCherry_max2p5pc"] / h["mCherry_median"]
-
-    h = h.set_index(ids)
-    h = h.drop(["pos", "trap", "cell"], axis=1)
-    return h
-
-
-def growth_rate(
-    data: Series, alg=None, filt={"kind": "savgol", "window": 5, "degree": 3}
-):
-    window = filt["window"]
-    degree = filt["degree"]
-    if alg is None:
-        alg = "standard"
-
-    if filt:  # TODO add support for multiple algorithms
-        data = Series(
-            non_uniform_savgol(
-                data.dropna().index, data.dropna().values, window, degree
-            ),
-            index=data.dropna().index,
-        )
-
-    return Series(np.convolve(data, diff_kernel, "same"), index=data.dropna().index)
-
-
-# import numpy as np
-
-# diff_kernel = np.array([1, -1])
-# gr = clean.apply(growth_rate, axis=1)
-# from postprocessor.core.tracks import non_uniform_savgol, clean_tracks
-
-
-def sort_df(df, by="first", rev=True):
-    nona = df.notna()
-    if by == "len":
-        idx = nona.sum(axis=1)
-    elif by == "first":
-        idx = nona.idxmax(axis=1)
-    idx = idx.sort_values().index
-
-    if rev:
-        idx = idx[::-1]
-
-    return df.loc[idx]
-
-
-# test = tmp[("GFPFast", np.maximum, "median")]
-# test2 = tmp[("pHluorin405", np.maximum, "median")]
-# ph = test / test2
-# ph = ph.stack().reset_index(1)
-# ph.columns = ["tp", "fl"]
-
-
-def m2p5_med(ext, ch, red=np.maximum):
-    m2p5pc = ext[(ch, red, "max2p5pc")]
-    med = ext[(ch, red, "median")]
-
-    result = m2p5pc / med
-
-    return result
-
-
-def plot_avg(df):
-    df = df.stack().reset_index(1)
-    df.columns = ["tp", "val"]
-
-    sns.relplot(x=df["tp"], y=df["val"], kind="line")
-    plt.show()
-
-
-def split_data(df: DataFrame, splits: List[int]):
-    dfs = [df.iloc[:, i:j] for i, j in zip((0,) + splits, splits + (df.shape[1],))]
-    return dfs
-
-
-def growth_rate(
-    data: Series, alg=None, filt={"kind": "savgol", "window": 7, "degree": 3}
-):
-    if alg is None:
-        alg = "standard"
-
-    if filt:  # TODO add support for multiple algorithms
-        window = filt["window"]
-        degree = filt["degree"]
-        data = Series(
-            non_uniform_savgol(
-                data.dropna().index, data.dropna().values, window, degree
-            ),
-            index=data.dropna().index,
-        )
-
-    diff_kernel = np.array([1, -1])
-
-    return Series(np.convolve(data, diff_kernel, "same"), index=data.dropna().index)
-
-
-# pp = PostProcessor(source=19831)
-# pp.load_tiler_cells()
-# f = "/home/alan/Documents/sync_docs/libs/postproc/gluStarv_2_0_x2_dual_phl_ura8_00/extraction"
-# pp.load_extraction(
-#     "/home/alan/Documents/sync_docs/libs/postproc/postprocessor/"
-#     + pp.expt.name
-#     + "/extraction/"
-# )
-# tmp = pp.extraction["phl_ura8_002"]
-
-
-def _check_bg(data):
-    for k in list(pp.extraction.values())[0].keys():
-        for p in pp.expt.positions:
-            if k not in pp.extraction[p]:
-                print(p, k)
-
-
-# data = {
-#     k: pd.concat([pp.extraction[pos][k] for pos in pp.expt.positions[:-3]])
-#     for k in list(pp.extraction.values())[0].keys()
-# }
-
-
-def hmap(df, **kwargs):
-    g = sns.heatmap(sort_df(df), robust=True, cmap="mako_r", **kwargs)
-    plt.xlabel("")
-    return g
-
-
-# from random import randint
-# x = randint(0, len(smooth))
-# plt.plot(clean.iloc[x], 'b')
-# plt.plot(smooth.iloc[x], 'r')
-# plt.show()
-
-
-# data = tmp
-# df = data[("general", None, "area")]
-# clean = clean_tracks(df, min_len=160)
-# clean = clean.loc[clean.notna().sum(axis=1) > 9]
-# gr = clean.apply(growth_rate, axis=1)
-# splits = (72, 108, 180)
-# gr_sp = split_data(gr, splits)
-
-# idx = gr.index
-
-# bg = get_bg(data)
-# test = data[("GFPFast", np.maximum, "median")]
-# test2 = data[("pHluorin405", np.maximum, "median")]
-# ph = (test / test2).loc[idx]
-# c = pd.concat((ph.mean(1), gr.max(1)), axis=1)
-# c.columns = ["ph", "gr_max"]
-# # ph = ph.stack().reset_index(1)
-# # ph.columns = ['tp', 'fl']
-
-# ph_sp = split_data(gr, splits)
-
-
-def get_bg(data):
-    bg = {}
-    fl_subkeys = [
-        x
-        for x in data.keys()
-        if x[0] in ["GFP", "GFPFast", "mCherry", "pHluorin405"]
-        and x[-1] != "imBackground"
-    ]
-    for k in fl_subkeys:
-        nk = list(k)
-        bk = tuple(nk[:-1] + ["imBackground"])
-        nk = tuple(nk[:-1] + [nk[-1] + "_BgSub"])
-        tmp = []
-        for i, v in data[bk].iterrows():
-            if i in data[k].index:
-                newdf = data[k].loc[i] / v
-                newdf.index = pd.MultiIndex.from_tuples([(*i, c) for c in newdf.index])
-                tmp.append(newdf)
-        bg[nk] = pd.concat(tmp)
-
-    return bg
-
-
-def calc_ph(bg):
-    fl_subkeys = [x for x in bg.keys() if x[0] in ["GFP", "GFPFast", "pHluorin405"]]
-    chs = list(set([x[0] for x in fl_subkeys]))
-    assert len(chs) == 2, "Too many channels"
-    ch1 = [x[1:] for x in fl_subkeys if x[0] == chs[0]]
-    ch2 = [x[1:] for x in fl_subkeys if x[0] == chs[1]]
-    inter = list(set(ch1).intersection(ch2))
-    ph = {}
-    for red_fld in inter:
-        ph[tuple(("ph",) + red_fld)] = (
-            bg[tuple((chs[0],) + red_fld)] / bg[tuple((chs[1],) + red_fld)]
-        )
-
-
-def get_traps(pp):
-    t0 = {}
-    for pos in pp.tiler.positions:
-        pp.tiler.current_position = pos
-        t0[pos] = pp.tiler.get_traps_timepoint(
-            0, channels=[0, pp.tiler.channels.index("mCherry")], z=[0, 1, 2, 3, 4]
-        )
-
-    return t0
-
-
-def get_pos_ph(pp):
-    pat = re.compile(r"ph_([0-9]_[0-9][0-9])")
-    return {
-        pos: float(pat.findall(pos)[0].replace("_", ".")) for pos in pp.tiler.positions
-    }
-
-
-def plot_sample_bf_mch(pp):
-    bf_mch = get_traps(pp)
-    ts = [{i: v[:, j, ...] for i, v in bf_mch.items()} for j in [0, 1]]
-    tsbf = {i: v[:, 0, ...] for i, v in bf_mch.items()}
-
-    posdict = {k: v for k, v in get_pos_ph(pp).items()}
-    posdict = {v: k for k, v in posdict.items()}
-    posdict = {v: k for k, v in posdict.items()}
-    ph = np.unique(list(posdict.values())).tolist()
-    counters = {ph: 0 for ph in ph}
-    n = [np.random.randint(ts[0][k].shape[0]) for k in posdict.keys()]
-
-    fig, axes = plt.subplots(2, 5)
-    for k, (t, name) in enumerate(zip(ts, ["Bright field", "mCherry"])):
-        for i, (pos, ph) in enumerate(posdict.items()):
-            # i = ph.index(posdict[pos])
-            axes[k, i].grid(False)
-            axes[k, i].set(
-                xticklabels=[],
-                yticklabels=[],
-            )
-            axes[k, i].set_xlabel(posdict[pos] if k else None, fontsize=28)
-            axes[k, i].imshow(
-                np.maximum.reduce(t[pos][n[i], 0], axis=2),
-                cmap="gist_gray" if not k else None,
-            )
-            # counters[posdict[pos]] += 1
-        plt.tick_params(
-            axis="x",  # changes apply to the x-axis
-            which="both",  # both major and minor ticks are affected
-            bottom=False,  # ticks along the bottom edge are off
-            top=False,  # ticks along the top edge are off
-            labelbottom=False,
-        )  # labels along the bottom edge are off
-        axes[k, 0].set_ylabel(name, fontsize=28)
-    plt.tight_layout()
-    plt.show()
-
-
-# Plotting calibration curve
-from scipy.optimize import curve_fit
-
-
-def fit_calibration(h):
-    ycols = [x for x in h.columns if "em_ratio" in x]
-    xcol = "ph"
-
-    def objective(x, a, b):
-        return a * x + b
-
-    # fig, axes = plt.subplots(1, len(ycols))
-    # for i, ycol in enumerate(ycols):
-    #     d = h[[xcol, ycol]]
-    #     params, _ = curve_fit(objective, *[d[col].values for col in d.columns])
-    #     sns.lineplot(x=xcol, y=ycol, data=h, alpha=0.5, err_style="bars", ax=axes[i])
-    #     # sns.lineplot(d[xcol], objective(d[xcol].values, *params), ax=axes[i])
-    # plt.show()
-
-    ycol = "em_ratio_mean"
-    d = h[[xcol, *ycols]]
-    tmp = d.groupby("ph").mean()
-    calibs = {ycol: curve_fit(objective, tmp.index, tmp[ycol])[0] for ycol in ycols}
-    # sns.lineplot(x=xcol, y=ycol, data=d, alpha=0.5, err_style="bars")
-    # plt.xlabel("pH")
-    # plt.ylabel("pHluorin emission ratio")
-    # sns.lineplot(d[xcol], objective(d[xcol], *params))
-
-    return calibs
diff --git a/core/old/postprocessor.py b/core/old/postprocessor.py
deleted file mode 100644
index 183b277a..00000000
--- a/core/old/postprocessor.py
+++ /dev/null
@@ -1,106 +0,0 @@
-import pkg_resources
-from pathlib import Path, PosixPath
-from typing import Union, List, Dict
-from datetime import datetime
-
-from compress_pickle import load, dump
-
-from core.experiment import Experiment
-from core.cells import Cells
-from core.segment import Tiler
-from core.io.matlab import matObject
-from core.experiment import Experiment
-
-
-class PostProcessor:
-    '''
-    Base class to perform feature extraction.
-    :param parameters: Parameters class with channels, reduction functions and
-    extraction functions to use.
-    :param source: Origin of experiment, if int it is assumed from Omero, if str
-    or Path
-    '''
-    def __init__(self, parameters=None, source: Union[int, str, Path] = None):
-        # self.params = parameters
-        if source is not None:
-            if type(source) is int:
-                self.expt_id = source
-                self.load_expt(source, omero=True)
-            elif type(source) is str or PosixPath:
-                self.load_expt(source, omero=False)
-
-    @property
-    def channels(self):
-        if not hasattr(self, '_channels'):
-            if type(self.params.tree) is dict:
-                self._channels = tuple(self.params.tree.keys())
-
-        return self._channels
-
-    @property
-    def current_position(self):
-        assert hasattr(self, 'expt'), 'No experiment loaded.'
-
-        if hasattr(self, 'tiler'):
-            assert self.expt.current_position.name == self.tiler.current_position
-
-        return self.expt.current_position
-
-    def load_expt(self, source: Union[int, str], omero: bool = False) -> None:
-        if omero:
-            self.expt = Experiment.from_source(
-                self.expt_id,  #Experiment ID on OMERO
-                'upload',  #OMERO Username
-                'gothamc1ty',  #OMERO Password
-                'islay.bio.ed.ac.uk',  #OMERO host
-                port=4064  #This is default
-            )
-        else:
-            self.expt = Experiment.from_source(source)
-            self.expt_id = self.expt.exptID
-
-    def load_tiler_cells(self) -> None:
-        self.tiler = Tiler(self.expt)
-        self.cells = Cells()
-        self.cells = self.cells.from_source(
-            self.expt.current_position.annotation)
-
-    def get_pos_mo_bud(self):
-        annot = self.expt._get_position_annotation(
-            self.expt.current_position.name)
-        matob = matObject(annot)
-        m = matob["timelapseTrapsOmero"].get("cellMothers", None)
-        if m is not None:
-            ids = np.nonzero(m.todense())
-            d = {(self.expt.current_position.name, i, int(m[i, j])): []
-                 for i, j in zip(*ids)}
-            for i, j, k in zip(*ids, d.keys()):
-                d[k].append((self.expt.current_position.name, i, j + 1))
-        else:
-            print("Pos {} has no mother matrix".format(
-                self.expt.current_position.name))
-            d = {}
-
-        return d
-
-    def get_exp_mo_bud(self):
-        d = {}
-        for pos in self.expt.positions:
-            self.expt.current_position = pos
-            d = {**d, **self.get_pos_mo_bud()}
-
-        self.expt.current_position = self.expt.positions[0]
-
-        return d
-
-    def load_extraction(self, folder=None) -> None:
-        if folder is None:
-            folder = Path(self.expt.name + '/extraction')
-
-        self.extraction = {}
-        for pos in self.expt.positions:
-            try:
-                self.extraction[pos] = load(folder / Path(pos + '.gz'))
-
-            except:
-                print(pos, ' not found')
diff --git a/core/processes/autoreg.py b/core/processes/autoreg.py
index 7a1f58d0..9dc65bc3 100644
--- a/core/processes/autoreg.py
+++ b/core/processes/autoreg.py
@@ -121,7 +121,9 @@ class autoreg(PostProcessABC):
         # ar_coeffs: 1D array of coefficients (i.e. phi values)
         sample_acfs_toeplitz = linalg.toeplitz(sample_acfs[0:ar_order])
         # phi vector goes from 1 to P in Jia & Grima (2020)
-        ar_coeffs = linalg.inv(sample_acfs_toeplitz).dot(sample_acfs[1 : ar_order + 1])
+        ar_coeffs = linalg.inv(sample_acfs_toeplitz).dot(
+            sample_acfs[1 : ar_order + 1]
+        )
         # defines a dummy phi_0 as 1. This is so that the indices I use in
         # get_noise_param are consistent with Jia & Grima (2020)
         ar_coeffs = np.insert(ar_coeffs, 0, 1.0, axis=0)
@@ -307,12 +309,18 @@ class autoreg(PostProcessABC):
                     freq_npoints=self.freq_npoints,
                     ar_order=optimal_ar_order,
                 ),
-                optimal_ar_order
+                optimal_ar_order,
             ),
         )
-        freqs_df = pd.DataFrame([element.freqs for element in axes], index=signal.index)
-        power_df = pd.DataFrame([element.power for element in axes], index=signal.index)
-        order_df = pd.DataFrame([element.order for element in axes], index=signal.index)
+        freqs_df = pd.DataFrame(
+            [element.freqs for element in axes], index=signal.index
+        )
+        power_df = pd.DataFrame(
+            [element.power for element in axes], index=signal.index
+        )
+        order_df = pd.DataFrame(
+            [element.order for element in axes], index=signal.index
+        )
 
         return freqs_df, power_df, order_df
diff --git a/core/processes/births.py b/core/processes/births.py
index 834a273d..84a9e50c 100644
--- a/core/processes/births.py
+++ b/core/processes/births.py
@@ -41,15 +41,14 @@ class births(LineageProcess):
     def load_lineage(self, lineage):
         self.lineage = lineage
 
-    def run(self, signal: pd.DataFrame, lineage: np.ndarray = None) -> pd.DataFrame:
+    def run(
+        self, signal: pd.DataFrame, lineage: np.ndarray = None
+    ) -> pd.DataFrame:
         if lineage is None:
            lineage = self.lineage
 
-        get_mothers = lambda trap: lineage[:, 1][lineage[:, 0] == trap]
-        # get_daughters = lambda trap: lineage[:, 2][lineage[:, 0] == trap]
-
-        # birth_events = signal.groupby("trap").apply(lambda x: x.first_valid_index())
-        fvi = signal.apply(lambda x: x.first_valid_index(), axis=1)
+        # fvi must stay a Series: it is indexed with .loc and .index below
+        fvi = signal.apply(lambda x: x.first_valid_index(), axis=1)
 
         traps_mothers = {
             tuple(mo): [] for mo in lineage[:, :2] if tuple(mo) in signal.index
@@ -58,7 +57,9 @@ class births(LineageProcess):
             if (trap, mother) in traps_mothers.keys():
                 traps_mothers[(trap, mother)].append(daughter)
 
-        mothers = signal.loc[set(signal.index).intersection(traps_mothers.keys())]
+        mothers = signal.loc[
+            set(signal.index).intersection(traps_mothers.keys())
+        ]
         births = pd.DataFrame(
             np.zeros((mothers.shape[0], signal.shape[1])).astype(bool),
             index=mothers.index,
@@ -68,7 +69,9 @@ class births(LineageProcess):
         for mother_id, daughters in traps_mothers.items():
             daughters_idx = set(
                 fvi.loc[
-                    fvi.index.intersection(list(product((mother_id[0],), daughters)))
+                    fvi.index.intersection(
+                        list(product((mother_id[0],), daughters))
+                    )
                 ].values
             ).difference({0})
             births.loc[
diff --git a/examples/group.py b/examples/group.py
index 1e59978a..990f5616 100644
--- a/examples/group.py
+++ b/examples/group.py
@@ -12,7 +12,10 @@ poses = [
 
 gr = Group(
     GroupParameters(
-        signals=["/extraction/general/None/area", "/extraction/mCherry/np_max/median"]
+        signals=[
+            "/extraction/general/None/area",
+            "/extraction/mCherry/np_max/median",
+        ]
     )
 )
 gr.run(
diff --git a/grouper.py b/grouper.py
index 3dd29a3d..a3b947b3 100644
--- a/grouper.py
+++ b/grouper.py
@@ -59,7 +59,6 @@ class Grouper(ABC):
 
     @property
     def siglist_grouped(self) -> None:
-
         if not hasattr(self, "_siglist_grouped"):
             self._siglist_grouped = Counter(
                 [x for s in self.signals.values() for x in s.siglist]
@@ -117,7 +116,8 @@ class Grouper(ABC):
         nsignals_dif = len(self.signals) - len(sitems)
         if nsignals_dif:
             print(
-                f"Grouper:Warning: {nsignals_dif} signals do not contain channel {path}"
+                f"Grouper:Warning: {nsignals_dif} signals do not contain"
+                f" channel {path}"
             )
 
         if pool or pool is None:
@@ -195,7 +195,6 @@ class NameGrouper(Grouper):
 
         return self._group_names
 
     def aggregate_multisignals(self, paths=None, **kwargs):
-
         aggregated = pd.concat(
             [
                 self.concat_signal(path, reduce_cols=np.nanmean, **kwargs)
-- 
GitLab
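
Reviewer note on the subject line: the patch rewraps code to the line length configured in pyproject.toml, but the [tool.black] table itself is not part of this diff. Judging by the rewrapped hunks, the limit is presumably 79 characters (black's default is 88). Below is a minimal sketch of how to reproduce one of the rewrappings with black's Python API; the 79-character value and the sample snippet are assumptions, not taken from this patch.

import black

# A line shaped like the pre-patch one in core/multisignal/crosscorr.py;
# at 8 spaces of indentation it exceeds 79 characters, so black rewraps it.
SRC = (
    "class crosscorr:\n"
    "    def run(self, trace_A):\n"
    "        stdA = np.sqrt(np.nanmean(dmean_A ** 2, axis=1)"
    ".reshape((n_replicates, 1)))\n"
)

# black.Mode mirrors the [tool.black] settings in pyproject.toml. With
# line_length=79 the long assignment is split across lines, and with a
# recent black release (>=22.1) `dmean_A ** 2` also becomes `dmean_A**2`,
# matching the crosscorr.py hunk above.
print(black.format_str(SRC, mode=black.Mode(line_length=79)))

Running plain `black .` from the project root picks the same limit up from pyproject.toml automatically, so no command-line flags are needed to get output consistent with this patch.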