diff --git a/__init__.py b/__init__.py deleted file mode 100644 index e69de29bb2d1d6434b8b29ae775ad8c2e48c5391..0000000000000000000000000000000000000000 diff --git a/examples/basic_processes.py b/examples/basic_processes.py deleted file mode 100644 index c322fd4b68b2dd4f310a44bcd2a4ccbebb366f48..0000000000000000000000000000000000000000 --- a/examples/basic_processes.py +++ /dev/null @@ -1,15 +0,0 @@ -from postprocessor.core.processor import PostProcessor, PostProcessorParameters - -params = PostProcessorParameters.default() -pp = PostProcessor( - "/shared_libs/pipeline-core/scripts/pH_calibration_dual_phl__ura8__by4741__01/ph_5_29_025store.h5", - params, -) -tmp = pp.run() - -import h5py - -# f = h5py.File( -# "/shared_libs/pipeline-core/scripts/pH_calibration_dual_phl__ura8__by4741__01/ph_5_29_025store.h5", -# "a", -# ) diff --git a/examples/group.py b/examples/group.py deleted file mode 100644 index 1e59978acca5cd34c7796cb65d93d95d1657773c..0000000000000000000000000000000000000000 --- a/examples/group.py +++ /dev/null @@ -1,21 +0,0 @@ -from pathlib import Path - -from postprocessor.core.group import GroupParameters, Group - -poses = [ - x.name.split("store")[0] - for x in Path( - "/shared_libs/pipeline-core/scripts/data/ph_calibration_dual_phl_ura8_5_04_5_83_7_69_7_13_6_59__01" - ).rglob("*") - if x.name != "images.h5" -] - -gr = Group( - GroupParameters( - signals=["/extraction/general/None/area", "/extraction/mCherry/np_max/median"] - ) -) -gr.run( - central_store="/shared_libs/pipeline-core/scripts/data/ph_calibration_dual_phl_ura8_5_04_5_83_7_69_7_13_6_59__01", - poses=poses, -) diff --git a/examples/testing.py b/examples/testing.py deleted file mode 100644 index d5420d8337710e6a41f47a7cec436ed2a765b5f0..0000000000000000000000000000000000000000 --- a/examples/testing.py +++ /dev/null @@ -1,220 +0,0 @@ -from typing import Dict, List, Union -import re - -import numpy as np -import pandas as pd -from pandas import Series, DataFrame -from sklearn.cluster import KMeans -from matplotlib import pyplot as plt -import seaborn as sns - -import compress_pickle - -from postprocessor.core.postprocessor import PostProcessor -from postprocessor.core.tracks import get_avg_grs, non_uniform_savgol -from postprocessor.core.tracks import clean_tracks, merge_tracks, join_tracks -from postprocessor.core.ph import * - -sns.set_context("talk", font_scale=1.8) -sns.set_theme(style="whitegrid") - -# pp_c = PostProcessor(source=19920) # 19916 -# pp_c.load_tiler_cells() -# # f = "/home/alan/Documents/libs/extraction/extraction/examples/gluStarv_2_0_x2_dual_phl_ura8_00/extraction" -# # f = "/home/alan/Documents/tmp/pH_calibration_dual_phl__ura8__by4741__01/extraction/" -# f = "/home/alan/Documents/tmp/pH_calibration_dual_phl__ura8__by4741_Alan4_00/extraction/" -# pp_c.load_extraction(f) - -# calib = process_phs(pp_c) -# # c = calib.loc[(5.4 < calib["ph"]) & (calib["ph"] < 8)].groupby("ph").mean() -# sns.violinplot(x="ph", y="em_ratio_mean", data=calib) -# plt.show() - - -# bring timelapse data and convert it to pH - -pp = PostProcessor(source=19831) # 19831 -pp.load_tiler_cells() -f = "/home/alan/Documents/tmp/gluStarv_2_0_x2_dual_phl_ura8_00/extraction/" -# f = "/home/alan/Documents/tmp/downUpshift_2_0_2_glu_dual_phluorin__glt1_psa1_ura7__thrice_00/extraction/" -pp.load_extraction(f) - -import compress_pickle - -compress_pickle.dump(pp.extraction, "/home/alan/extraction_example.pkl") - -if True: # Load extracted data or pkld version - new_dfs = compress_pickle.load("/home/alan/Documents/tmp/new_dfs.gz") -# 
Combine dataframes -else: - new_dfs = combine_dfs(get_dfs(pp)) - # t = [x.index for x in new_dfs.values()] - # i = set(t[0]) - # for j in t: - # i = i.intersection(j) - new_dfs = { - k: v - for k, v in new_dfs.items() - if k[2] != "imBackground" - and k[2] != "median" - and ~(((k[0] == "GFPFast") | (k[0] == "pHluorin405")) and k[2] == "max2p5pc") - } - -del pp -compress_pickle.dump(new_dfs, "/home/alan/Documents/tmp/new_dfs.gz") - - -def get_clean_dfs(dfs=None): - if dfs is None: - clean_dfs = compress_pickle.load("/home/alan/Documents/tmp/clean_dfs.gz") - else: - - # Clean timelapse - clean = clean_tracks(new_dfs[("general", None, "area")]) - tra, joint = merge_tracks(clean) - clean_dfs = new_dfs - i_ids = set(clean.index).intersection( - clean_dfs[("general", None, "area")].index - ) - clean_dfs = {k: v.loc[i_ids] for k, v in clean_dfs.items()} - clean_dfs = {k: join_tracks(v, joint, drop=True) for k, v in clean_dfs.items()} - - del new_dfs - compress_pickle.dump(clean_dfs, "/home/alan/Documents/tmp/clean_dfs.gz") - - -def plot_ph_hmap(clean_dfs): - GFPFast = clean_dfs[("GFPFast", np.maximum, "mean")] - phluorin = clean_dfs[("pHluorin405", np.maximum, "mean")] - ph = GFPFast / phluorin - ph = ph.loc[ph.notna().sum(axis=1) > 0.7 * ph.shape[1]] - ph = 1 / ph - - fig, ax = plt.subplots() - hmap(ph, cbar_kws={"label": r"emission ratio $\propto$ pH"}) - plt.xlabel("Time (hours)") - plt.ylabel("Cells") - xticks = plt.xticks(fontsize=15)[0] - ax.set(yticklabels=[], xticklabels=[str(round(i * 5 / 60, 1)) for i in xticks]) - # plt.setp(ax.get_xticklabels(), Rotation=90) - plt.show() - - -def fit_calibs(c, h): - h = process_phs(pp_c) - h["ratio"] = h["GFPFast_bgsub_median"] / h["pHluorin405_bgsub_median"] - sns.lineplot(x="ph", y="ratio", data=h, err_style="bars") - plt.show() - - calibs = fit_calibration(c) - for k, params in calibs.items(): - i, j = ("_".join(k.split("_")[:-1]), k.split("_")[-1]) - if j == "mean" and "k2" not in k: - clean_dfs[k] = objective(clean_dfs[i, np.maximum, j], *params) - - -# max 2.5% / med -def plot_ratio_vs_max2p5(h): - fig, ax = plt.subplots() - sns.regplot( - x="em_ratio_median", - y="mCherry_max2p5pc", - data=h, - scatter=False, - ax=ax, - color="teal", - ) - sns.scatterplot(x="em_ratio_median", y="max5_d_med", data=h, hue="ph", ax=ax) - plt.xlabel(r"Fluorescence ratio $R \propto (1/pH)$") - plt.ylabel("Max 2.5% px / median") - plt.show() - - -em = clean_dfs[("em_ratio", np.maximum, "mean")] -area = clean_dfs[("general", None, "area")] - - -def get_grs(clean_dfs): - area = clean_dfs[("general", None, "area")] - area = area.loc[area.notna().sum(axis=1) > 10] - return area.apply(growth_rate, axis=1) - - -def get_agg(dfs, rng): - # df dict of DataFrames containing an area/vol one TODO generalise this beyond area - # rng tuple of section to use - grs = get_grs(dfs) - smooth = grs.loc(axis=1)[list(range(rng[0], rng[1]))].dropna(how="all") - - aggregate_mean = lambda dfs, rng: pd.concat( - { - k[0] + "_" + k[2]: dfs[k].loc[smooth.index, rng[0] : rng[1]].mean(axis=1) - for k in clean_dfs.keys() - }, - axis=1, - ) - # f_comp_df = comp_df.loc[(comp_df["gr"] > 0) & (area.notna().sum(axis=1) > 50)] - - agg = aggregate_mean(dfs, rng) - agg["max2_med"] = agg["mCherry_max2p5pc"] / agg["mCherry_mean"] - - for c in agg.columns: - agg[c + "_log"] = np.log(agg[c]) - - agg["gr_mean"] = smooth.loc[set(agg.index).intersection(smooth.index)].mean(axis=1) - agg["gr_max"] = smooth.loc[set(agg.index).intersection(smooth.index)].max(axis=1) - - return agg - - -def plot_scatter_fit(x, y, 
data, hue=None, xlabel=None, ylabel=None, ylim=None): - fig, ax = plt.subplots() - sns.regplot(x=x, y=y, data=data, scatter=False, ax=ax) - sns.scatterplot(x=x, y=y, data=data, ax=ax, alpha=0.1, hue=hue) - # plt.show() - if xlabel is not None: - plt.xlabel(xlabel) - if ylabel is not None: - plt.ylabel(ylabel) - if ylim is not None: - plt.ylim(ylim) - - fig.savefig( - "/home/alan/Documents/sync_docs/drafts/third_year_pres/figs/" - + str(len(data)) - + "_" - + x - + "_vs_" - + y - + ".png", - dpi=200, - ) - - -from extraction.core.argo import Argo, annot_from_dset - - -def additional_feats(aggs): - aggs["gr_mean_norm"] = aggs["gr_mean"] * 12 - aggs["log_ratio_r"] = np.log(1 / aggs["em_ratio_mean"]) - return aggs - - -def compare_methods_ph_calculation(dfs): - GFPFast = dfs[("GFPFast", np.maximum, "mean")] - phluorin = dfs[("pHluorin405", np.maximum, "mean")] - ph = GFPFast / phluorin - - sns.scatterplot( - dfs["em_ratio", np.maximum, "mean"].values.flatten(), - ph.values.flatten(), - alpha=0.1, - ) - plt.xlabel("ratio_median") - plt.ylabel("median_ratio") - plt.title("Comparison of ph calculation") - plt.show() - - -# get the number of changes in a bool list -nchanges = lambda x: sum([i for i, j in zip(x[:-2], x[1:]) if operator.xor(i, j)]) diff --git a/gluStarv_2_0_x2_dual_phl_ura8_00/extraction/joinable_pairs.gz b/gluStarv_2_0_x2_dual_phl_ura8_00/extraction/joinable_pairs.gz deleted file mode 100644 index b0098173a329e2ccb2e036673a4876d41079e49f..0000000000000000000000000000000000000000 Binary files a/gluStarv_2_0_x2_dual_phl_ura8_00/extraction/joinable_pairs.gz and /dev/null differ diff --git a/gluStarv_2_0_x2_dual_phl_ura8_00/extraction/phl_ura8_024.gz.REMOVED.git-id b/gluStarv_2_0_x2_dual_phl_ura8_00/extraction/phl_ura8_024.gz.REMOVED.git-id deleted file mode 100644 index 7081c4b152d24b218247a0597af294a1c5961ca5..0000000000000000000000000000000000000000 --- a/gluStarv_2_0_x2_dual_phl_ura8_00/extraction/phl_ura8_024.gz.REMOVED.git-id +++ /dev/null @@ -1 +0,0 @@ -ea300840eaf24164ad3eae825360b48d437d5575 \ No newline at end of file diff --git a/postprocessor/__init__.py b/postprocessor/__init__.py deleted file mode 100644 index e5a0d9b4834ec8f46d6e0d1256c6dcaad2e460fe..0000000000000000000000000000000000000000 --- a/postprocessor/__init__.py +++ /dev/null @@ -1 +0,0 @@ -#!/usr/bin/env python3 diff --git a/postprocessor/__pycache__/processor.cpython-37.pyc b/postprocessor/__pycache__/processor.cpython-37.pyc deleted file mode 100644 index 748204fad28982f10fc9d95f84a1df10002fbd3e..0000000000000000000000000000000000000000 Binary files a/postprocessor/__pycache__/processor.cpython-37.pyc and /dev/null differ diff --git a/postprocessor/functions/__init__.py b/postprocessor/functions/__init__.py deleted file mode 100644 index e5a0d9b4834ec8f46d6e0d1256c6dcaad2e460fe..0000000000000000000000000000000000000000 --- a/postprocessor/functions/__init__.py +++ /dev/null @@ -1 +0,0 @@ -#!/usr/bin/env python3 diff --git a/postprocessor/functions/__pycache__/tracks.cpython-37.pyc b/postprocessor/functions/__pycache__/tracks.cpython-37.pyc deleted file mode 100644 index e6533c963f5eca8ba6bf984e7b83824829fd3e40..0000000000000000000000000000000000000000 Binary files a/postprocessor/functions/__pycache__/tracks.cpython-37.pyc and /dev/null differ diff --git a/postprocessor/functions/test_hdf.py b/postprocessor/functions/test_hdf.py deleted file mode 100644 index 5bd3b1a12e9b08743b8058d17ef4bb23719ac6a4..0000000000000000000000000000000000000000 --- a/postprocessor/functions/test_hdf.py +++ /dev/null @@ -1,19 
+0,0 @@ -#!/usr/bin/env python3 -# c=CellsHDF.from_source("/home/alan/Documents/sync_docs/PhD/tmp/DO6MS2_003store.h5") -import h5py -from core.cells import Cells -import pandas as pd - -# f = h5py.File("/home/alan/Documents/sync_docs/PhD/tmp/DO6MS2_003store.h5") -fname = "/shared_libs/pipeline-core/scripts/data/20191026_ss_experiments_01/DO6MS2_003store.h5" -f = h5py.File(fname) -tracks = f["/extraction/general/None/area"][()] -cell = Cells.from_source(fname) -from postprocessor.core.processes.picker import Picker, PickerParameters - -picker = Picker(cells=cell, parameters=PickerParameters.default()) - -from postprocessor.core.processor import PostProcessor, PostProParameters - -pp = PostProcessor(filename=fname, parameters=PostProParameters.default()) -pp.run() diff --git a/postprocessor/functions/tracks.py b/postprocessor/functions/tracks.py deleted file mode 100644 index e47e60231da31e2c23acf5e97f5b721edbffdfee..0000000000000000000000000000000000000000 --- a/postprocessor/functions/tracks.py +++ /dev/null @@ -1,429 +0,0 @@ -""" -Functions to process, filter and merge tracks. -""" - -# from collections import Counter - -from copy import copy -from typing import Union, List - -import numpy as np -import pandas as pd -from utils_find_1st import find_1st, cmp_larger - -import more_itertools as mit -from scipy.signal import savgol_filter - -# from scipy.optimize import linear_sum_assignment -# from scipy.optimize import curve_fit - -from matplotlib import pyplot as plt - - -def load_test_dset(): - # Load development dataset to test functions - return pd.DataFrame( - { - ("a", 1, 1): [2, 5, np.nan, 6, 8] + [np.nan] * 5, - ("a", 1, 2): list(range(2, 12)), - ("a", 1, 3): [np.nan] * 8 + [6, 7], - ("a", 1, 4): [np.nan] * 5 + [9, 12, 10, 14, 18], - }, - index=range(1, 11), - ).T - - -def max_ntps(track: pd.Series) -> int: - # Get number of timepoints - indices = np.where(track.notna()) - return np.max(indices) - np.min(indices) - - -def max_nonstop_ntps(track: pd.Series) -> int: - nona_tracks = track.notna() - consecutive_nonas_grouped = [ - len(list(x)) for x in mit.consecutive_groups(np.flatnonzero(nona_tracks)) - ] - return max(consecutive_nonas_grouped) - - -def get_tracks_ntps(tracks: pd.DataFrame) -> pd.Series: - return tracks.apply(max_ntps, axis=1) - - -def get_avg_gr(track: pd.Series) -> int: - """ - Get average growth rate for a track. 
- - :param tracks: Series with volume and timepoints as indices - """ - ntps = max_ntps(track) - vals = track.dropna().values - gr = (vals[-1] - vals[0]) / ntps - return gr - - -def get_avg_grs(tracks: pd.DataFrame) -> pd.DataFrame: - """ - Get average growth rate for a group of tracks - - :param tracks: (m x n) dataframe where rows are cell tracks and - columns are timepoints - """ - return tracks.apply(get_avg_gr, axis=1) - - -def clean_tracks(tracks, min_len: int = 15, min_gr: float = 1.0) -> pd.DataFrame: - """ - Clean small non-growing tracks and return the reduced dataframe - - :param tracks: (m x n) dataframe where rows are cell tracks and - columns are timepoints - :param min_len: int number of timepoints cells must have not to be removed - :param min_gr: float Minimum mean growth rate to assume an outline is growing - """ - ntps = get_tracks_ntps(tracks) - grs = get_avg_grs(tracks) - - growing_long_tracks = tracks.loc[(ntps >= min_len) & (grs > min_gr)] - - return growing_long_tracks - - -def merge_tracks(tracks, drop=False, **kwargs) -> pd.DataFrame: - """ - Join tracks that are contiguous and within a volume threshold of each other - - :param tracks: (m x n) dataframe where rows are cell tracks and - columns are timepoints - :param kwargs: args passed to get_joinable - - returns - - :joint_tracks: (m x n) Dataframe where rows are cell tracks and - columns are timepoints. Merged tracks are still present but filled - with np.nans. - """ - - # calculate tracks that can be merged until no more traps can be merged - joinable_pairs = get_joinable(tracks, **kwargs) - if joinable_pairs: - tracks = join_tracks(tracks, joinable_pairs, drop=drop) - joint_ids = get_joint_ids(joinable_pairs) - - return (tracks, joinable_pairs) - - -def get_joint_ids(merging_seqs) -> dict: - """ - Convert a series of merges into a dictionary where - the key is the cell_id of destination and the value a list - of the other track ids that were merged into the key - - :param merging_seqs: list of tuples of indices indicating the - sequence of merging events. It is important for this to be in sequential order - - How it works: - - The order of merging matters for naming, always the leftmost track will keep the id - - For example, having tracks (a, b, c, d) and the iterations of merge events: - - 0 a b c d - 1 a b cd - 2 ab cd - 3 abcd - - We shold get: - - output {a:a, b:a, c:a, d:a} - - """ - if not merging_seqs: - return {} - - targets, origins = list(zip(*merging_seqs)) - static_tracks = set(targets).difference(origins) - - joint = {track_id: track_id for track_id in static_tracks} - for target, origin in merging_seqs: - joint[origin] = target - - moved_target = [ - k for k, v in joint.items() if joint[v] != v and v in joint.values() - ] - - for orig in moved_target: - joint[orig] = rec_bottom(joint, orig) - - return { - k: v for k, v in joint.items() if k != v - } # remove ids that point to themselves - - -def rec_bottom(d, k): - if d[k] == k: - return k - else: - return rec_bottom(d, d[k]) - - -def join_tracks(tracks, joinable_pairs, drop=True) -> pd.DataFrame: - """ - Join pairs of tracks from later tps towards the start. - - :param tracks: (m x n) dataframe where rows are cell tracks and - columns are timepoints - - returns (copy) - - :param joint_tracks: (m x n) Dataframe where rows are cell tracks and - columns are timepoints. Merged tracks are still present but filled - with np.nans. 
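# ---------------------------------------------------------------------------
# Illustrative sketch (assumed helper, not part of the original module): the
# get_joint_ids docstring above collapses a sequence of merge events so that
# every merged track ends up pointing at the left-most surviving id.  A
# minimal chain-following version of that idea, to make the a/b/c/d example
# concrete:

def resolve_merge_targets(merging_seqs):
    """Map each merged origin id to its final (left-most) target id."""
    parent = {origin: target for target, origin in merging_seqs}

    def root(k):
        # Follow the chain until an id that was never merged away.
        while k in parent:
            k = parent[k]
        return k

    return {origin: root(origin) for origin in parent}

# d merges into c, c into b and b into a, so b, c and d all map to a.
assert resolve_merge_targets([("c", "d"), ("b", "c"), ("a", "b")]) == {
    "b": "a", "c": "a", "d": "a"
}
# ---------------------------------------------------------------------------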
- :param drop: bool indicating whether or not to drop moved rows - - """ - - tmp = copy(tracks) - for target, source in joinable_pairs: - tmp.loc[target] = join_track_pair(tmp.loc[target], tmp.loc[source]) - - if drop: - tmp = tmp.drop(source) - - return tmp - - -def join_track_pair(target, source): - tgt_copy = copy(target) - end = find_1st(target.values[::-1], 0, cmp_larger) - tgt_copy.iloc[-end:] = source.iloc[-end:].values - return tgt_copy - - -def get_joinable(tracks, smooth=False, tol=0.1, window=5, degree=3) -> dict: - """ - Get the pair of track (without repeats) that have a smaller error than the - tolerance. If there is a track that can be assigned to two or more other - ones, it chooses the one with a lowest error. - - :param tracks: (m x n) dataframe where rows are cell tracks and - columns are timepoints - :param tol: float or int threshold of average (prediction error/std) necessary - to consider two tracks the same. If float is fraction of first track, - if int it is absolute units. - :param window: int value of window used for savgol_filter - :param degree: int value of polynomial degree passed to savgol_filter - - """ - - # Commented because we are not smoothing in this step yet - # candict = {k:v for d in contig.values for k,v in d.items()} - - # smooth all relevant tracks - - if smooth: # Apply savgol filter TODO fix nans affecting edge placing - clean = clean_tracks( - tracks, min_len=window + 1, min_gr=0.9 - ) # get useful tracks - savgol_on_srs = lambda x: non_uniform_savgol(x.index, x.values, window, degree) - contig = clean.groupby(["trap"]).apply(get_contiguous_pairs) - contig = contig.loc[contig.apply(len) > 0] - flat = set([k for v in contig.values for i in v for j in i for k in j]) - smoothed_tracks = clean.loc[flat].apply(savgol_on_srs, 1) - else: - contig = tracks.groupby(["trap"]).apply(get_contiguous_pairs) - contig = contig.loc[contig.apply(len) > 0] - flat = set([k for v in contig.values for i in v for j in i for k in j]) - smoothed_tracks = tracks.loc[flat].apply(lambda x: np.array(x.values), axis=1) - - # fetch edges from ids TODO (IF necessary, here we can compare growth rates) - idx_to_edge = lambda preposts: [ - ( - [get_val(smoothed_tracks.loc[pre], -1) for pre in pres], - [get_val(smoothed_tracks.loc[post], 0) for post in posts], - ) - for pres, posts in preposts - ] - edges = contig.apply(idx_to_edge) - - closest_pairs = edges.apply(get_vec_closest_pairs, tol=tol) - - # match local with global ids - joinable_ids = [ - localid_to_idx(closest_pairs.loc[i], contig.loc[i]) for i in closest_pairs.index - ] - - return [pair for pairset in joinable_ids for pair in pairset] - - -get_val = lambda x, n: x[~np.isnan(x)][n] if len(x[~np.isnan(x)]) else np.nan - - -def localid_to_idx(local_ids, contig_trap): - """Fetch then original ids from a nested list with joinable local_ids - - input - :param local_ids: list of list of pairs with cell ids to be joint - :param local_ids: list of list of pairs with corresponding cell ids - - return - list of pairs with (experiment-level) ids to be joint - """ - lin_pairs = [] - for i, pairs in enumerate(local_ids): - if len(pairs): - for left, right in pairs: - lin_pairs.append((contig_trap[i][0][left], contig_trap[i][1][right])) - return lin_pairs - - -def get_vec_closest_pairs(lst: List, **kwags): - return [get_closest_pairs(*l, **kwags) for l in lst] - - -def get_closest_pairs(pre: List[float], post: List[float], tol: Union[float, int] = 1): - """Calculate a cost matrix the Hungarian algorithm to pick the best set of - 
options - - input - :param pre: list of floats with edges on left - :param post: list of floats with edges on right - :param tol: int or float if int metrics of tolerance, if float fraction - - returns - :: list of indices corresponding to the best solutions for matrices - - """ - if len(pre) > len(post): - dMetric = np.abs(np.subtract.outer(post, pre)) - else: - dMetric = np.abs(np.subtract.outer(pre, post)) - # dMetric[np.isnan(dMetric)] = tol + 1 + np.nanmax(dMetric) # nans will be filtered - # ids = linear_sum_assignment(dMetric) - dMetric[np.isnan(dMetric)] = tol + 1 + np.nanmax(dMetric) # nans will be filtered - - ids = solve_matrix(dMetric) - if not len(ids[0]): - return [] - - norm = ( - np.array(pre)[ids[len(pre) > len(post)]] if tol < 1 else 1 - ) # relative or absolute tol - result = dMetric[ids] / norm - ids = ids if len(pre) < len(post) else ids[::-1] - - return [idx for idx, res in zip(zip(*ids), result) if res <= tol] - - -def solve_matrix(dMetric): - """ - Solve cost matrix focusing on getting the smallest cost at each iteration. - - input - :param dMetric: np.array cost matrix - - returns - tuple of np.arrays indicating picks with lowest individual value - """ - glob_is = [] - glob_js = [] - if (~np.isnan(dMetric)).any(): - tmp = copy(dMetric) - std = sorted(tmp[~np.isnan(tmp)]) - while (~np.isnan(std)).any(): - v = std[0] - i_s, j_s = np.where(tmp == v) - i = i_s[0] - j = j_s[0] - tmp[i, :] += np.nan - tmp[:, j] += np.nan - glob_is.append(i) - glob_js.append(j) - - std = sorted(tmp[~np.isnan(tmp)]) - - return (np.array(glob_is), np.array(glob_js)) - - -def plot_joinable(tracks, joinable_pairs, max=64): - """ - Convenience plotting function for debugging and data vis - """ - - nx = 8 - ny = 8 - _, axes = plt.subplots(nx, ny) - for i in range(nx): - for j in range(ny): - if i * ny + j < len(joinable_pairs): - ax = axes[i, j] - pre, post = joinable_pairs[i * ny + j] - pre_srs = tracks.loc[pre].dropna() - post_srs = tracks.loc[post].dropna() - ax.plot(pre_srs.index, pre_srs.values, "b") - # try: - # totrange = np.arange(pre_srs.index[0],post_srs.index[-1]) - # ax.plot(totrange, interpolate(pre_srs, totrange), 'r-') - # except: - # pass - ax.plot(post_srs.index, post_srs.values, "g") - - plt.show() - - -def get_contiguous_pairs(tracks: pd.DataFrame) -> list: - """ - Get all pair of contiguous track ids from a tracks dataframe. 
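# ---------------------------------------------------------------------------
# Illustrative sketch (toy data, hypothetical names; not part of the original
# module): solve_matrix above resolves the cost matrix greedily, always taking
# the smallest remaining cost and retiring that row and column.  The same idea
# on the kind of matrix get_closest_pairs builds, i.e. absolute differences
# between the last value of ending tracks and the first value of starting
# tracks:

import numpy as np

track_ends = np.array([10.0, 22.0])          # last volume of tracks that end
track_starts = np.array([21.5, 9.5, 40.0])   # first volume of tracks that start

cost = np.abs(np.subtract.outer(track_ends, track_starts))

pairs = []
tmp = cost.copy()
while np.isfinite(tmp).any():
    i, j = np.unravel_index(np.argmin(tmp), tmp.shape)
    pairs.append((i, j))
    tmp[i, :] = np.inf   # each ending track is matched at most once
    tmp[:, j] = np.inf   # each starting track is matched at most once

# Track 0 (ends at 10.0) pairs with start 1 (9.5); track 1 with start 0 (21.5).
assert sorted(pairs) == [(0, 1), (1, 0)]
# ---------------------------------------------------------------------------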
- - :param tracks: (m x n) dataframe where rows are cell tracks and - columns are timepoints - :param min_dgr: float minimum difference in growth rate from the interpolation - """ - # indices = np.where(tracks.notna()) - - mins, maxes = [ - tracks.notna().apply(np.where, axis=1).apply(fn) for fn in (np.min, np.max) - ] - - mins_d = mins.groupby(mins).apply(lambda x: x.index.tolist()) - mins_d.index = mins_d.index - 1 # make indices equal - # TODO add support for skipping time points - maxes_d = maxes.groupby(maxes).apply(lambda x: x.index.tolist()) - - common = sorted(set(mins_d.index).intersection(maxes_d.index), reverse=True) - - return [(maxes_d[t], mins_d[t]) for t in common] - - -# def fit_track(track: pd.Series, obj=None): -# if obj is None: -# obj = objective - -# x = track.dropna().index -# y = track.dropna().values -# popt, _ = curve_fit(obj, x, y) - -# return popt - -# def interpolate(track, xs) -> list: -# ''' -# Interpolate next timepoint from a track - -# :param track: pd.Series of volume growth over a time period -# :param t: int timepoint to interpolate -# ''' -# popt = fit_track(track) -# # perr = np.sqrt(np.diag(pcov)) -# return objective(np.array(xs), *popt) - - -# def objective(x,a,b,c,d) -> float: -# # return (a)/(1+b*np.exp(c*x))+d -# return (((x+d)*a)/((x+d)+b))+c - -# def cand_pairs_to_dict(candidates): -# d={x:[] for x,_ in candidates} -# for x,y in candidates: -# d[x].append(y) -# return d diff --git a/postprocessor/group.py b/postprocessor/group.py deleted file mode 100644 index 99cf00cf775f034d69be6f29fd7d9bb77dba5f54..0000000000000000000000000000000000000000 --- a/postprocessor/group.py +++ /dev/null @@ -1,130 +0,0 @@ -""" -Class to group multiple positions into one using one of several criteria. -""" - -from pathlib import Path -import re - -import h5py -import pandas as pd - -from postprocessor.core.io.base import groupsort -from postprocessor.core.io.signal import Signal - -from postprocessor.core.processes.base import ParametersABC, ProcessABC - - -class GroupParameters(ParametersABC): - def __init__(self, by="name", processes=[], signals=[]): - self.by = by - self.signals = signals - self.processes = processes - - @classmethod - def default(cls): - return cls.from_dict({"by": "name", "signals": [], "processes": []}) - - -class Group(ProcessABC): - def __init__(self, parameters): - super().__init__(parameters) - - def get_position_filenames(self, exp_root, poses): - """ - Get filenames as a dictionary where the key is the position and value the filename. 
- """ - central_store = Path(exp_root) / "store.h5" - if central_store.exists(): - hdf = h5py.File(central_store, "r") - self.filenames = [pos.attrs["filename"] for pos in hdf["/positions/"]] - hdf.close() - else: # If no central store just list position files in expt root folder - fullfiles = [x for x in Path(exp_root).glob("*store.h5")] - files = [x.name for x in Path(exp_root).glob("*store.h5")] - filenames = [False for _ in poses] - for i, pos in enumerate(poses): - matches = [ - True if re.match(pos + ".*.h5", fname) else False for fname in files - ] - if any(matches): - assert sum(matches) == 1, "More than one match" - filenames[i] = (pos, fullfiles[matches.index(True)]) - - self.filenames = {fname[0]: fname[1] for fname in filenames if fname} - - self.positions = list(self.filenames.keys()) - return self.filenames - - def get_signals(self): - # hdf = h5py.File(central_store, "r") - # keys_d = groupsort(keys) - self.signals = {pos: {} for pos in self.filenames.keys()} - for pos, fname in self.filenames.items(): - for signal in self.parameters.signals: - self.signals[pos][signal] = pd.read_hdf(fname, signal) - - return self.signals - - def gen_groups(self): - if self.by == "group": # Use group names in metadata - pass - elif self.by == "name": # Infer groups from signal concatenation - # Remove last four characters and find commonalities larger than 4 - # characters between posnames and group them that way. - groupnames = list(set([x[:-3] for x in self.positions])) - self.group_signal_tree = {group: [] for group in groupnames} - self.poses_grouped = {group: [] for group in groupnames} - for pos in self.positions: - group = groupnames[groupnames.index(pos[:-3])] - self.group_signal_tree[group].append(self.signals[pos]) - self.poses_grouped[group].append(pos) - - elif ( - type(self.by) == tuple - ): # Manually give groups as tuple or list of positions - pass - - def concat_signals(self): - self.concated_signals = {group: {} for group in self.group_signal_tree} - for k, group in self.group_signal_tree.items(): - for signal in self.parameters.signals: - self.concated_signals[k][signal] = pd.concat( - [g[signal] for g in group], keys=self.poses_grouped[k] - ) - - return self.concated_signals - - def process_signals(self, grouped_signals): - pass - - def run(self, central_store, poses): - - self.get_position_filenames(central_store, poses) - self.get_signals() - self.gen_groups() - self.concat_signals() - # processed_signals = self.process_signals(grouped_signals) - - return self.concated_signals - # return processed_signals - - -poses = [ - x.name.split("store")[0] - for x in Path( - "/shared_libs/pipeline-core/scripts/data/ph_calibration_dual_phl_ura8_5_04_5_83_7_69_7_13_6_59__01" - ).rglob("*") - if x.name != "images.h5" -] -gr = Group( - GroupParameters( - signals=["/extraction/general/None/area", "/extraction/mCherry/np_max/median"] - ) -) -gr.run( - central_store="/shared_libs/pipeline-core/scripts/data/ph_calibration_dual_phl_ura8_5_04_5_83_7_69_7_13_6_59__01", - poses=poses, -) -signal = Signal( - "/shared_libs/pipeline-core/scripts/data/ph_calibration_dual_phl_ura8_5_04_5_83_7_69_7_13_6_59__01/ph_5_04_001store.h5" -) diff --git a/postprocessor/old/cell.py b/postprocessor/old/cell.py deleted file mode 100644 index f96f30d53e8bc462044eb40bb6417d1454c3f742..0000000000000000000000000000000000000000 --- a/postprocessor/old/cell.py +++ /dev/null @@ -1,23 +0,0 @@ -""" -Main data processing functions. The inputs for this functions must be data series. 
-""" - -import numpy as np -from pandas import Series, DataFrames - -def bg_sub(data:Series, background:float): - return data - background - -def ratio(data1:Series, data2:Series): - return data1 / data2 - -def norm_tps(data:Series, tps:list, fun=np.nanmean): - return data / fun(data.loc(axis=1)[tps]) - -def growth_rate(data:Series, alg=None, filt = 'savgol'): - if alg is None: - alg='standard' - - - - diff --git a/postprocessor/old/ph.py b/postprocessor/old/ph.py deleted file mode 100644 index a28a8055c8c64fe1ff84f9a6b5692f5d722bb260..0000000000000000000000000000000000000000 --- a/postprocessor/old/ph.py +++ /dev/null @@ -1,364 +0,0 @@ -from typing import Dict, List, Union -import re - -import numpy as np -import pandas as pd -from pandas import Series, DataFrame -from sklearn.cluster import KMeans -from matplotlib import pyplot as plt -import seaborn as sns - -import compress_pickle - -from postprocessor.core.postprocessor import PostProcessor -from postprocessor.core.tracks import get_avg_grs, non_uniform_savgol -from postprocessor.core.ph import * - - -def filter_by_gfp(dfs): - gfps = pd.concat([t[("GFPFast_bgsub", np.maximum, "median")] for t in dfs]) - avgs_gfp = gfps.mean(axis=1) - high_gfp = get_high_k2(avgs_gfp) - # high_gfp = avgs_gfp[avgs_gfp > 200] - - return high_gfp - - -def filter_by_area(dfs, min=50): - areas = pd.concat([t[("general", None, "area")] for t in dfs]) - avgs_areas = areas[(areas.notna().sum(axis=1) > areas.shape[1] // (1.25))].mean( - axis=1 - ) - avgs_areas = avgs_areas[(avgs_areas > min)] - - return avgs_areas - - -def get_high_k2(df): - kmeans = KMeans(n_clusters=2) - vals = df.values.reshape(-1, 1) - kmeans.fit(vals) - high_clust_id = kmeans.cluster_centers_.argmax() - - return df.loc[kmeans.predict(vals) == high_clust_id] - - -def get_concats(dfs, keys): - return pd.concat([t.get((keys), pd.DataFrame()) for t in dfs]) - - -def get_dfs(pp): - dfs = [pp.extraction[pp.expt.positions[i]] for i in range(len(pp.expt.positions))] - return dfs - - -combine_dfs = lambda dfs: {k: get_concats(dfs, k) for k in dfs[0].keys()} - - -def merge_channels(pp, min_area=50): - dfs = get_dfs(pp) - # rats = get_concats(dfs, ("em_ratio_bgsub", np.maximum, "median")) - # gfps = filter_by_gfp(dfs) - - avgs_area = filter_by_area(dfs, min=50) - # ids = [x for x in set(gfps.index).intersection(avgs_area.index)] - ids = avgs_area.index - - new_dfs = combine_dfs(dfs) - - h = pd.DataFrame( - { - k[0] + "_" + k[2]: v.loc[ids].mean(axis=1) - for k, v in new_dfs.items() - if k[-1] != "imBackground" - } - ) - return h - - -def process_phs(pp, min_area=200): - h = merge_channels(pp, min_area) - h.index.names = ["pos", "trap", "cell"] - ids = h.index - h = h.reset_index() - - h["ph"] = h["pos"].apply(lambda x: float(x[3:7].replace("_", "."))) - h["max5_d_med"] = h["mCherry_max2p5pc"] / h["mCherry_median"] - - h = h.set_index(ids) - h = h.drop(["pos", "trap", "cell"], axis=1) - return h - - -def growth_rate( - data: Series, alg=None, filt={"kind": "savgol", "window": 5, "degree": 3} -): - window = filt["window"] - degree = filt["degree"] - if alg is None: - alg = "standard" - - if filt: # TODO add support for multiple algorithms - data = Series( - non_uniform_savgol( - data.dropna().index, data.dropna().values, window, degree - ), - index=data.dropna().index, - ) - - return Series(np.convolve(data, diff_kernel, "same"), index=data.dropna().index) - - -# import numpy as np - -# diff_kernel = np.array([1, -1]) -# gr = clean.apply(growth_rate, axis=1) -# from postprocessor.core.tracks import 
non_uniform_savgol, clean_tracks - - -def sort_df(df, by="first", rev=True): - nona = df.notna() - if by == "len": - idx = nona.sum(axis=1) - elif by == "first": - idx = nona.idxmax(axis=1) - idx = idx.sort_values().index - - if rev: - idx = idx[::-1] - - return df.loc[idx] - - -# test = tmp[("GFPFast", np.maximum, "median")] -# test2 = tmp[("pHluorin405", np.maximum, "median")] -# ph = test / test2 -# ph = ph.stack().reset_index(1) -# ph.columns = ["tp", "fl"] - - -def m2p5_med(ext, ch, red=np.maximum): - m2p5pc = ext[(ch, red, "max2p5pc")] - med = ext[(ch, red, "median")] - - result = m2p5pc / med - - return result - - -def plot_avg(df): - df = df.stack().reset_index(1) - df.columns = ["tp", "val"] - - sns.relplot(x=df["tp"], y=df["val"], kind="line") - plt.show() - - -def split_data(df: DataFrame, splits: List[int]): - dfs = [df.iloc[:, i:j] for i, j in zip((0,) + splits, splits + (df.shape[1],))] - return dfs - - -def growth_rate( - data: Series, alg=None, filt={"kind": "savgol", "window": 7, "degree": 3} -): - if alg is None: - alg = "standard" - - if filt: # TODO add support for multiple algorithms - window = filt["window"] - degree = filt["degree"] - data = Series( - non_uniform_savgol( - data.dropna().index, data.dropna().values, window, degree - ), - index=data.dropna().index, - ) - - diff_kernel = np.array([1, -1]) - - return Series(np.convolve(data, diff_kernel, "same"), index=data.dropna().index) - - -# pp = PostProcessor(source=19831) -# pp.load_tiler_cells() -# f = "/home/alan/Documents/sync_docs/libs/postproc/gluStarv_2_0_x2_dual_phl_ura8_00/extraction" -# pp.load_extraction( -# "/home/alan/Documents/sync_docs/libs/postproc/postprocessor/" -# + pp.expt.name -# + "/extraction/" -# ) -# tmp = pp.extraction["phl_ura8_002"] - - -def _check_bg(data): - for k in list(pp.extraction.values())[0].keys(): - for p in pp.expt.positions: - if k not in pp.extraction[p]: - print(p, k) - - -# data = { -# k: pd.concat([pp.extraction[pos][k] for pos in pp.expt.positions[:-3]]) -# for k in list(pp.extraction.values())[0].keys() -# } - - -def hmap(df, **kwargs): - g = sns.heatmap(sort_df(df), robust=True, cmap="mako_r", **kwargs) - plt.xlabel("") - return g - - -# from random import randint -# x = randint(0, len(smooth)) -# plt.plot(clean.iloc[x], 'b') -# plt.plot(smooth.iloc[x], 'r') -# plt.show() - - -# data = tmp -# df = data[("general", None, "area")] -# clean = clean_tracks(df, min_len=160) -# clean = clean.loc[clean.notna().sum(axis=1) > 9] -# gr = clean.apply(growth_rate, axis=1) -# splits = (72, 108, 180) -# gr_sp = split_data(gr, splits) - -# idx = gr.index - -# bg = get_bg(data) -# test = data[("GFPFast", np.maximum, "median")] -# test2 = data[("pHluorin405", np.maximum, "median")] -# ph = (test / test2).loc[idx] -# c = pd.concat((ph.mean(1), gr.max(1)), axis=1) -# c.columns = ["ph", "gr_max"] -# # ph = ph.stack().reset_index(1) -# # ph.columns = ['tp', 'fl'] - -# ph_sp = split_data(gr, splits) - - -def get_bg(data): - bg = {} - fl_subkeys = [ - x - for x in data.keys() - if x[0] in ["GFP", "GFPFast", "mCherry", "pHluorin405"] - and x[-1] != "imBackground" - ] - for k in fl_subkeys: - nk = list(k) - bk = tuple(nk[:-1] + ["imBackground"]) - nk = tuple(nk[:-1] + [nk[-1] + "_BgSub"]) - tmp = [] - for i, v in data[bk].iterrows(): - if i in data[k].index: - newdf = data[k].loc[i] / v - newdf.index = pd.MultiIndex.from_tuples([(*i, c) for c in newdf.index]) - tmp.append(newdf) - bg[nk] = pd.concat(tmp) - - return bg - - -def calc_ph(bg): - fl_subkeys = [x for x in bg.keys() if x[0] in 
["GFP", "GFPFast", "pHluorin405"]] - chs = list(set([x[0] for x in fl_subkeys])) - assert len(chs) == 2, "Too many channels" - ch1 = [x[1:] for x in fl_subkeys if x[0] == chs[0]] - ch2 = [x[1:] for x in fl_subkeys if x[0] == chs[1]] - inter = list(set(ch1).intersection(ch2)) - ph = {} - for red_fld in inter: - ph[tuple(("ph",) + red_fld)] = ( - bg[tuple((chs[0],) + red_fld)] / bg[tuple((chs[1],) + red_fld)] - ) - - -def get_traps(pp): - t0 = {} - for pos in pp.tiler.positions: - pp.tiler.current_position = pos - t0[pos] = pp.tiler.get_traps_timepoint( - 0, channels=[0, pp.tiler.channels.index("mCherry")], z=[0, 1, 2, 3, 4] - ) - - return t0 - - -def get_pos_ph(pp): - pat = re.compile(r"ph_([0-9]_[0-9][0-9])") - return { - pos: float(pat.findall(pos)[0].replace("_", ".")) for pos in pp.tiler.positions - } - - -def plot_sample_bf_mch(pp): - bf_mch = get_traps(pp) - ts = [{i: v[:, j, ...] for i, v in bf_mch.items()} for j in [0, 1]] - tsbf = {i: v[:, 0, ...] for i, v in bf_mch.items()} - - posdict = {k: v for k, v in get_pos_ph(pp).items()} - posdict = {v: k for k, v in posdict.items()} - posdict = {v: k for k, v in posdict.items()} - ph = np.unique(list(posdict.values())).tolist() - counters = {ph: 0 for ph in ph} - n = [np.random.randint(ts[0][k].shape[0]) for k in posdict.keys()] - - fig, axes = plt.subplots(2, 5) - for k, (t, name) in enumerate(zip(ts, ["Bright field", "mCherry"])): - for i, (pos, ph) in enumerate(posdict.items()): - # i = ph.index(posdict[pos]) - axes[k, i].grid(False) - axes[k, i].set( - xticklabels=[], - yticklabels=[], - ) - axes[k, i].set_xlabel(posdict[pos] if k else None, fontsize=28) - axes[k, i].imshow( - np.maximum.reduce(t[pos][n[i], 0], axis=2), - cmap="gist_gray" if not k else None, - ) - # counters[posdict[pos]] += 1 - plt.tick_params( - axis="x", # changes apply to the x-axis - which="both", # both major and minor ticks are affected - bottom=False, # ticks along the bottom edge are off - top=False, # ticks along the top edge are off - labelbottom=False, - ) # labels along the bottom edge are off - axes[k, 0].set_ylabel(name, fontsize=28) - plt.tight_layout() - plt.show() - - -# Plotting calibration curve -from scipy.optimize import curve_fit - - -def fit_calibration(h): - ycols = [x for x in h.columns if "em_ratio" in x] - xcol = "ph" - - def objective(x, a, b): - return a * x + b - - # fig, axes = plt.subplots(1, len(ycols)) - # for i, ycol in enumerate(ycols): - # d = h[[xcol, ycol]] - # params, _ = curve_fit(objective, *[d[col].values for col in d.columns]) - # sns.lineplot(x=xcol, y=ycol, data=h, alpha=0.5, err_style="bars", ax=axes[i]) - # # sns.lineplot(d[xcol], objective(d[xcol].values, *params), ax=axes[i]) - # plt.show() - - ycol = "em_ratio_mean" - d = h[[xcol, *ycols]] - tmp = d.groupby("ph").mean() - calibs = {ycol: curve_fit(objective, tmp.index, tmp[ycol])[0] for ycol in ycols} - # sns.lineplot(x=xcol, y=ycol, data=d, alpha=0.5, err_style="bars") - # plt.xlabel("pH") - # plt.ylabel("pHluorin emission ratio") - # sns.lineplot(d[xcol], objective(d[xcol], *params)) - - return calibs diff --git a/postprocessor/old/postprocessor.py b/postprocessor/old/postprocessor.py deleted file mode 100644 index 7e024d2656d5885025cfe0bb4314cdc7499a3da5..0000000000000000000000000000000000000000 --- a/postprocessor/old/postprocessor.py +++ /dev/null @@ -1,106 +0,0 @@ -import pkg_resources -from pathlib import Path, PosixPath -from typing import Union, List, Dict -from datetime import datetime - -from compress_pickle import load, dump - -from core.experiment import 
Experiment -from core.cells import Cells -from core.segment import Tiler -from core.io.matlab import matObject -from core.experiment import Experiment - - -class PostProcessor: - ''' - Base class to perform feature extraction. - :param parameters: Parameters class with channels, reduction functions and - extraction functions to use. - :param source: Origin of experiment, if int it is assumed from Omero, if str - or Path - ''' - def __init__(self, parameters=None, source: Union[int, str, Path] = None): - # self.params = parameters - if source is not None: - if type(source) is int: - self.expt_id = source - self.load_expt(source, omero=True) - elif type(source) is str or PosixPath: - self.load_expt(source, omero=False) - - @property - def channels(self): - if not hasattr(self, '_channels'): - if type(self.params.tree) is dict: - self._channels = tuple(self.params.tree.keys()) - - return self._channels - - @property - def current_position(self): - assert hasattr(self, 'expt'), 'No experiment loaded.' - - if hasattr(self, 'tiler'): - assert self.expt.current_position.name == self.tiler.current_position - - return self.expt.current_position - - def load_expt(self, source: Union[int, str], omero: bool = False) -> None: - if omero: - self.expt = Experiment.from_source( - self.expt_id, #Experiment ID on OMERO - 'upload', #OMERO Username - '***REMOVED***', #OMERO Password - 'islay.bio.ed.ac.uk', #OMERO host - port=4064 #This is default - ) - else: - self.expt = Experiment.from_source(source) - self.expt_id = self.expt.exptID - - def load_tiler_cells(self) -> None: - self.tiler = Tiler(self.expt) - self.cells = Cells() - self.cells = self.cells.from_source( - self.expt.current_position.annotation) - - def get_pos_mo_bud(self): - annot = self.expt._get_position_annotation( - self.expt.current_position.name) - matob = matObject(annot) - m = matob["timelapseTrapsOmero"].get("cellMothers", None) - if m is not None: - ids = np.nonzero(m.todense()) - d = {(self.expt.current_position.name, i, int(m[i, j])): [] - for i, j in zip(*ids)} - for i, j, k in zip(*ids, d.keys()): - d[k].append((self.expt.current_position.name, i, j + 1)) - else: - print("Pos {} has no mother matrix".format( - self.expt.current_position.name)) - d = {} - - return d - - def get_exp_mo_bud(self): - d = {} - for pos in self.expt.positions: - self.expt.current_position = pos - d = {**d, **self.get_pos_mo_bud()} - - self.expt.current_position = self.expt.positions[0] - - return d - - def load_extraction(self, folder=None) -> None: - if folder is None: - folder = Path(self.expt.name + '/extraction') - - self.extraction = {} - for pos in self.expt.positions: - try: - self.extraction[pos] = load(folder / Path(pos + '.gz')) - - except: - print(pos, ' not found') diff --git a/postprocessor/processes/__init__.py b/postprocessor/processes/__init__.py deleted file mode 100644 index e5a0d9b4834ec8f46d6e0d1256c6dcaad2e460fe..0000000000000000000000000000000000000000 --- a/postprocessor/processes/__init__.py +++ /dev/null @@ -1 +0,0 @@ -#!/usr/bin/env python3 diff --git a/postprocessor/processes/__pycache__/aggregate.cpython-37.pyc b/postprocessor/processes/__pycache__/aggregate.cpython-37.pyc deleted file mode 100644 index a3329e1b0b398db7653dfa93286601c8f4c87a8b..0000000000000000000000000000000000000000 Binary files a/postprocessor/processes/__pycache__/aggregate.cpython-37.pyc and /dev/null differ diff --git a/postprocessor/processes/__pycache__/base.cpython-37.pyc b/postprocessor/processes/__pycache__/base.cpython-37.pyc deleted file mode 
100644 index 338fe6150c0698a4364f21070ac77f7ddd552de5..0000000000000000000000000000000000000000 Binary files a/postprocessor/processes/__pycache__/base.cpython-37.pyc and /dev/null differ diff --git a/postprocessor/processes/__pycache__/bud_metric.cpython-37.pyc b/postprocessor/processes/__pycache__/bud_metric.cpython-37.pyc deleted file mode 100644 index 6c98020d909ca54c89d360f709f984643ae6614d..0000000000000000000000000000000000000000 Binary files a/postprocessor/processes/__pycache__/bud_metric.cpython-37.pyc and /dev/null differ diff --git a/postprocessor/processes/__pycache__/davol.cpython-37.pyc b/postprocessor/processes/__pycache__/davol.cpython-37.pyc deleted file mode 100644 index fd6c1cfe05e07980b127afa8c23a6d8a7dc373da..0000000000000000000000000000000000000000 Binary files a/postprocessor/processes/__pycache__/davol.cpython-37.pyc and /dev/null differ diff --git a/postprocessor/processes/__pycache__/dsignal.cpython-37.pyc b/postprocessor/processes/__pycache__/dsignal.cpython-37.pyc deleted file mode 100644 index ae3eaab217c9a2bd1df200c944881a311e458e84..0000000000000000000000000000000000000000 Binary files a/postprocessor/processes/__pycache__/dsignal.cpython-37.pyc and /dev/null differ diff --git a/postprocessor/processes/__pycache__/merger.cpython-37.pyc b/postprocessor/processes/__pycache__/merger.cpython-37.pyc deleted file mode 100644 index 8c41739500db19de52db510b9f163a829183682f..0000000000000000000000000000000000000000 Binary files a/postprocessor/processes/__pycache__/merger.cpython-37.pyc and /dev/null differ diff --git a/postprocessor/processes/__pycache__/picker.cpython-37.pyc b/postprocessor/processes/__pycache__/picker.cpython-37.pyc deleted file mode 100644 index ddf7c31ac2c8ba2e995849346fb59c9f6c85c7e2..0000000000000000000000000000000000000000 Binary files a/postprocessor/processes/__pycache__/picker.cpython-37.pyc and /dev/null differ diff --git a/postprocessor/processes/aggregate.py b/postprocessor/processes/aggregate.py deleted file mode 100644 index cfe8e8f03ad5a5cb8ee4c1a58f1dcb837eaedc4f..0000000000000000000000000000000000000000 --- a/postprocessor/processes/aggregate.py +++ /dev/null @@ -1,74 +0,0 @@ -from itertools import cycle - -import numpy as np -import pandas as pd - -from agora.base import ParametersABC, ProcessABC - - -class aggregateParameters(ParametersABC): - """ - Parameters - reduction: str to be passed to a dataframe for collapsing across columns - """ - - def __init__(self, reductions): - super().__init__() - self.reductions = reductions - - @classmethod - def default(cls): - return cls.from_dict({"reductions": ["mean", "median", "max"]}) - - -class aggregate(ProcessABC): - """ - aggregate multiple datasets - """ - - def __init__(self, parameters: aggregateParameters): - super().__init__(parameters) - - def run(self, signals): - names = np.array([signal.index.names for signal in signals]) - index = signals[0].index - for s in signals[0:]: - index = index.intersection(s.index) - - tmp_signals = [s.loc[index] for s in signals] - for i, s in enumerate(signals): - tmp_signals[i].name = s.name - signals = tmp_signals - - assert len(signals), "Signals is empty" - - bad_words = { - "postprocessing", - "extraction", - "None", - "np_max", - "", - } - get_keywords = lambda df: [ - ind - for item in df.name.split("/") - for ind in item.split("/") - if ind not in bad_words - ] - colnames = [ - "_".join(get_keywords(s) + [red]) - for s in signals - for red in self.parameters.reductions - ] - concat = pd.concat( - [ - getattr(signal, red)(axis=1) - for 
signal in signals - for red in self.parameters.reductions - ], - names=signals[0].index.names, - axis=1, - ) - concat.columns = colnames - - return concat diff --git a/postprocessor/processes/base.py b/postprocessor/processes/base.py deleted file mode 100644 index 142bd6c3fa2c5ef0fc578d3d09bfa47530482619..0000000000000000000000000000000000000000 --- a/postprocessor/processes/base.py +++ /dev/null @@ -1 +0,0 @@ -from agora.base import ParametersABC, ProcessABC \ No newline at end of file diff --git a/postprocessor/processes/births.py b/postprocessor/processes/births.py deleted file mode 100644 index e5a0d9b4834ec8f46d6e0d1256c6dcaad2e460fe..0000000000000000000000000000000000000000 --- a/postprocessor/processes/births.py +++ /dev/null @@ -1 +0,0 @@ -#!/usr/bin/env python3 diff --git a/postprocessor/processes/bud_metric.py b/postprocessor/processes/bud_metric.py deleted file mode 100644 index 518ddf5231c96458763fca35f52762def97e8b73..0000000000000000000000000000000000000000 --- a/postprocessor/processes/bud_metric.py +++ /dev/null @@ -1,65 +0,0 @@ -import numpy as np -import pandas as pd -from agora.base import ParametersABC, ProcessABC - - -class bud_metricParameters(ParametersABC): - """ - Parameters - """ - - def __init__(self, mode="longest"): - super().__init__() - self.mode = mode - - @classmethod - def default(cls): - return cls.from_dict({"mode": "longest"}) - - -class bud_metric(ProcessABC): - """ - Obtain the volume of daughter cells - if 'longest' assumes a single mother per trap. - """ - - def __init__(self, parameters: bud_metricParameters): - super().__init__(parameters) - - def run(self, signal: pd.DataFrame): - if self.parameters.mode is "longest": - result = self.get_bud_metric_wrap(signal) - - return result - - @staticmethod - def get_bud_metric(signal): - mother_id = signal.index[signal.notna().sum(axis=1).argmax()] - - nomother = signal.drop(mother_id) - - starts = nomother.apply(pd.Series.first_valid_index, axis=1).sort_values() - - ranges = [np.arange(i, j) for i, j in zip(starts[:-1], starts[1:])] - ranges.append(np.arange(starts.iloc[-1], signal.columns[-1])) - - bud_metric = pd.concat( - [signal.loc[i, rng] for i, rng in zip(starts.index, ranges)] - ) - srs = pd.Series(bud_metric, index=signal.columns, name=mother_id) - - return srs - - def get_bud_metric_wrap(self, signals): - srs = [ - self.get_bud_metric(signals.loc[trap]) - for trap in signals.index.unique(level="trap") - ] - index = [ - (trap, mother.name) - for trap, mother in zip(signals.index.unique(level="trap"), srs) - ] - - concatenated = pd.concat(srs, keys=index, axis=1, sort=True).T.sort_index() - concatenated.index.names = ["trap", "cell_label"] - return concatenated diff --git a/postprocessor/processes/dsignal.py b/postprocessor/processes/dsignal.py deleted file mode 100644 index 484ad59944c12cd12005c02b9470b84d8f2f19d7..0000000000000000000000000000000000000000 --- a/postprocessor/processes/dsignal.py +++ /dev/null @@ -1,28 +0,0 @@ -import pandas as pd - -from postprocessor.core.processes.base import ParametersABC, ProcessABC - - -class dsignalParameters(ParametersABC): - """ - :window: Number of timepoints to consider for signal. 
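# ---------------------------------------------------------------------------
# Illustrative sketch (toy data, not part of the original module): dsignal.run
# below smooths each row with a rolling mean over `window` timepoints and then
# takes the first difference along time.  Rows are cells, columns timepoints;
# rolling(..., axis=1) mirrors the original call and assumes a pandas version
# that still accepts the axis keyword.

import pandas as pd

toy = pd.DataFrame(
    [[1.0, 2.0, 4.0, 7.0, 11.0],
     [5.0, 5.0, 5.0, 5.0, 5.0]],
    index=["cell_a", "cell_b"],
    columns=range(5),
)
window = 3
change = toy.rolling(window=window, axis=1).mean().diff(axis=1)
# The first `window` columns are NaN (not enough points to smooth yet) and the
# flat track cell_b shows zero change afterwards.
print(change)
# ---------------------------------------------------------------------------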
- """ - - def __init__(self, window: int): - self.window = window - - @classmethod - def default(cls): - return cls.from_dict({"window": 3}) - - -class dsignal(ProcessABC): - """ - Calculate the change in a signal depending on a window - """ - - def __init__(self, parameters: dsignalParameters): - super().__init__(parameters) - - def run(self, signal: pd.DataFrame): - return signal.rolling(window=self.parameters.window, axis=1).mean().diff(axis=1) diff --git a/postprocessor/processes/gpsignal.py b/postprocessor/processes/gpsignal.py deleted file mode 100644 index f6cdb45fbd1416630382246fb3152c9d99184076..0000000000000000000000000000000000000000 --- a/postprocessor/processes/gpsignal.py +++ /dev/null @@ -1,97 +0,0 @@ -"""Gaussian process fit of a Signal.""" -import logging - -from postprocessor.core.processes.base import ParametersABC, ProcessABC -import numpy as np -import pandas as pd - -import gaussian_processes.gaussianprocess as gp - -def estimate_gr(volume, dt, noruns, bounds, verbose): - """ - Parameters - ---------- - - volume : pd.Series - The volume series of a given cell - dt : float - The time interval in hours - noruns : int - The number of runs for optimisation - bounds : dict - The hyperparameter bounds used for optimisation - verbose : bool - If True, prints results - - Returns - ------- - """ - volume = volume.values - n = len(volume) - idx = np.arange(n) - t = idx * dt - y = volume[volume > 0] - x = t[volume > 0] - idx = idx[volume > 0] - # Fit the GP - mg = gp.maternGP(bounds, x, y) - mg.findhyperparameters(noruns=noruns) - if verbose: - mg.results() # Prints out the hyperparameters - mg.predict(x, derivs=2) # Saves the predictions to object - # Save the predictions to a csv file so they can be reused - results = dict(time=mg.x, volume=mg.y, fit_time=mg.xnew, fit_volume=mg.f, - growth_rate=mg.df, d_growth_rate=mg.ddf, - volume_var=mg.fvar, growth_rate_var=mg.dfvar, - d_growth_rate_var=mg.ddfvar) - for name, value in results.items(): - results[name] = np.full((n, ), np.nan) - results[name][idx] = value - return results - -# Give that to a writer: NOTE the writer needs to learn how to write the -# output of a process that returns multiple signals like this one does. - -class gpParameters(ParametersABC): - default_dict = dict(dt=5, - noruns=5, - bounds={0: (-2, 3), - 1: (-2, 1), - 2: (-4, -1)}, - verbose=False) - def __init__(self, dt, noruns, bounds, verbose): - """ - Parameters - ---------- - dt : float - The time step between time points, in minutes - noruns : int - The number of times the optimisation is tried - bounds : dict - Hyperparameter bounds for the Matern Kernel - verbose : bool - Determines whether to print hyperparameter results - """ - self.dt = dt - self.noruns = noruns - self.bounds = bounds - self.verbose = verbose - - @classmethod - def default(cls): - return cls.from_dict(cls.default_dict) - - -class GPSignal(ProcessABC): - """Gaussian process fit of a Signal. 
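# ---------------------------------------------------------------------------
# Illustrative sketch (stand-in estimator, not the Gaussian-process fit and
# not part of the original module): GPSignal.run below calls estimate_gr on
# every cell and regroups the per-cell result dictionaries into one DataFrame
# per output quantity.  A plain finite difference stands in for the GP so the
# reshaping step can run on its own.

import numpy as np
import pandas as pd

def fake_estimate(volume: pd.Series, dt: float = 5.0) -> dict:
    # Same output layout as estimate_gr's time-resolved fields, simpler maths.
    v = volume.to_numpy(dtype=float)
    return {"fit_volume": v, "growth_rate": np.gradient(v, dt)}

signal = pd.DataFrame(
    [[10.0, 12.0, 15.0, 19.0],
     [8.0, 8.5, 9.5, 11.0]],
    index=["cell_a", "cell_b"],
)

per_cell = {cell: fake_estimate(row) for cell, row in signal.iterrows()}
multi_signal = {
    name: pd.DataFrame(
        np.vstack([res[name] for res in per_cell.values()]),
        index=signal.index,
        columns=signal.columns,
    )
    for name in ("fit_volume", "growth_rate")
}
print(multi_signal["growth_rate"])
# ---------------------------------------------------------------------------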
- """ - def __init__(self, parameters: gpParameters): - super().__init__(parameters) - - def run(self, signal: pd.DataFrame): - results = signal.apply(lambda x: estimate_gr(x, - **self.parameters.to_dict()), - result_type='expand').T - multi_signal = {name: pd.DataFrame(np.vstack(results[name])) - for name in results.columns} - return multi_signal diff --git a/postprocessor/processes/merger.py b/postprocessor/processes/merger.py deleted file mode 100644 index a79227fac3e9522b6c9ac2a218751c179f1b7477..0000000000000000000000000000000000000000 --- a/postprocessor/processes/merger.py +++ /dev/null @@ -1,60 +0,0 @@ -from agora.base import ParametersABC, ProcessABC -from postprocessor.core.functions.tracks import get_joinable - - -class mergerParameters(ParametersABC): - """ - :param tol: float or int threshold of average (prediction error/std) necessary - to consider two tracks the same. If float is fraction of first track, - if int it is absolute units. - :param window: int value of window used for savgol_filter - :param degree: int value of polynomial degree passed to savgol_filter - """ - - def __init__( - self, - tolerance: float, - smooth: bool = False, - window: int = 5, - degree: int = 3, - min_avg_delta: float = 0.9, - ): - - self.tolerance = tolerance - - self.smooth = smooth - - self.window = window - - self.degree = degree - - self.min_avg_delta = min_avg_delta - - @classmethod - def default(cls): - return cls.from_dict( - { - "smooth": False, - "tolerance": 0.2, - "window": 5, - "degree": 3, - "min_avg_delta": 0.9, - } - ) - - -class merger(ProcessABC): - """ - TODO check why it needs to be run a few times to complete the merging - """ - - def __init__(self, parameters): - super().__init__(parameters) - - def run(self, signal): - joinable = get_joinable(signal, tol=self.parameters.tolerance) - # merged, _ = merge_tracks(signal) # , min_len=self.window + 1) - # indices = (*zip(*merged.index.tolist()),) - # names = merged.index.names - # return {name: ids for name, ids in zip(names, indices)} - return joinable diff --git a/postprocessor/processes/peaks.py b/postprocessor/processes/peaks.py deleted file mode 100644 index 15ddf950ea1ebb16a6a46177474dbec14489e1f3..0000000000000000000000000000000000000000 --- a/postprocessor/processes/peaks.py +++ /dev/null @@ -1,44 +0,0 @@ -from scipy.signal import argrelmax, argrelmin -from postprocessor.core.processes.base import ParametersABC, ProcessABC - - -class PeaksParameters(ParametersABC): - """ - Parameters - type : str {minima, maxima, "all"}. Determines which type of peaks to identify - order : int Parameter to pass to scipy.signal.argrelextrema indicating - how many points to use for comparison. - """ - - def __init__(self, type): - self.type = type - - @classmethod - def default(cls): - return cls.from_dict({"type": "minima", "order": 3}) - - -class Peaks(ProcessABC): - """ - Identifies a signal sharply dropping. - """ - - def __init__(self, parameters: PeaksParameters): - super().__init__(parameters) - - def run(self, signal: pd.DataFrame): - """ - Returns a boolean dataframe with the same shape as the - original signal but with peaks as true values. 
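# ---------------------------------------------------------------------------
# Illustrative sketch (hypothetical helper, not part of the original module):
# a self-contained version of what the Peaks docstring above describes — mark
# local minima (or maxima) per cell and return a boolean DataFrame with the
# same shape as the input.  Only numpy, pandas and scipy.signal.argrelextrema
# are assumed.

import numpy as np
import pandas as pd
from scipy.signal import argrelextrema

def find_peaks_df(signal: pd.DataFrame, kind: str = "minima", order: int = 3) -> pd.DataFrame:
    comparator = np.less if kind == "minima" else np.greater
    mask = np.zeros(signal.shape, dtype=bool)
    for i, row in enumerate(signal.to_numpy()):
        # Indices where the value is smaller (or larger) than `order` points
        # on either side.
        idx = argrelextrema(row, comparator=comparator, order=order)[0]
        mask[i, idx] = True
    return pd.DataFrame(mask, index=signal.index, columns=signal.columns)

toy = pd.DataFrame([[5.0, 4.0, 1.0, 4.0, 5.0, 4.5, 6.0, 7.0]], index=["cell_a"])
print(find_peaks_df(toy, kind="minima", order=2))   # True only at the dip (column 2)
# ---------------------------------------------------------------------------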
- """ - peaks_mat = np.zeros_like(signal, dtype=bool) - - comparator = np.less if self.parameters.type is "minima" else np.greater - peaks_ids = argrelextrema(new_df, comparator=comparator, order=order) - peaks_mat[peak_ids] = True - - return pd.DataFrame( - peaks_mat, - index=signal.index, - columns=signal.columns, - ) diff --git a/postprocessor/processes/picker.py b/postprocessor/processes/picker.py deleted file mode 100644 index a7221e6f8146609a59921e3752826aedac6c3802..0000000000000000000000000000000000000000 --- a/postprocessor/processes/picker.py +++ /dev/null @@ -1,339 +0,0 @@ -import seaborn as sns -from matplotlib import pyplot as plt # TODO DELETE THIS - -from typing import Tuple, Union, List -from abc import ABC, abstractmethod - -from itertools import groupby - -from utils_find_1st import find_1st, cmp_equal -import numpy as np -import pandas as pd - -from pcore.cells import CellsHDF - -from agora.base import ParametersABC, ProcessABC -from postprocessor.core.functions.tracks import max_ntps, max_nonstop_ntps - - -class pickerParameters(ParametersABC): - def __init__( - self, - sequence: List[str] = ["lineage", "condition"], - ): - self.sequence = sequence - - @classmethod - def default(cls): - return cls.from_dict( - { - "sequence": [ - # ("lineage", "intersection", "families"), - ("condition", "intersection", "any_present", 0.8), - ("condition", "intersection", "growing", 40), - ("condition", "intersection", "present", 8), - ("condition", "intersection", "mother_buds", 5, 0.8), - # ("lineage", "full_families", "intersection"), - ], - } - ) - - -class picker(ProcessABC): - """ - :cells: Cell object passed to the constructor - :condition: Tuple with condition and associated parameter(s), conditions can be - "present", "nonstoply_present" or "quantile". - Determines the thersholds or fractions of signals/signals to use. - :lineage: str {"mothers", "daughters", "families" (mothers AND daughters), "orphans"}. Mothers/daughters picks cells with those tags, families pick the union of both and orphans the difference between the total and families. 
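# ---------------------------------------------------------------------------
# Illustrative sketch (toy data, not part of the original module): the
# condition filters described above reduce to boolean masks over the signal
# DataFrame, e.g. "present" (enough non-NaN timepoints) and "growing" (net
# increase above a threshold), matching the expressions used in switch_case
# further below.

import numpy as np
import pandas as pd

toy = pd.DataFrame(
    [[1.0, 1.2, 1.5, 1.9, 2.4],
     [2.0, np.nan, np.nan, np.nan, 2.1],
     [3.0, 2.9, 2.8, 2.7, 2.6]],
    index=pd.MultiIndex.from_tuples(
        [(0, 1), (0, 2), (1, 1)], names=["trap", "cell_label"]
    ),
)

present = toy.notna().sum(axis=1) > 3          # seen in more than 3 timepoints
growing = toy.diff(axis=1).sum(axis=1) > 0.5   # net increase above 0.5

print(toy.index[present & growing].tolist())   # -> [(0, 1)]
# ---------------------------------------------------------------------------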
- """ - - def __init__( - self, - parameters: pickerParameters, - cells: CellsHDF, - ): - super().__init__(parameters=parameters) - - self._cells = cells - - @staticmethod - def mother_assign_to_mb_matrix(ma: List[np.array]): - # Convert from list of lists to mother_bud sparse matrix - ncells = sum([len(t) for t in ma]) - mb_matrix = np.zeros((ncells, ncells), dtype=bool) - c = 0 - for cells in ma: - for d, m in enumerate(cells): - if m: - mb_matrix[c + d, c + m - 1] = True - - c += len(cells) - - return mb_matrix - - @staticmethod - def mother_assign_from_dynamic(ma, label, trap, ntraps: int): - """ - Interpolate the list of lists containing the associated mothers from the mother_assign_dynamic feature - """ - idlist = list(zip(trap, label)) - cell_gid = np.unique(idlist, axis=0) - - last_lin_preds = [ - find_1st(((label[::-1] == lbl) & (trap[::-1] == tr)), True, cmp_equal) - for tr, lbl in cell_gid - ] - mother_assign_sorted = ma[last_lin_preds] - - traps = cell_gid[:, 0] - iterator = groupby(zip(traps, mother_assign_sorted), lambda x: x[0]) - d = {key: [x[1] for x in group] for key, group in iterator} - nested_massign = [d.get(i, []) for i in range(ntraps)] - - return nested_massign - - def pick_by_lineage(self, signals, how): - - idx = signals.index - - if how: - mothers = set(self.mothers) - daughters = set(self.daughters) - # daughters, mothers = np.where(mother_bud_mat) - - search = lambda a, b: np.where( - np.in1d( - np.ravel_multi_index(np.array(a).T, np.array(a).max(0) + 1), - np.ravel_multi_index(np.array(b).T, np.array(a).max(0) + 1), - ) - ) - if how == "mothers": - idx = mothers - elif how == "daughters": - idx = daughters - elif how == "daughters_w_mothers": - present_mothers = idx.intersection(mothers) - idx = set( - [ - tuple(x) - for m in present_mothers - for x in np.array(self.daughters)[search(self.mothers, m)] - ] - ) - - print("associated daughters: ", idx) - elif how == "mothers_w_daughters": - present_daughters = idx.intersection(daughters) - idx = set( - [ - tuple(x) - for d in present_daughters - for x in np.array(self.mothers)[search(self.daughters, d)] - ] - ) - elif how == "full_families": - present_mothers = idx.intersection(mothers) - dwm_idx = set( - [ - tuple(x) - for m in present_mothers - for x in np.array(self.daughters)[ - search(np.array(self.mothers), m) - ] - ] - ) - present_daughters = idx.intersection(daughters) - mwd_idx = set( - [ - tuple(x) - for d in present_daughters - for x in np.array(self.mothers)[ - search(np.array(self.daughters), d) - ] - ] - ) - idx = mwd_idx.union(dwm_idx) - elif how == "families" or how == "orphans": - families = mothers.union(daughters) - if how == "families": - idx = families - elif how == "orphans": - idx = idx.diference(families) - - idx = idx.intersection(signals.index) - - return idx - - def pick_by_condition(self, signals, condition, thresh): - idx = self.switch_case(signals, condition, thresh) - return idx - - def get_mothers_daughters(self): - ma = self._cells["mother_assign_dynamic"] - trap = self._cells["trap"] - label = self._cells["cell_label"] - nested_massign = self.mother_assign_from_dynamic( - ma, label, trap, self._cells.ntraps - ) - # mother_bud_mat = self.mother_assign_to_mb_matrix(nested_massign) - - idx = set( - [ - (tid, i + 1) - for tid, x in enumerate(nested_massign) - for i in range(len(x)) - ] - ) - mothers, daughters = zip( - *[ - ((tid, m), (tid, d)) - for tid, trapcells in enumerate(nested_massign) - for d, m in enumerate(trapcells, 1) - if m - ] - ) - return mothers, daughters - - def 
run(self, signals):
-        indices = set(signals.index)
-        self.mothers, self.daughters = self.get_mothers_daughters()
-        for alg, op, *params in self.sequence:
-            if alg == "lineage":
-                param1 = params[0]
-                new_indices = getattr(self, "pick_by_" + alg)(
-                    signals.loc[list(indices)], param1
-                )
-            else:
-                param1, *param2 = params
-                new_indices = getattr(self, "pick_by_" + alg)(
-                    signals.loc[list(indices)], param1, param2
-                )
-
-            if op == "union":
-                # new_indices = new_indices.intersection(set(signals.index))
-                new_indices = indices.union(new_indices)
-
-            indices = indices.intersection(new_indices)
-
-        return np.array(list(indices))
-
-    @staticmethod
-    def switch_case(
-        signals: pd.DataFrame,
-        condition: str,
-        threshold: Union[float, int, list],
-    ):
-        if len(threshold) == 1:
-            threshold = [_as_int(*threshold, signals.shape[1])]
-        case_mgr = {
-            "any_present": lambda s, thresh: any_present(s, thresh),
-            "present": lambda s, thresh: s.notna().sum(axis=1) > thresh,
-            # longest uninterrupted stretch of timepoints must exceed the threshold
-            "nonstoply_present": lambda s, thresh: s.apply(max_nonstop_ntps, axis=1)
-            > thresh,
-            "growing": lambda s, thresh: s.diff(axis=1).sum(axis=1) > thresh,
-            "mother_buds": lambda s, p1, p2: mother_buds_wrap(s, p1, p2),
-            # "quantile": [np.quantile(signals.values[signals.notna()], threshold)],
-        }
-        return set(signals.index[case_mgr[condition](signals, *threshold)])
-
-
-def any_present(signals, threshold):
-    """
-    Returns a mask for cells: True if any cell in that trap was present for more than
-    :threshold: timepoints.
-    """
-    any_present = pd.Series(
-        np.sum(
-            [
-                np.isin([x[0] for x in signals.index], i) & v
-                for i, v in (signals.notna().sum(axis=1) > threshold)
-                .groupby("trap")
-                .any()
-                .items()
-            ],
-            axis=0,
-        ).astype(bool),
-        index=signals.index,
-    )
-    return any_present
-
-
-from copy import copy
-
-
-def mother_buds(df, min_budgrowth_t, min_mobud_ratio):
-    """
-    Parameters
-    ----------
-    df : pd.DataFrame
-    min_budgrowth_t: Minimal number of timepoints for which reassignment is locked after assigning a bud
-    min_mobud_ratio: Minimal mother-bud ratio at the assignment
-    #TODO incorporate bud-assignment data?
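# Illustrative aside, not part of the original picker.py: the trap-level test done by
# any_present above, rewritten with a plain groupby/transform for comparison. A trap is
# kept if at least one of its cells is present for more than `threshold` timepoints.
import numpy as np
import pandas as pd

sig = pd.DataFrame(
    [[1.0, 1.0, np.nan], [np.nan, np.nan, np.nan], [1.0, 1.0, 1.0]],
    index=pd.MultiIndex.from_tuples(
        [(0, 1), (0, 2), (1, 1)], names=["trap", "cell_label"]
    ),
)
threshold = 1
long_lived = sig.notna().sum(axis=1) > threshold
trap_mask = long_lived.groupby("trap").transform("any")
# trap 0 passes because cell (0, 1) is long-lived, so cell (0, 2) is kept as well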
- - # If more than one bud start in the same time point pick the smallest one - """ - - ntps = df.notna().sum(axis=1) - mother_id = df.index[ntps.argmax()] - nomother = df.drop(mother_id) - if not len(nomother): - return [] - nomother = ( # Clean short-lived cells outside our mother cell's timepoints - nomother.loc[ - nomother.apply( - lambda x: x.first_valid_index() >= df.loc[mother_id].first_valid_index() - and x.last_valid_index() <= df.loc[mother_id].last_valid_index(), - axis=1, - ) - ] - ) - - start = nomother.apply(pd.Series.first_valid_index, axis=1) - - # clean duplicates - duplicates = start.duplicated(False) - if duplicates.any(): - dup_tps = np.unique(start[duplicates]) - idx, tps = zip( - *[(nomother.loc[start == tp, tp].idxmin(), tp) for tp in dup_tps] - ) - start = start[~duplicates] - start = pd.concat( - (start, pd.Series(tps, index=idx, dtype="int", name="cell_label")) - ) - nomother = nomother.loc[start.index] - nomother.index = nomother.index.astype("int") - - d_to_mother = nomother[start] - df.loc[mother_id, start] * min_mobud_ratio - size_filter = d_to_mother[ - d_to_mother.apply(lambda x: x.dropna().iloc[0], axis=1) < 0 - ] - cols_sorted = ( - size_filter.sort_index(axis=1) - .apply(pd.Series.first_valid_index, axis=1) - .sort_values() - ) - if not len(cols_sorted): - return [] - bud_candidates = cols_sorted.loc[ - [True, *(np.diff(cols_sorted.values) > min_budgrowth_t)] - ] - - return [mother_id] + [int(i) for i in bud_candidates.index.tolist()] - - -def mother_buds_wrap(signals, *args): - ids = [] - for trap in signals.index.unique(level="trap"): - df = signals.loc[trap] - selected_ids = mother_buds(df, *args) - ids += [(trap, i) for i in selected_ids] - - idx_srs = pd.Series(False, signals.index).astype(bool) - idx_srs.loc[ids] = True - return idx_srs - - -def _as_int(threshold: Union[float, int], ntps: int): - if type(threshold) is float: - threshold = ntps * threshold - return threshold diff --git a/postprocessor/processes/savgol.py b/postprocessor/processes/savgol.py deleted file mode 100644 index d0256736bb83a26e253302017babe122545d8260..0000000000000000000000000000000000000000 --- a/postprocessor/processes/savgol.py +++ /dev/null @@ -1,152 +0,0 @@ -import numpy as np -import pandas as pd - -from postprocessor.core.processes.base import ParametersABC, ProcessABC - - -class savgolParameters(ParametersABC): - """ - Parameters - - window : int (odd) - Window length of datapoints. Must be odd and smaller than x - polynom : int - The order of polynom used. Must be smaller than the window size - """ - - def __init__(self, window, polynom): - self.window = window - self.polynom = polynom - - @classmethod - def default(cls): - return cls.from_dict({"window": 3, "polynom": 2}) - - -class savgol(ProcessABC): - """ - Apply Savitzky-Golay filter (works with NaNs, but it might return - NaN regions). 
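# Illustrative aside, not part of the original savgol.py: for uniformly spaced, NaN-free
# timepoints, scipy's own filter performs the same kind of local-polynomial smoothing;
# the non_uniform_savgol routine below exists because scipy.signal.savgol_filter assumes
# evenly spaced samples.
import numpy as np
from scipy.signal import savgol_filter

t = np.linspace(0, 3, 31)
y = np.sin(t) + np.random.normal(0, 0.05, t.size)
y_smooth = savgol_filter(y, window_length=5, polyorder=2)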
- """ - - def __init__(self, parameters: savgolParameters): - super().__init__(parameters) - - def run(self, signal: pd.DataFrame): - savgol_on_srs = lambda x: self.non_uniform_savgol( - x.index, x.values, self.parameters.window, self.parameters.polynom - ) - return signal.apply(savgol_on_srs, 1) - - @staticmethod - def non_uniform_savgol(x, y, window: int, polynom: int): - """ - Applies a Savitzky-Golay filter to y with non-uniform spacing - as defined in x - - This is based on https://dsp.stackexchange.com/questions/1676/savitzky-golay-smoothing-filter-for-not-equally-spaced-data - The borders are interpolated like scipy.signal.savgol_filter would do - - source: https://dsp.stackexchange.com/a/64313 - - Parameters - ---------- - x : array_like - List of floats representing the x values of the data - y : array_like - List of floats representing the y values. Must have same length - as x - window : int (odd) - Window length of datapoints. Must be odd and smaller than x - polynom : int - The order of polynom used. Must be smaller than the window size - - Returns - ------- - np.array of float - The smoothed y values - """ - if len(x) != len(y): - raise ValueError('"x" and "y" must be of the same size') - - if len(x) < window: - raise ValueError("The data size must be larger than the window size") - - if type(window) is not int: - raise TypeError('"window" must be an integer') - - if window % 2 == 0: - raise ValueError('The "window" must be an odd integer') - - if type(polynom) is not int: - raise TypeError('"polynom" must be an integer') - - if polynom >= window: - raise ValueError('"polynom" must be less than "window"') - - half_window = window // 2 - polynom += 1 - - # Initialize variables - A = np.empty((window, polynom)) # Matrix - tA = np.empty((polynom, window)) # Transposed matrix - t = np.empty(window) # Local x variables - y_smoothed = np.full(len(y), np.nan) - - # Start smoothing - for i in range(half_window, len(x) - half_window, 1): - # Center a window of x values on x[i] - for j in range(0, window, 1): - t[j] = x[i + j - half_window] - x[i] - - # Create the initial matrix A and its transposed form tA - for j in range(0, window, 1): - r = 1.0 - for k in range(0, polynom, 1): - A[j, k] = r - tA[k, j] = r - r *= t[j] - - # Multiply the two matrices - tAA = np.matmul(tA, A) - - # Invert the product of the matrices - tAA = np.linalg.inv(tAA) - - # Calculate the pseudoinverse of the design matrix - coeffs = np.matmul(tAA, tA) - - # Calculate c0 which is also the y value for y[i] - y_smoothed[i] = 0 - for j in range(0, window, 1): - y_smoothed[i] += coeffs[0, j] * y[i + j - half_window] - - # If at the end or beginning, store all coefficients for the polynom - if i == half_window: - first_coeffs = np.zeros(polynom) - for j in range(0, window, 1): - for k in range(polynom): - first_coeffs[k] += coeffs[k, j] * y[j] - elif i == len(x) - half_window - 1: - last_coeffs = np.zeros(polynom) - for j in range(0, window, 1): - for k in range(polynom): - last_coeffs[k] += coeffs[k, j] * y[len(y) - window + j] - - # Interpolate the result at the left border - for i in range(0, half_window, 1): - y_smoothed[i] = 0 - x_i = 1 - for j in range(0, polynom, 1): - y_smoothed[i] += first_coeffs[j] * x_i - x_i *= x[i] - x[half_window] - - # Interpolate the result at the right border - for i in range(len(x) - half_window, len(x), 1): - y_smoothed[i] = 0 - x_i = 1 - for j in range(0, polynom, 1): - y_smoothed[i] += last_coeffs[j] * x_i - x_i *= x[i] - x[-half_window - 1] - - return y_smoothed diff --git 
a/postprocessor/processes/template.py b/postprocessor/processes/template.py deleted file mode 100644 index 6fad758f7be89e37d40d3ba15e7d6674e25ba56b..0000000000000000000000000000000000000000 --- a/postprocessor/processes/template.py +++ /dev/null @@ -1,31 +0,0 @@ -import numpy as np -import pandas as pd - -from agora.base import ParametersABC, ProcessABC - - -class TemplateParameters(ParametersABC): - """ - Parameters - """ - - def __init__( - self, - ): - super().__init__() - - @classmethod - def default(cls): - return cls.from_dict({}) - - -class Template(ProcessABC): - """ - Template for process class. - """ - - def __init__(self, parameters: TemplateParameters): - super().__init__(parameters) - - def run(self): - pass diff --git a/postprocessor/processor.py b/postprocessor/processor.py deleted file mode 100644 index 81c54b6f19f60df1316223df87ad8dbb883c1ef3..0000000000000000000000000000000000000000 --- a/postprocessor/processor.py +++ /dev/null @@ -1,316 +0,0 @@ -import h5py -from typing import List, Dict, Union -from pydoc import locate - -import numpy as np -import pandas as pd - -from tqdm import tqdm - -from agora.base import ParametersABC -from pcore.io.writer import Writer -from pcore.io.signal import Signal - -from pcore.cells import Cells -from postprocessor.core.processes.merger import mergerParameters, merger -from postprocessor.core.processes.picker import pickerParameters, picker - - -class PostProcessorParameters(ParametersABC): - """ - Anthology of parameters used for postprocessing - :merger: - :picker: parameters for picker - :processes: Dict processes:[objectives], 'processes' are defined in ./processes/ - while objectives are relative or absolute paths to datasets. If relative paths the - post-processed addresses are used. - - """ - - def __init__( - self, - targets={}, - parameters={}, - outpaths={}, - ): - self.targets: Dict = targets - self.parameters: Dict = parameters - self.outpaths: Dict = outpaths - - def __getitem__(self, item): - return getattr(self, item) - - @classmethod - def default(cls, kind=None): - if kind == "defaults" or kind == None: - return cls( - targets={ - "prepost": { - "merger": "/extraction/general/None/area", - "picker": ["/extraction/general/None/area"], - }, - "processes": ( - ( - "bud_metric", - [ - "/extraction/general/None/volume", - "/extraction/em_ratio/np_max/mean", - "/extraction/em_ratio/np_max/median", - ], - ), - ( - "dsignal", - [ - "/extraction/general/None/volume", - "/extraction/em_ratio/np_max/mean", - "/extraction/em_ratio/np_max/median", - "/extraction/em_ratio_bgsub/np_max/mean", - "/extraction/em_ratio_bgsub/np_max/median", - "/postprocessing/bud_metric/extraction_general_None_volume", - "/postprocessing/bud_metric/extraction_em_ratio_np_max_mean", - "/postprocessing/bud_metric/extraction_em_ratio_np_max_median", - ], - ), - ( - "aggregate", - [ - [ - "/extraction/general/None/volume", - "/extraction/em_ratio/np_max/mean", - "/extraction/em_ratio/np_max/median", - "/extraction/em_ratio_bgsub/np_max/mean", - "/extraction/em_ratio_bgsub/np_max/median", - "/extraction/gsum/np_max/median", - "/extraction/gsum/np_max/mean", - "postprocessing/bud_metric/extraction_general_None_volume", - "postprocessing/bud_metric/extraction_em_ratio_np_max_mean", - "postprocessing/bud_metric/extraction_em_ratio_np_max_median", - "postprocessing/dsignal/extraction_general_None_volume", - "postprocessing/dsignal/postprocessing_bud_metric_extraction_general_None_volume", - 
"postprocessing/dsignal/postprocessing_bud_metric_extraction_em_ratio_np_max_median", - "postprocessing/dsignal/postprocessing_bud_metric_extraction_em_ratio_np_max_mean", - ] - ], - ), - # "savgol": ["/extraction/general/None/area"], - ), - }, - parameters={ - "prepost": { - "merger": mergerParameters.default(), - "picker": pickerParameters.default(), - } - }, - outpaths={"aggregate": "/postprocessing/experiment_wide/aggregated/"}, - ) - - def to_dict(self): - return {k: _if_dict(v) for k, v in self.__dict__.items()} - - -class PostProcessor: - def __init__(self, filename, parameters): - self.parameters = parameters - self._filename = filename - self._signal = Signal(filename) - self._writer = Writer(filename) - - # self.outpaths = parameters["outpaths"] - self.merger = merger(parameters["parameters"]["prepost"]["merger"]) - - self.picker = picker( - parameters=parameters["parameters"]["prepost"]["picker"], - cells=Cells.from_source(filename), - ) - self.classfun = { - process: self.get_process(process) - for process, _ in parameters["targets"]["processes"] - } - self.parameters_classfun = { - process: self.get_parameters(process) - for process, _ in parameters["targets"]["processes"] - } - self.targets = parameters["targets"] - - @staticmethod - def get_process(process): - """ - Dynamically import a process class from the 'processes' folder. - Assumes process filename and class name are the same - """ - return locate("postprocessor.core.processes." + process + "." + process) - - @staticmethod - def get_parameters(process): - """ - Dynamically import a process class from the 'processes' folder. - Assumes process filename and class name are the same - """ - return locate( - "postprocessor.core.processes." + process + "." + process + "Parameters" - ) - - def run_prepost(self): - """Important processes run before normal post-processing ones""" - - merge_events = self.merger.run(self._signal[self.targets["prepost"]["merger"]]) - - with h5py.File(self._filename, "r") as f: - prev_idchanges = self._signal.get_merges() - - changes_history = list(prev_idchanges) + [np.array(x) for x in merge_events] - self._writer.write("modifiers/merges", data=changes_history) - - with h5py.File(self._filename, "a") as f: # TODO Remove this once done tweaking - if "modifiers/picks" in f: - del f["modifiers/picks"] - - indices = self.picker.run(self._signal[self.targets["prepost"]["picker"][0]]) - from collections import Counter - - mothers, daughters = np.array(self.picker.mothers), np.array( - self.picker.daughters - ) - self.tmp = [y for y in Counter([tuple(x) for x in mothers]).items() if y[1] > 2] - self._writer.write( - "postprocessing/lineage", - data=pd.MultiIndex.from_arrays( - np.append(mothers, daughters[:, 1].reshape(-1, 1), axis=1).T, - names=["trap", "mother_label", "daughter_label"], - ), - overwrite="overwrite", - ) - - # apply merge to mother-daughter - moset = set([tuple(x) for x in mothers]) - daset = set([tuple(x) for x in daughters]) - picked_set = set([tuple(x) for x in indices]) - with h5py.File(self._filename, "a") as f: - merge_events = f["modifiers/merges"][()] - merged_moda = set([tuple(x) for x in merge_events[:, 0, :]]).intersection( - set([*moset, *daset, *picked_set]) - ) - search = lambda a, b: np.where( - np.in1d( - np.ravel_multi_index(a.T, a.max(0) + 1), - np.ravel_multi_index(b.T, a.max(0) + 1), - ) - ) - - for target, source in merge_events: - if ( - tuple(source) in moset - ): # update mother to lowest positive index among the two - mother_ids = search(mothers, source) - 
mothers[mother_ids] = ( - target[0], - self.pick_mother(mothers[mother_ids][0][1], target[1]), - ) - if tuple(source) in daset: - daughters[search(daughters, source)] = target - if tuple(source) in picked_set: - indices[search(indices, source)] = target - - self._writer.write( - "postprocessing/lineage_merged", - data=pd.MultiIndex.from_arrays( - np.append(mothers, daughters[:, 1].reshape(-1, 1), axis=1).T, - names=["trap", "mother_label", "daughter_label"], - ), - overwrite="overwrite", - ) - - self._writer.write( - "modifiers/picks", - data=pd.MultiIndex.from_arrays( - indices.T, - names=["trap", "cell_label"], - ), - overwrite="overwrite", - ) - - @staticmethod - def pick_mother(a, b): - """Update the mother id following this priorities: - - The mother has a lower id - """ - x = max(a, b) - if min([a, b]): - x = [a, b][np.argmin([a, b])] - return x - - def run(self): - self.run_prepost() - - for process, datasets in tqdm(self.targets["processes"]): - if process in self.parameters["parameters"].get( - "processes", {} - ): # If we assigned parameters - parameters = self.parameters_classfun[process](self.parameters[process]) - - else: - parameters = self.parameters_classfun[process].default() - - loaded_process = self.classfun[process](parameters) - for dataset in datasets: - # print("Processing", process, "for", dataset) - - if isinstance(dataset, list): # multisignal process - signal = [self._signal[d] for d in dataset] - elif isinstance(dataset, str): - signal = self._signal[dataset] - else: - raise ("Incorrect dataset") - - result = loaded_process.run(signal) - - if process in self.parameters.to_dict()["outpaths"]: - outpath = self.parameters.to_dict()["outpaths"][process] - elif isinstance(dataset, list): - # If no outpath defined, place the result in the minimum common - # branch of all signals used - prefix = "".join( - prefix + c[0] - for c in takewhile( - lambda x: all(x[0] == y for y in x), zip(*dataset) - ) - ) - outpath = ( - prefix - + "_".join( # TODO check that it always finishes in '/' - [d[len(prefix) :].replace("/", "_") for d in dataset] - ) - ) - elif isinstance(dataset, str): - outpath = dataset[1:].replace("/", "_") - else: - raise ("Outpath not defined", type(dataset)) - - if process not in self.parameters.to_dict()["outpaths"]: - outpath = "/postprocessing/" + process + "/" + outpath - - if isinstance(result, dict): # Multiple Signals as output - for k, v in result: - self.write_result( - outpath + f"/{k}", - v, - metadata={}, - ) - else: - self.write_result( - outpath, - result, - metadata={}, - ) - - def write_result( - self, path: str, result: Union[List, pd.DataFrame, np.ndarray], metadata: Dict - ): - self._writer.write(path, result, meta=metadata) - - -def _if_dict(item): - if hasattr(item, "to_dict"): - item = item.to_dict() - return item
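# Illustrative aside, not part of the original processor.py: a standalone sketch of the
# common-prefix rule that run() uses to place results of multi-signal processes when no
# outpath is configured. takewhile keeps the leading characters shared by every dataset
# path and the remainders are joined with underscores. (In the module above, `takewhile`
# would additionally need to be imported from itertools.)
from itertools import takewhile

dataset = [
    "/extraction/em_ratio/np_max/mean",
    "/extraction/em_ratio/np_max/median",
]
prefix = "".join(
    chars[0]
    for chars in takewhile(lambda chars: all(c == chars[0] for c in chars), zip(*dataset))
)
outpath = prefix + "_".join(d[len(prefix):].replace("/", "_") for d in dataset)
# prefix  == "/extraction/em_ratio/np_max/me"
# outpath == "/extraction/em_ratio/np_max/mean_dian"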