def concat_signal(self, path, reduce_cols=None, axis=0, pool=8):
    """Concatenate the signal stored at *path* across all positions.

    Reconstructed from the post-image of a whitespace-mangled patch hunk
    (pcore/grouper.py, @@ -37,15 +37,22 @@ class Grouper(ABC)).

    Parameters
    ----------
    path : str
        h5 dataset path of the signal, e.g. "extraction/general/None/area".
    reduce_cols : callable or truthy, optional
        If truthy, collapse the concatenated frame column-wise with
        ``np.nanmean`` (note: the callable itself is not applied — only its
        truthiness is used, mirroring the original behaviour).
    axis : int
        Axis along which the per-position frames are concatenated.
    pool : int
        Number of pathos workers; pass 0/None to run serially (useful for
        debugging, since lambdas inside Pool hide tracebacks).

    Returns
    -------
    pandas.DataFrame or pandas.Series
        Sorted concatenation of the per-position signals; positions whose
        file lacks *path* are silently dropped.
    """
    group_names = self.group_names
    sitems = self.signals.items()
    if pool:
        with Pool(pool) as p:
            signals = p.map(
                lambda x: concat_signal_ind(path, group_names, x[0], x[1]),
                sitems,
            )
    else:
        # Serial fallback: same call, no multiprocessing indirection.
        signals = [
            concat_signal_ind(path, group_names, name, signal)
            for name, signal in sitems
        ]
    # concat_signal_ind returns None for positions missing this dataset;
    # drop them so pd.concat does not raise.
    signals = [s for s in signals if s is not None]
    # Renamed from `sorted`, which shadowed the builtin of the same name.
    combined = pd.concat(signals, axis=axis).sort_index()
    if reduce_cols:
        combined = combined.apply(np.nanmean, axis=1)
    return combined
def concat_signal_ind(path, group_names, group, signal):
    """Fetch one position's signal and tag it with group/position levels.

    Reconstructed from the post-image of a whitespace-mangled patch hunk
    (pcore/grouper.py). Module-level so it can be pickled by pathos.Pool.

    Parameters
    ----------
    path : str
        Dataset path to pull from *signal*.
    group_names : Mapping[str, str]
        Maps a position name to its group label.
    group : str
        Position name (key into *group_names*).
    signal : Signal or Mapping
        Indexable by *path*; expected to yield a DataFrame.

    Returns
    -------
    pandas.DataFrame or None
        The signal with "group" and "position" prepended to the row index
        (final level order: group, position, <original levels...>), or
        ``None`` when the dataset is absent or malformed — callers filter
        these out before concatenating.
    """
    print("Looking at ", group)
    # Was a bare `except:`, which also swallows SystemExit/KeyboardInterrupt;
    # narrowed to Exception while keeping the best-effort None return.
    try:
        combined = signal[path]
        combined["position"] = group
        combined["group"] = group_names[group]
        combined.set_index(["group", "position"], inplace=True, append=True)
        # Move the two appended levels to the front of the index.
        combined.index = combined.index.swaplevel(-2, 0).swaplevel(-1, 1)
        return combined
    except Exception:
        return None
"postprocessing/dsignal/postprocessing_bud_metric_extraction_general_None_volume" +# ) + +c = g.concat_signal(signame) +d = c.loc[c.notna().sum(axis=1) > 50] +sns.lineplot( + data=d.melt(ignore_index=False).reset_index(), x="variable", y="value", hue="group" +) +plt.show() + +# Aggregated metrics +signame = "postprocessing/experiment_wide/aggregated" +grid = sns.pairplot( + c[[i for i in c.columns if i.endswith("mean")]].reset_index(level="group"), + plot_kws=dict(alpha=0.5), + hue="group", +) +for ax in grid.axes.flat[:2]: + ax.tick_params(axis="x", labelrotation=90) + ax.tick_params(axis="y", labelrotation=90) + +plt.savefig("pairplot.png", dpi=400) +plt.show() + +for s in g.signals.values(): + with h5py.File(s.filename, "r") as f: + if "postprocessing/experiment_wide" not in f: + print(s.filename) + +filepath = "/shared_libs/pydask/pipeline-core/data/bak_kcl/YST_1510_009.h5" +with h5py.File(filepath, "a") as f: + print(f["postprocessing"].keys()) + # if "postprocessing" in f: + # del f["/postprocessing"] + # if "modifiers" in f: + # del f["/modifiers"] + + params = PostProcessorParameters.default() + pp = PostProcessor(filepath, params) + pp.run() + s = Signal(filepath) + s.datasets diff --git a/scripts/explore_h5.py b/scripts/explore_h5.py new file mode 100644 index 00000000..22d09e39 --- /dev/null +++ b/scripts/explore_h5.py @@ -0,0 +1,43 @@ +#!/usr/bin/env python3 +from pcore.io.signal import Signal + +fpath = "/home/alan/Documents/dev/stoa_libs/pipeline-core/data/2021_11_04_doseResponse_raf_1_15_2_glu_01_2_dual_phluorin_whi5_constantMedia_00/2021_11_04_doseResponse_raf_1_15_2_glu_01_2_dual_phluorin_whi5_constantMedia_00/raf_15_s17.h5" + +s = Signal(fpath) + +import numpy as np +import seaborn as sns +from matplotlib import pyplot as plt + +# df = s["/extraction/general/None/area"] +df = 1 / s["extraction/GFPFast/np_max/median"] +df = df.div(df.mean(axis=1), axis=0) + +vol = s["postprocessing/bud_metric/extraction_general_None_volume"] +vol = 
vol.div(vol.mean(axis=1), axis=0) +# sns.heatmap(df.sort_index(), robust=True) +# plt.show() + + +import pandas as pd + +n = 3 +fig, axes = plt.subplots(n, 1) +trap_ids = df.index.get_level_values("trap") +trapn = trap_ids[np.random.randint(len(trap_ids), size=n)] +for i, t in enumerate(trapn): + subdf = df.loc[t].melt(ignore_index=False).reset_index() + subdf["ch"] = "ratio" + voldf = vol.loc[t].melt(ignore_index=False).reset_index() + voldf["ch"] = "vol" + combined = pd.concat((voldf, subdf), axis=0) + sns.scatterplot( + data=combined, + x="variable", + y="value", + ax=axes[i], + hue="ch", + palette="Set2", + style="cell_label", + ) +plt.show() diff --git a/scripts/group.py b/scripts/group.py new file mode 100644 index 00000000..d445ff27 --- /dev/null +++ b/scripts/group.py @@ -0,0 +1,11 @@ +#!/usr/bin/env python3 +from pathlib import Path + +from pcore.grouper import NameGrouper + +path = Path( + "/home/alan/Documents/dev/stoa_libs/pipeline-core/data/2021_11_04_doseResponse_raf_1_15_2_glu_01_2_dual_phluorin_whi5_constantMedia_00/2021_11_04_doseResponse_raf_1_15_2_glu_01_2_dual_phluorin_whi5_constantMedia_00" +) +ng = NameGrouper(path) +# agg = ng.aggregate_multisignals(["extraction/general/None/area"], pool=0) +agg = ng.concat_signal("extraction/general/None/area") diff --git a/scripts/group2.py b/scripts/group2.py new file mode 100644 index 00000000..21632d6e --- /dev/null +++ b/scripts/group2.py @@ -0,0 +1,101 @@ +#!/usr/bin/env python3 + + +from pcore.grouper import NameGrouper + +folder = Path( + "/home/alan/Documents/dev/stoa_libs/pipeline-core/data/2021_11_04_doseResponse_raf_1_15_2_glu_01_2_dual_phluorin_whi5_constantMedia_00/2021_11_04_doseResponse_raf_1_15_2_glu_01_2_dual_phluorin_whi5_constantMedia_00/" +) +g = NameGrouper(folder) +# g.signals = {k: v for k, v in g.signals.items() if str(v) != filename} +# signame = "postprocessing/experiment_wide/aggregated" +# signame = "extraction/general/None/volume" + + +def get_df(signame): + c = 
g.concat_signal(signame) + d = c.loc[c.notna().sum(axis=1) > 60] + # for i in c.columns: + # if "ratio" in i: + # c[i] = 1 / c[i] + + # splits = [x.split("_") for x in c.columns] + # new = [ + # [ + # x + # for x in s + # if x + # not in [ + # "general", + # "metric", + # "postprocessing", + # "extraction", + # "None", + # "np", + # "max", + # "em", + # ] + # ] + # for s in splits + # ] + # joint = ["_".join(n) for n in new] + # c.columns = joint + + return d + + +signame = ( + "postprocessing/dsignal/postprocessing_bud_metric_extraction_general_None_volume" +) +signame = "extraction/mCherry/np_max/max2p5pc" +window = 15 +whi5 = get_df(signame) +whi5 = whi5.div(whi5.mean(axis=1), axis=0) +whi5_movavg = whi5.apply(lambda x: pd.Series(moving_average(x.values, window)), axis=1) +ph = 1 / get_df("extraction/em_ratio/np_max/median") +ph = ph.div(ph.mean(axis=1), axis=0) +ph_movavg = ph.apply(lambda x: pd.Series(moving_average(x.values, window)), axis=1) +ph_norm = ph.iloc(axis=1)[window // 2 : -window // 2] / ph_movavg +whi5_norm = whi5.iloc(axis=1)[window // 2 : -window // 2] / whi5_movavg +rand = np.random.randint(len(whi5), size=4) + +melted = ph_norm.iloc[rand].melt(ignore_index=False).reset_index() +melted["signal"] = "ph" +combined = whi5_norm.iloc[rand].melt(ignore_index=False).reset_index() +combined["signal"] = "whi5" +combined = pd.concat((melted, combined)) +combined["t_id"] = [str(x) + y for x, y in zip(combined["trap"], combined["position"])] +h = sns.FacetGrid(combined, col="t_id", col_wrap=2) +h.map_dataframe(sns.scatterplot, x="variable", y="value", hue="signal") +h.add_legend() +plt.show() + + +def moving_average(a, n=3): + ret = np.cumsum(a, dtype=float) + ret[n:] = ret[n:] - ret[:-n] + return ret[n - 1 :] / n + + +import numpy as np + +import seaborn as sns +from matplotlib import pyplot as plt + +sns.set_style("darkgrid") +sns.lineplot( + data=d.melt(ignore_index=False).reset_index("group"), + x="variable", + y="value", + # y="gsum_median_mean", + # 
#!/usr/bin/env python3
"""Re-run post-processing on every h5 file in an experiment folder.

Reconstructed from the post-image of scripts/postprocess.py in a
whitespace-mangled patch. Deletes any stale /postprocessing and /modifiers
groups from each file, then runs the default PostProcessor pipeline.
"""

from pathlib import Path

import h5py
from postprocessor.core.processor import PostProcessorParameters, PostProcessor

dpath = Path(
    "/home/alan/Documents/dev/stoa_libs/pipeline-core/data/2021_11_04_doseResponse_raf_1_15_2_glu_01_2_dual_phluorin_whi5_constantMedia_00/2021_11_04_doseResponse_raf_1_15_2_glu_01_2_dual_phluorin_whi5_constantMedia_00/"
)

# BUG FIX: the patched script iterated `fpath.glob('*.h5')` before `fpath`
# existed (NameError on first use); the directory variable is `dpath`.
for fpath in dpath.glob("*.h5"):
    with h5py.File(fpath, "a") as f:
        # Remove stale results so the pipeline starts from a clean slate.
        if "postprocessing" in f:
            del f["/postprocessing"]
        if "modifiers" in f:
            del f["/modifiers"]

    # Run after the `with` block so the h5 handle is closed before
    # PostProcessor reopens the file (indentation in the mangled patch is
    # ambiguous — original may have nested this inside the with; verify).
    params = PostProcessorParameters.default().to_dict()
    pp = PostProcessor(fpath, params)
    pp.run()
print(filepath, " failed") + print(e) with Pool(10) as p: results = p.map(lambda x: process_file(x), Path(folder).rglob("*.h5")) - - -from core.grouper import NameGrouper - -g = NameGrouper(folder) -# g.signals = {k: v for k, v in g.signals.items() if str(v) != filename} -signame = "postprocessing/experiment_wide/aggregated" -shortname = "aggregated" -c = g.concat_signal(signame) -for i in c.columns: - if "ratio" in i: - c[i] = 1 / c[i] - -splits = [x.split("_") for x in c.columns] -new = [ - [ - x - for x in s - if x - not in [ - "general", - "metric", - "postprocessing", - "extraction", - "None", - "np", - "max", - "em", - ] - ] - for s in splits -] -joint = ["_".join(n) for n in new] -c.columns = joint -# cf = c.loc[ -# c["dsignal_postprocessing_bud_metric_extraction_general_None_volume"].notna() -# ] -import seaborn as sns -from matplotlib import pyplot as plt - -sns.set_style("darkgrid") -sns.lmplot( - data=c.reset_index(), - x="em_ratio_median_mean", - # y="gsum_median_mean", - # y="dsignal_postprocessing_bud_metric_extraction_general_None_volume_max", - # y="dsignal_extraction_general_None_volume_max", - # hue="position", - hue="group", - # size="general_volume", - # alpha=0.5, - palette="muted", - ci=None, -) -# plt.xlim((0, 5)) -plt.show() - -grid = sns.pairplot(c[[i for i in c.columns if i.endswith("max")]]) -for ax in grid.axes.flat[:2]: - ax.tick_params(axis="x", labelrotation=90) - ax.tick_params(axis="y", labelrotation=90) - -plt.savefig("pairplot.png", dpi=400) -plt.show() - -for s in g.signals.values(): - with h5py.File(s.filename, "r") as f: - if "postprocessing/experiment_wide" not in f: - print(s.filename) - -filepath = "/shared_libs/pydask/pipeline-core/data/bak_kcl/YST_1510_009.h5" -with h5py.File(filepath, "a") as f: - print(f["postprocessing"].keys()) - # if "postprocessing" in f: - # del f["/postprocessing"] - # if "modifiers" in f: - # del f["/modifiers"] - - params = PostProcessorParameters.default() - pp = PostProcessor(filepath, params) 
- pp.run() - s = Signal(filepath) - s.datasets -- GitLab