diff --git a/pcore/grouper.py b/pcore/grouper.py index 8da2f67ac08826314a5aed0b8f8a281ca072377e..0fa5e6194cb323ed0a5a4ba581d30342b89518c1 100644 --- a/pcore/grouper.py +++ b/pcore/grouper.py @@ -2,6 +2,7 @@ from abc import ABC, abstractmethod, abstractproperty from pathlib import Path +from pathos.multiprocessing import Pool import h5py import numpy as np @@ -9,9 +10,6 @@ import pandas as pd from pcore.io.signal import Signal -# fname = "/shared_libs/pipeline-core/data/2021_04_19_pH_calibration_dual_phl__ura8__by4741_Alan4_00" -fname = "/shared_libs/pydask/pipeline-core/data/2021_08_21_KCl_pH_00/" - class Grouper(ABC): """ @@ -40,15 +38,13 @@ class Grouper(ABC): pass def concat_signal(self, path, reduce_cols=None, axis=0): - signals = [] - for group, signal in self.signals.items(): - print("looking at", signal.filename) - combined = signal[path] - combined["position"] = group - combined["group"] = self.group_names[group] - combined.set_index(["group", "position"], inplace=True, append=True) - combined.index = combined.index.swaplevel(-2, 0).swaplevel(-1, 1) - signals.append(combined) + group_names = self.group_names + sitems = self.signals.items() + with Pool(8) as p: + signals = p.map( + lambda x: concat_signal_ind(path, group_names, x[0], x[1]), + sitems, + ) sorted = pd.concat(signals, axis=axis).sort_index() if reduce_cols: @@ -154,28 +150,12 @@ class phGrouper(NameGrouper): return aggregated -# g = NameGrouper(fname) -# signame = "/extraction/em_ratio/np_max/mean" -# shortname = "_".join((signame.split("/")[2], signame.split("/")[4])) -# c = g.concat_signal(signame) -# d = c[c.notna().sum(axis=1) > c.shape[1] * 0.8] -# e = d.melt(var_name="tp", ignore_index=False, value_name=shortname).reset_index() -# e[shortname] = 1 / e[shortname] - -# Plot comparable to Ivan's -# sns.lineplot( -# data=e, -# x="tp", -# y=shortname, -# hue="group", -# palette=["blue", "orange", "yellow", "purple", "green"], -# ) -# plt.title(signame) -# plt.ylabel(shortname) -# plt.show() - -# Check if traplocs make sense -# for traplocs in tlocs.values(): -# x, y = list(zip(*traplocs)) -# plt.scatter(x, y) -# plt.show() +def concat_signal_ind(path, group_names, group, signal): + print("Looking at ", group) + combined = signal[path] + combined["position"] = group + combined["group"] = group_names[group] + combined.set_index(["group", "position"], inplace=True, append=True) + combined.index = combined.index.swaplevel(-2, 0).swaplevel(-1, 1) + + return combined diff --git a/setup.py b/setup.py index a344ae9df77883c990aedc65ae45f4e69b10ee44..e6e978a7687368346021360b9b580a198caf8a35 100644 --- a/setup.py +++ b/setup.py @@ -1,6 +1,6 @@ from setuptools import setup, find_packages -print("find_packages outputs ", find_packages('pcore')) +print("find_packages outputs ", find_packages("pcore")) setup( name="pipeline-core", version="0.1.1-dev", @@ -27,6 +27,7 @@ setup( "tables", "imageio==2.8.0", "omero-py>=5.6.2", + "pathos", "zeroc-ice==3.6.5", "tensorflow>=1.15,<=2.3", "baby@git+ssh://git@git.ecdf.ed.ac.uk/swain-lab/python-pipeline/baby.git@master",