Skip to content
Snippets Groups Projects
Commit 6a9fc718 authored by Alán Muñoz
Browse files

add aggregate

Former-commit-id: e52f6431728e6baa44cd9fdd5493cc6563e2e085
parent 6d38194c
No related branches found
No related tags found
No related merge requests found
from itertools import cycle
import numpy as np
import pandas as pd
from agora.base import ParametersABC, ProcessABC
class aggregateParameters(ParametersABC):
    """
    Parameters for the ``aggregate`` process.

    reduction: str
        Name of the dataframe method used to collapse each signal across
        its columns (for example ``"median"``).
    """

    def __init__(self, reduction):
        super().__init__()
        self.reduction = reduction

    @classmethod
    def default(cls):
        """
        Return the default parameter set, which uses a median reduction.

        The instance is built through ``from_dict``, which is not defined in
        this class (presumably inherited from ``ParametersABC``).
        """
        defaults = {"reduction": "median"}
        return cls.from_dict(defaults)
class aggregate(ProcessABC):
    """
    Aggregate multiple datasets into a single dataframe.

    Every input signal is collapsed across its columns with the reduction
    named in the parameters; the reduced signals are then placed side by
    side, one column per input signal.
    """

    def __init__(self, parameters: aggregateParameters):
        super().__init__(parameters)

    @staticmethod
    def _column_name(name):
        """Build a column label from a '/'-separated signal name, dropping uninformative parts."""
        bad_words = {
            "postprocessing",
            "extraction",
            "None",
            "np",
            "general",
        }
        return "_".join(part for part in name.split("/") if part not in bad_words)

    def run(self, signals):
        """
        Reduce each signal across columns and concatenate the results.

        Parameters
        ----------
        signals : sequence of dataframe-like objects
            Each one must expose ``index``, ``loc``, the reduction method named
            by ``self.parameters.reduction`` (called with ``axis=1``) and a
            ``name`` attribute holding a '/'-separated string.

        Returns
        -------
        The concatenation (``axis=1``) of the reduced signals, with one column
        per input signal named after the informative parts of its ``name``.

        Raises
        ------
        ValueError
            If ``signals`` is empty.
        """
        # Function-scope import so the block does not depend on the file header.
        import warnings

        # Validate first: everything below indexes ``signals[0]``.
        if not len(signals):
            raise ValueError("Signals is empty")

        # Read the names before any re-indexing: ``.loc`` returns new objects
        # and the custom ``name`` attribute is not carried over to them.
        colnames = [self._column_name(s.name) for s in signals]

        # Compare index level names as plain tuples; this also works when the
        # signals have a different number of index levels.
        level_names = [tuple(s.index.names) for s in signals]
        if any(names != level_names[0] for names in level_names[1:]):
            warnings.warn("Not all indices are the same, selecting smallest set")
            index = signals[0].index
            for s in signals[1:]:
                index = index.intersection(s.index)
            signals = [s.loc[index] for s in signals]

        reduction = self.parameters.reduction
        concat = pd.concat(
            [getattr(signal, reduction)(axis=1) for signal in signals],
            names=signals[0].index.names,
            axis=1,
        )
        concat.columns = colnames
        return concat
import numpy as np
import pandas as pd
from agora.base import ParametersABC, ProcessABC
class bud_metricParameters(ParametersABC):
    """
    Parameters for the ``bud_metric`` process.

    mode: str
        Strategy used to compute the bud metric. Defaults to ``"longest"``,
        the only mode that ``bud_metric.run`` checks for.
    """

    def __init__(self, mode="longest"):
        super().__init__()
        self.mode = mode

    @classmethod
    def default(cls):
        """
        Return the default parameter set (``mode="longest"``).

        The instance is built through ``from_dict``, which is not defined in
        this class (presumably inherited from ``ParametersABC``).
        """
        defaults = {"mode": "longest"}
        return cls.from_dict(defaults)
class bud_metric(ProcessABC):
    """
    Obtain the volume of daughter cells.

    If mode is 'longest', assumes a single mother per trap: the row with the
    most non-missing values is treated as the mother and the remaining rows
    as its daughters.
    """

    def __init__(self, parameters: bud_metricParameters):
        super().__init__(parameters)

    def run(self, signal: pd.DataFrame):
        """
        Compute the bud metric for every trap in ``signal``.

        Parameters
        ----------
        signal : pd.DataFrame
            Rows are indexed by a MultiIndex that has a "trap" level; the
            remaining level is labelled "cell_label" in the output.

        Returns
        -------
        pd.DataFrame with one row per trap, indexed by (trap, cell_label).

        Raises
        ------
        ValueError
            If ``self.parameters.mode`` is not a supported mode.
        """
        mode = self.parameters.mode
        # Compare by value: identity checks against a string literal depend on
        # interning and fail for equal strings built at runtime.
        if mode == "longest":
            return self.get_bud_metric_wrap(signal)
        raise ValueError(f"Unsupported bud_metric mode: {mode!r}")

    @staticmethod
    def get_bud_metric(signal):
        """
        Build the bud metric series for a single trap.

        The mother is the row with the most non-missing values. Daughters are
        ordered by their first valid column; each daughter contributes its
        values from its own first valid column up to, but not including, the
        first valid column of the next daughter. The last daughter contributes
        through the final column.

        Parameters
        ----------
        signal : pd.DataFrame
            Rows are cells of one trap. Column labels must be orderable, since
            they are compared with the daughters' first valid columns.

        Returns
        -------
        pd.Series indexed by ``signal.columns`` and named after the mother's
        row label; columns not covered by any daughter are NaN.
        """
        mother_id = signal.index[signal.notna().sum(axis=1).argmax()]

        # Rows without any valid value have no start column, so drop them.
        daughters = signal.drop(mother_id).dropna(how="all")
        if daughters.empty:
            # No daughters in this trap: nothing to report for any column.
            return pd.Series(np.nan, index=signal.columns, name=mother_id, dtype=float)

        starts = daughters.apply(pd.Series.first_valid_index, axis=1).sort_values()

        columns = signal.columns
        # The end of each segment is the start of the next daughter; the last
        # segment is open-ended so that the final column is included.
        ends = list(starts.iloc[1:]) + [None]
        pieces = []
        for (cell, start), end in zip(starts.items(), ends):
            # Select existing column labels instead of generating integer
            # ranges, so columns need not be consecutive integers.
            mask = columns >= start
            if end is not None:
                mask = mask & (columns < end)
            selected = columns[mask]
            if len(selected):
                pieces.append(signal.loc[cell, selected])

        bud_metric = pd.concat(pieces)
        srs = pd.Series(bud_metric, index=signal.columns, name=mother_id)
        return srs

    def get_bud_metric_wrap(self, signals):
        """
        Apply ``get_bud_metric`` to every trap and stack the results.

        Parameters
        ----------
        signals : pd.DataFrame
            Indexed by a MultiIndex with a "trap" level.

        Returns
        -------
        pd.DataFrame sorted by index, with index names ("trap", "cell_label")
        and one row per trap holding that trap's bud metric.
        """
        traps = signals.index.unique(level="trap")
        srs = [self.get_bud_metric(signals.loc[trap]) for trap in traps]
        # Each series is named after the mother it was computed for.
        index = [(trap, mother.name) for trap, mother in zip(traps, srs)]

        concatenated = pd.concat(srs, keys=index, axis=1, sort=True).T.sort_index()
        concatenated.index.names = ["trap", "cell_label"]
        return concatenated
0% Loading or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment