diff --git a/core/processes/aggregate.py b/core/processes/aggregate.py
new file mode 100644
index 0000000000000000000000000000000000000000..87efbe2646f51654e6366a6365586c963b6fe0b8
--- /dev/null
+++ b/core/processes/aggregate.py
@@ -0,0 +1,62 @@
+import pandas as pd
+
+from agora.base import ParametersABC, ProcessABC
+
+
+class aggregateParameters(ParametersABC):
+    """
+    Parameters
+        reduction: str name of the reduction to be applied to each
+            dataframe for collapsing across columns (e.g. "median").
+    """
+
+    def __init__(self, reduction):
+        super().__init__()
+        self.reduction = reduction
+
+    @classmethod
+    def default(cls):
+        return cls.from_dict({"reduction": "median"})
+
+
+class aggregate(ProcessABC):
+    """
+    Aggregate multiple datasets into a single dataframe.
+    """
+
+    def __init__(self, parameters: aggregateParameters):
+        super().__init__(parameters)
+
+    def run(self, signals):
+        assert len(signals), "Signals is empty"
+
+        names = [signal.index.names for signal in signals]
+        if any(n != names[0] for n in names):
+            # Not all indices are the same: restrict every signal to the
+            # smallest common set of rows
+            index = signals[0].index
+            for s in signals[1:]:
+                index = index.intersection(s.index)
+            signals = [s.loc[index] for s in signals]
+
+        bad_words = {
+            "postprocessing",
+            "extraction",
+            "None",
+            "np",
+            "general",
+        }
+
+        def get_keywords(df):
+            # Build a column name from the signal's path, dropping generic terms
+            return [word for word in df.name.split("/") if word not in bad_words]
+
+        colnames = ["_".join(get_keywords(s)) for s in signals]
+        concat = pd.concat(
+            [getattr(signal, self.parameters.reduction)(axis=1) for signal in signals],
+            names=signals[0].index.names,
+            axis=1,
+        )
+        concat.columns = colnames
+
+        return concat
diff --git a/core/processes/bud_metric.py b/core/processes/bud_metric.py
new file mode 100644
index 0000000000000000000000000000000000000000..518ddf5231c96458763fca35f52762def97e8b73
--- /dev/null
+++ b/core/processes/bud_metric.py
@@ -0,0 +1,66 @@
+import numpy as np
+import pandas as pd
+
+from agora.base import ParametersABC, ProcessABC
+
+
+class bud_metricParameters(ParametersABC):
+    """
+    Parameters
+        mode: str method used to assign buds to a mother; "longest" treats
+            the cell with the longest track in each trap as the mother.
+    """
+
+    def __init__(self, mode="longest"):
+        super().__init__()
+        self.mode = mode
+
+    @classmethod
+    def default(cls):
+        return cls.from_dict({"mode": "longest"})
+
+
+class bud_metric(ProcessABC):
+    """
+    Obtain the volume of daughter cells.
+    The "longest" mode assumes a single mother per trap.
+    """
+
+    def __init__(self, parameters: bud_metricParameters):
+        super().__init__(parameters)
+
+    def run(self, signal: pd.DataFrame):
+        if self.parameters.mode == "longest":
+            return self.get_bud_metric_wrap(signal)
+        raise ValueError(f"Unknown mode: {self.parameters.mode}")
+
+    @staticmethod
+    def get_bud_metric(signal):
+        # The mother is the cell with the most non-missing time points
+        mother_id = signal.index[signal.notna().sum(axis=1).argmax()]
+
+        nomother = signal.drop(mother_id)
+
+        # Time point at which each bud first appears, in order of appearance
+        starts = nomother.apply(pd.Series.first_valid_index, axis=1).sort_values()
+
+        # Assign each bud the interval from its birth to the next bud's
+        # birth; assumes the columns are consecutive integer time points
+        ranges = [np.arange(i, j) for i, j in zip(starts[:-1], starts[1:])]
+        ranges.append(np.arange(starts.iloc[-1], signal.columns[-1] + 1))
+
+        bud_metric = pd.concat(
+            [signal.loc[i, rng] for i, rng in zip(starts.index, ranges)]
+        )
+        srs = pd.Series(bud_metric, index=signal.columns, name=mother_id)
+
+        return srs
+
+    def get_bud_metric_wrap(self, signals):
+        traps = signals.index.unique(level="trap")
+        srs = [self.get_bud_metric(signals.loc[trap]) for trap in traps]
+        index = [(trap, mother.name) for trap, mother in zip(traps, srs)]
+
+        concatenated = pd.concat(srs, keys=index, axis=1, sort=True).T.sort_index()
+        concatenated.index.names = ["trap", "cell_label"]
+        return concatenated