Skip to content
Snippets Groups Projects
Commit 9f9d2ba0 authored by pswain's avatar pswain
Browse files

docs for budding; re-write of budmetric

parent 4f302a13
No related branches found
No related tags found
No related merge requests found
...@@ -70,7 +70,7 @@ class Signal(BridgeH5): ...@@ -70,7 +70,7 @@ class Signal(BridgeH5):
@staticmethod
def add_name(df, name):
    """
    Attach a name to a pandas object and return that same object.

    The object is modified in place by assigning to its ``name``
    attribute; no copy is made and no column is added.

    Parameters
    ----------
    df:
        Object to label.
        NOTE(review): presumably a pandas DataFrame or Series - confirm
        against callers. If it is a DataFrame that already has a column
        called "name", attribute assignment may write to that column
        instead of setting an attribute.
    name:
        Value stored as ``df.name``.

    Returns
    -------
    The object passed in as ``df``, now carrying ``name``.
    """
    df.name = name
    return df
......
#!/usr/bin/env python3 #!/usr/bin/env python3
import re
import typing as t
import numpy as np import numpy as np
import pandas as pd
from agora.io.bridge import groupsort from agora.io.bridge import groupsort
from itertools import groupby
def mb_array_to_dict(mb_array: np.ndarray): def mb_array_to_dict(mb_array: np.ndarray):
...@@ -19,4 +15,3 @@ def mb_array_to_dict(mb_array: np.ndarray): ...@@ -19,4 +15,3 @@ def mb_array_to_dict(mb_array: np.ndarray):
for trap, mo_da in groupsort(mb_array).items() for trap, mo_da in groupsort(mb_array).items()
for mo, daughters in groupsort(mo_da).items() for mo, daughters in groupsort(mo_da).items()
} }
...@@ -20,8 +20,9 @@ class BudMetricParameters(LineageProcessParameters): ...@@ -20,8 +20,9 @@ class BudMetricParameters(LineageProcessParameters):
class BudMetric(LineageProcess): class BudMetric(LineageProcess):
""" """
Requires mother-bud information to create a new dataframe where the indices are mother ids and Requires mother-bud information to create a new dataframe where the
values are the daughters' values for a given signal. indices are mother ids and values are the daughters' values for a
given signal.
""" """
def __init__(self, parameters: BudMetricParameters): def __init__(self, parameters: BudMetricParameters):
...@@ -38,7 +39,6 @@ class BudMetric(LineageProcess): ...@@ -38,7 +39,6 @@ class BudMetric(LineageProcess):
else: else:
assert "mother_label" in signal.index.names assert "mother_label" in signal.index.names
lineage = signal.index.to_list() lineage = signal.index.to_list()
return self.get_bud_metric(signal, mb_array_to_dict(lineage)) return self.get_bud_metric(signal, mb_array_to_dict(lineage))
@staticmethod @staticmethod
...@@ -48,7 +48,9 @@ class BudMetric(LineageProcess): ...@@ -48,7 +48,9 @@ class BudMetric(LineageProcess):
""" """
signal: Daughter-inclusive dataframe signal: Daughter-inclusive dataframe
md: Mother-daughters dictionary where key is mother's index and its values are a list of daughter indices md: dictionary where key is mother's index,
defined as (trap, cell_label), and its values are a list of
daughter indices, as (trap, cell_label).
Get fvi (First Valid Index) for all cells Get fvi (First Valid Index) for all cells
Create empty matrix Create empty matrix
...@@ -61,63 +63,81 @@ class BudMetric(LineageProcess): ...@@ -61,63 +63,81 @@ class BudMetric(LineageProcess):
Convert matrix into dataframe using mother indices Convert matrix into dataframe using mother indices
""" """
mothers_mat = np.zeros((len(md), signal.shape[1]))
cells_were_dropped = 0 # Flag determines if mothers (1), daughters (2) or both were missing (3)
md_index = signal.index md_index = signal.index
if ( # md_index should only comprise (trap, cell_label)
"mother_label" not in md_index.names if "mother_label" not in md_index.names:
): # Generate mother label from md dict if unavailable # dict with daughter indices as keys
d = {v: k for k, values in md.items() for v in values} d = {v: k for k, values in md.items() for v in values}
# generate mother_label in signal using the mother's cell_label
signal["mother_label"] = list( signal["mother_label"] = list(
map(lambda x: d.get(x, [0])[-1], signal.index) map(lambda x: d.get(x, [0])[-1], signal.index)
) )
signal.set_index("mother_label", append=True, inplace=True) signal.set_index("mother_label", append=True, inplace=True)
related_items = set( # combine mothers and daughter indices
[*md.keys(), *[y for x in md.values() for y in x]] mothers_index = md.keys()
) daughters_index = [y for x in md.values() for y in x]
md_index = md_index.intersection(related_items) relations = set([*mothers_index, *daughters_index])
elif "mother_label" in md_index.names: # keep from md_index only mother and daughters
md_index = md_index.droplevel("mother_label") md_index = md_index.intersection(relations)
else: else:
raise ("Unavailable relationship information") md_index = md_index.droplevel("mother_label")
if len(md_index) < len(signal): if len(md_index) < len(signal):
print("Dropped cells before bud_metric") # TODO log print("Dropped cells before applying bud_metric") # TODO log
# restrict signal to the cells in md_index, moving mother_label to do so
signal = ( signal = (
signal.reset_index("mother_label") signal.reset_index("mother_label")
.loc(axis=0)[md_index] .loc(axis=0)[md_index]
.set_index("mother_label", append=True) .set_index("mother_label", append=True)
) )
# restrict to daughters: cells with a mother
names = list(signal.index.names) mother_labels = signal.index.get_level_values("mother_label")
del names[-2] daughter_df = signal.loc[mother_labels > 0]
# join data for daughters with the same mother
output_df = ( output_df = daughter_df.groupby(["trap", "mother_label"]).apply(
signal.loc[signal.index.get_level_values("mother_label") > 0] lambda x: _combine_daughter_tracks(x)
.groupby(names)
.apply(lambda x: _combine_daughter_tracks(x))
) )
output_df.columns = signal.columns output_df.columns = signal.columns
output_df["padding_level"] = 0 output_df["padding_level"] = 0
output_df.set_index("padding_level", append=True, inplace=True) output_df.set_index("padding_level", append=True, inplace=True)
if len(output_df): if len(output_df):
output_df.index.names = signal.index.names output_df.index.names = signal.index.names
return output_df return output_df
def _combine_daughter_tracks(tracks: t.Collection[pd.Series]): def _combine_daughter_tracks(tracks: pd.DataFrame):
""" """
Combine multiple time series of cells into one, overwriting values Combine multiple time series of daughter cells into one time series.
prioritising the most recent entity.
At any one time, a mother cell should have only one daughter.
Two daughters are still sometimes present at the same time point, and we
then choose the daughter that appears first.
TODO We need to fix examples with more than one daughter at a time point.
Parameters
----------
tracks: a Signal
Data for all daughters, which are distinguished by different cell_labels,
for a particular trap and mother_label.
""" """
sorted_da_ids = tracks.sort_index(level="cell_label") # sort by daughter IDs
sorted_da_ids.index = range(len(sorted_da_ids)) bud_df = tracks.sort_index(level="cell_label")
tp_fvt = sorted_da_ids.apply(lambda x: x.first_valid_index(), axis=0) # remove multi-index
tp_fvt = sorted_da_ids.columns.get_indexer(tp_fvt) bud_df.index = range(len(bud_df))
tp_fvt[tp_fvt < 0] = len(sorted_da_ids) - 1 # find which row of sorted_df has the daughter for each time point
tp_fvt: pd.Series = bud_df.apply(lambda x: x.first_valid_index(), axis=0)
_metric = np.choose(tp_fvt, sorted_da_ids.values) # combine data for all daughters
return pd.Series(_metric, index=tracks.columns) combined_tracks = np.nan * np.ones(tracks.columns.size)
for bud_row in np.unique(tp_fvt.dropna().values).astype(int):
ilocs = np.where(tp_fvt.values == bud_row)[0]
combined_tracks[ilocs] = bud_df.values[bud_row, ilocs]
# TODO delete old version
tp_fvt = bud_df.columns.get_indexer(tp_fvt)
tp_fvt[tp_fvt == -1] = len(bud_df) - 1
old = np.choose(tp_fvt, bud_df.values)
assert (
(combined_tracks == old) | (np.isnan(combined_tracks) & np.isnan(old))
).all(), "yikes"
return pd.Series(combined_tracks, index=tracks.columns)
0% Loading or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment