Commit bbf30f85 authored by Alán Muñoz

[WIP]fix(chainer): refresh standard processing

parent e1c36f24
@@ -47,20 +47,25 @@ class Signal(BridgeH5):
     def __getitem__(self, dsets: t.Union[str, t.Collection]):
         """Get and potentially pre-process data from h5 file and return as a dataframe."""
         if isinstance(dsets, str):  # no pre-processing
-            df = self.apply_prepost(dsets)
-            return self.add_name(df, dsets)
+            return self.get(dsets)
         elif isinstance(dsets, list):  # pre-processing
             is_bgd = [dset.endswith("imBackground") for dset in dsets]
             # Check we are not comparing tile-indexed and cell-indexed data
             assert sum(is_bgd) == 0 or sum(is_bgd) == len(
                 dsets
             ), "Tile data and cell data can't be mixed"
-            return [
-                self.add_name(self.apply_prepost(dset), dset) for dset in dsets
-            ]
+            return [self.get(dset) for dset in dsets]
         else:
             raise Exception(f"Invalid type {type(dsets)} to get datasets")
 
+    def get(self, dsets: t.Union[str, t.Collection], **kwargs):
+        """Get and potentially pre-process data from h5 file and return as a dataframe."""
+        if isinstance(dsets, str):  # no pre-processing
+            prepost_applied = self.apply_prepost(dsets, **kwargs)
+            return self.add_name(prepost_applied, dsets)
+
     @staticmethod
     def add_name(df, name):
         """Add column of identical strings to a dataframe."""
@@ -129,18 +134,24 @@ class Signal(BridgeH5):
         Returns an array with three columns: the tile id, the mother label, and the daughter label.
         """
         if lineage_location is None:
-            lineage_location = "postprocessing/lineage_merged"
+            lineage_location = "modifiers/lineage_merged"
         with h5py.File(self.filename, "r") as f:
+            # if lineage_location not in f:
+            #     lineage_location = lineage_location.split("_")[0]
             if lineage_location not in f:
-                lineage_location = f[lineage_location.split("_")[0]]
-            tile_mo_da = f[lineage_location.split("_")[0]]
-            lineage = np.array(
-                (
-                    tile_mo_da["trap"],
-                    tile_mo_da["mother_label"],
-                    tile_mo_da["daughter_label"],
-                )
-            ).T
+                lineage_location = "postprocessor/lineage"
+            tile_mo_da = f[lineage_location]
+
+            if isinstance(tile_mo_da, h5py.Dataset):
+                lineage = tile_mo_da[()]
+            else:
+                lineage = np.array(
+                    (
+                        tile_mo_da["trap"],
+                        tile_mo_da["mother_label"],
+                        tile_mo_da["daughter_label"],
+                    )
+                ).T
         return lineage
 
     @_first_arg_str_to_df
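
The new isinstance(tile_mo_da, h5py.Dataset) branch lets the method read the lineage from either on-disk layout: a plain (N, 3) integer dataset, or a node exposing separate "trap", "mother_label", and "daughter_label" arrays. A self-contained sketch of both layouts; the file name and values are invented, and modelling the fallback as a group of parallel 1-D datasets is an assumption about how older files were written:

import h5py
import numpy as np

rows = np.array([[0, 1, 2], [0, 1, 3], [5, 2, 4]])  # (tile, mother, daughter)

with h5py.File("lineage_demo.h5", "w") as f:
    # newer layout: a single (N, 3) dataset
    f.create_dataset("modifiers/lineage_merged", data=rows)
    # older layout: parallel 1-D arrays under one group
    g = f.create_group("postprocessor/lineage")
    g.create_dataset("trap", data=rows[:, 0])
    g.create_dataset("mother_label", data=rows[:, 1])
    g.create_dataset("daughter_label", data=rows[:, 2])

with h5py.File("lineage_demo.h5", "r") as f:
    for loc in ("modifiers/lineage_merged", "postprocessor/lineage"):
        node = f[loc]
        if isinstance(node, h5py.Dataset):
            lineage = node[()]
        else:
            lineage = np.array(
                (node["trap"], node["mother_label"], node["daughter_label"])
            ).T
        assert (lineage == rows).all()
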
@@ -5,6 +5,7 @@ from copy import copy
 import numpy as np
 import pandas as pd
 from sklearn.cluster import KMeans
+from agora.utils.indexing import validate_association
 
 index_row = t.Tuple[str, str, int, int]
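
validate_association comes from the project's indexing utilities. Judging only by its call below, it matches an (N, 3) lineage array against (trap, cell_label) row indices and returns one boolean mask over lineage rows and one over index rows. A rough sketch of those inferred semantics; the name, signature, and behaviour here are assumptions, not the library's actual implementation:

import numpy as np

def validate_association_sketch(
    association: np.ndarray,  # (N, 3): trap, mother_label, daughter_label
    indices: np.ndarray,  # (M, 2): trap, cell_label
    match_column: int = 0,  # 0 matches mothers, 1 matches daughters
):
    """Return boolean masks over association rows and over index rows."""
    keys = association[:, [0, match_column + 1]]
    # compare every (trap, label) key against every index row
    hits = (keys[:, None, :] == indices[None, :, :]).all(axis=-1)
    return hits.any(axis=1), hits.any(axis=0)

In standard_filtering below, the second mask selects the rows of raw that appear as mothers somewhere in the lineage.
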
@@ -175,3 +176,67 @@ def drop_mother_label(index: pd.MultiIndex) -> np.ndarray:
 def get_index_as_np(signal: pd.DataFrame):
     # Get mother labels from multiindex dataframe
     return np.array(signal.index.to_list())
+
+
+def standard_filtering(
+    raw: pd.DataFrame,
+    lin: np.ndarray,
+    presence_high: float = 0.8,
+    presence_low: int = 7,
+):
+    """Filter mothers by lineage membership and presence, then append their buds."""
+    # Get all mothers
+    _, valid_indices = validate_association(
+        lin, np.array(raw.index.to_list()), match_column=0
+    )
+    in_lineage = raw.loc[valid_indices]
+    # Filter mothers by presence
+    present = in_lineage.loc[
+        in_lineage.notna().sum(axis=1) > (in_lineage.shape[1] * presence_high)
+    ]
+    # Get indices
+    indices = np.array(present.index.to_list())
+    to_cast = np.stack((lin[:, :2], lin[:, [0, 2]]), axis=1)
+    ndin = to_cast[..., None] == indices.T[None, ...]
+    # Use indices to fetch all daughters
+    valid_association = ndin.all(axis=2)[:, 0].any(axis=-1)
+    # Remove repeats
+    mothers, daughters = np.split(to_cast[valid_association], 2, axis=1)
+    mothers = mothers[:, 0]
+    daughters = daughters[:, 0]
+    d_m_dict = {tuple(d): m[-1] for m, d in zip(mothers, daughters)}
+    # Assuming unique sorts
+    raw_mothers = raw.loc[_as_tuples(mothers)]
+    raw_mothers["mother_label"] = 0
+    raw_daughters = raw.loc[_as_tuples(daughters)]
+    raw_daughters["mother_label"] = list(d_m_dict.values())
+    concat = pd.concat((raw_mothers, raw_daughters)).sort_index()
+    concat.set_index("mother_label", append=True, inplace=True)
+    # Last filter to remove tracklets that are too short
+    removed_buds = concat.notna().sum(axis=1) <= presence_low
+    filt = concat.loc[~removed_buds]
+    # Check that no mothers are left childless
+    m_d_dict = {tuple(m): [] for m in mothers}
+    for (trap, d), m in d_m_dict.items():
+        m_d_dict[(trap, m)].append(d)
+    for trap, daughter, mother in concat.index[removed_buds]:
+        idx_to_delete = m_d_dict[(trap, mother)].index(daughter)
+        del m_d_dict[(trap, mother)][idx_to_delete]
+    bud_free = [m for m, d in m_d_dict.items() if not d]
+    final_result = filt.drop(bud_free)
+    # We end up with the mothers present for more than presence_high
+    # (a fraction) of the experiment and their buds tracked for more
+    # than presence_low time-points
+    return final_result
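
The densest step in standard_filtering is the broadcast comparison that keeps every (mother, daughter) pair whose mother survived the presence filter. A small worked example with invented values:

import numpy as np

# lineage rows: (trap, mother_label, daughter_label)
lin = np.array([[0, 1, 2], [0, 1, 3], [5, 2, 4]])
# (trap, cell_label) indices of mothers that passed the presence filter
indices = np.array([[0, 1]])

# (N, 2, 2): per lineage row, the (trap, mother) and (trap, daughter) pairs
to_cast = np.stack((lin[:, :2], lin[:, [0, 2]]), axis=1)

# (N, 2, 2, M): ndin[n, p, c, m] asks whether coordinate c of pair p in
# row n equals coordinate c of filtered index m
ndin = to_cast[..., None] == indices.T[None, ...]

# keep rows whose (trap, mother) pair fully matches some filtered index
valid_association = ndin.all(axis=2)[:, 0].any(axis=-1)
print(valid_association)  # [ True  True False]: trap 5's mother was filtered out

mothers, daughters = np.split(to_cast[valid_association], 2, axis=1)
print(mothers[:, 0])  # [[0 1] [0 1]]
print(daughters[:, 0])  # [[0 2] [0 3]]
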