Compare revisions

Changes are shown as if the source revision was being merged into the target revision.

Source

Target
  • swain-lab/aliby/aliby-mirror
  • swain-lab/aliby/alibylite
Commits on Source (150)
Showing changes with 5643 additions and 7114 deletions
[Diff collapsed: the source diff is too large to display; view the blob instead.]
pyproject.toml:

 [tool.poetry]
-name = "aliby"
-version = "0.1.64"
+name = "alibylite"
+version = "0.0.1"
 description = "Process and analyse live-cell imaging data"
-authors = ["Alan Munoz <alan.munoz@ed.ac.uk>"]
+authors = ["Alan Munoz", "Peter Swain <peter.swain@ed.ac.uk>"]
 packages = [
     { include = "aliby", from="src" },
     { include = "extraction", from="src" },
@@ -12,136 +12,39 @@ packages = [
 ]
 readme = "README.md"

-[tool.poetry.scripts]
-aliby-run = "aliby.bin.run:run"
-aliby-annotate = "aliby.bin.annotate:annotate"
-aliby-visualise = "aliby.bin.visualise:napari_overlay"
-
 [build-system]
 requires = ["setuptools", "poetry-core>=1.0.0"]
 build-backend = "poetry.core.masonry.api"

 [tool.poetry.dependencies]
 python = ">=3.8, <3.11"
-PyYAML = "^6.0"
-flatten-dict = "^0.4.2"
-gaussianprocessderivatives = "^0.1.5"
 numpy = ">=1.21.6"
-Bottleneck = "^1.3.5"
-opencv-python = "^4.7.0.72"
-pathos = "^0.2.8" # Lambda-friendly multithreading
-p-tqdm = "^1.3.3"
-pandas = ">=1.3.3"
-py-find-1st = "^1.1.5" # Fast indexing
-scikit-learn = ">=1.0.2" # Used for an extraction metric
+pandas = ">=2.0.3"
+scikit-learn = ">=1.0.2, <1.3"
 scipy = ">=1.7.3"
-# Pipeline + I/O
-dask = "^2021.12.0"
-imageio = "2.8.0" # For image-visualisation utilities
-requests-toolbelt = "^0.9.1"
 scikit-image = ">=0.18.1"
-tqdm = "^4.62.3" # progress bars
-xmltodict = "^0.13.0" # read ome-tiff metadata
-zarr = "^2.14.0"
-GitPython = "^3.1.27"
-h5py = "2.10" # File I/O
-# Networking
-omero-py = { version = ">=5.6.2", optional = true } # contact omero server
-# Baby segmentation
-aliby-baby = {version = "^0.1.17", optional=true}
+bottleneck = ">=1.3.5"
+dask = ">=2021.12.0"
+flatten-dict = ">=0.4.2"
+h5py = ">=3.8.0"
+more-itertools = ">=10.2.0"
+pathos = ">=0.2.8"
+pyyaml = ">=6.0.1"
+py-find-1st = ">=1.1.6"
+tqdm = ">=4.62.3"
+xmltodict = ">=0.13.0"
+zarr = ">=2.14.0"
+tensorflow-io-gcs-filesystem = "0.34.0"
+chardet = "^5.2.0"
+grpcio = "1.62.2"
+tensorflow = "2.13.1"
+baby-seg = ">=0.30.4"
+omero-py = { version = ">=5.6.2", optional = true }

-# Postprocessing
-[tool.poetry.group.pp.dependencies]
-leidenalg = "^0.8.8"
-more-itertools = "^8.12.0"
-pycatch22 = "^0.4.2"
-
-[tool.poetry.group.pp]
-optional = true
-
-[tool.poetry.group.dev]
-optional = true
-
-[tool.poetry.group.dev.dependencies]
-black = "^22.6.0"
-mypy = "^0.930"
-numpydoc = "^1.3.1"
-isort = "^5.10.1"
-jupyter = "^1.0.0"
-flake8 = "^4.0.1"
-pyright = "^1.1.258"
-pre-commit = "^2.20.0"
-seaborn = "^0.11.2"
-debugpy = "^1.6.3"
-coverage = "^7.0.4"
-jupytext = "^1.14.4"
-grid-strategy = "^0.0.1"
-readchar = "^4.0.3"
-ipdb = "^0.13.11"
-
-[tool.poetry.group.docs]
-optional = true
-
-[tool.poetry.group.docs.dependencies]
-Sphinx = "^5.2.0"
-sphinx-rtd-theme = "^1.0.0"
-sphinx-autodoc-typehints = "^1.19.2"
-myst-parser = "^0.18.0"
-
-[tool.poetry.group.test]
-optional = true
-
-[tool.poetry.group.test.dependencies]
-pytest = "^6.2.5"
-
-[tool.poetry.group.utils]
-optional = true
-
-# Dependency groups can only be used by a poetry installation, not pip
-[tool.poetry.group.utils.dependencies]
-napari = {version = ">=0.4.16", optional=true}
-Torch = {version = "^1.13.1", optional=true}
-pytorch-lightning = {version = "^1.9.3", optional=true}
-torchvision = {version = "^0.14.1", optional=true}
-trio = {version = "^0.22.0", optional=true}
-grid-strategy = {version = "^0.0.1", optional=true}
-
-[tool.poetry.extras]
-omero = ["omero-py"]
-baby = ["aliby-baby"]

 [tool.black]
 line-length = 79
-target-version = ['py38']
-include = '\.pyi?$'
-extend-exclude = '''
-/(
-  \.git
-  | \.hg
-  | \.mypy_cache
-  | \.tox
-  | \.venv
-  | _build
-  | buck-out
-  | build
-  | dist
-)/
-'''
-
-[tool.isort]
-profile = "black"
-multi_line_output = 3
-line_length = 79
-include_trailing_comma = true
-
-[tool.pytest.ini_options]
-minversion = "6.0"
-addopts = "-ra -q"
-testpaths = [
-    "tests",
-]
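The slimmed-down dependency list keeps omero-py as the only optional dependency. A minimal sketch of the guarded-import pattern such an optional dependency implies; the check and error message are illustrative assumptions, not taken from this diff:

import importlib.util

def require_omero():
    """Raise a helpful error if the optional omero-py package is missing."""
    if importlib.util.find_spec("omero") is None:  # provided by omero-py
        raise ImportError(
            "omero-py is not installed; it is an optional dependency of alibylite"
        )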
@@ -7,26 +7,24 @@ from pathlib import Path
 from time import perf_counter
 from typing import Union

-from flatten_dict import flatten
+from flatten_dict import flatten, unflatten
 from yaml import dump, safe_load

-from agora.logging import timer
+from agora.logging_timer import timer

 atomic = t.Union[int, float, str, bool]


 class ParametersABC(ABC):
     """
-    Defines parameters as attributes and allows parameters to
-    be converted to either a dictionary or to yaml.
+    Define parameters typically for a step in the pipeline.
+
+    Outputs can be either a dict or yaml.

     No attribute should be called "parameters"!
     """

     def __init__(self, **kwargs):
-        """
-        Defines parameters as attributes
-        """
+        """Define parameters as attributes."""
         assert (
             "parameters" not in kwargs
         ), "No attribute should be named parameters"
@@ -35,8 +33,9 @@ class ParametersABC(ABC):
     def to_dict(self, iterable="null") -> t.Dict:
         """
-        Recursive function to return a nested dictionary of the
-        attributes of the class instance.
+        Return a nested dictionary of the attributes of the class instance.
+
+        Use recursion.
         """
         if isinstance(iterable, dict):
             if any(
@@ -47,9 +46,11 @@ class ParametersABC(ABC):
                 ]
             ):
                 return {
-                    k: v.to_dict()
-                    if hasattr(v, "to_dict")
-                    else self.to_dict(v)
+                    k: (
+                        v.to_dict()
+                        if hasattr(v, "to_dict")
+                        else self.to_dict(v)
+                    )
                     for k, v in iterable.items()
                 }
             else:
@@ -62,7 +63,8 @@ class ParametersABC(ABC):
     def to_yaml(self, path: Union[Path, str] = None):
         """
-        Returns a yaml stream of the attributes of the class instance.
+        Return a yaml stream of the attributes of the class instance.
+
         If path is provided, the yaml stream is saved there.

         Parameters
@@ -77,20 +79,19 @@ class ParametersABC(ABC):
     @classmethod
     def from_dict(cls, d: dict):
+        """Initialise from a dict of parameters."""
         return cls(**d)

     @classmethod
     def from_yaml(cls, source: Union[Path, str]):
-        """
-        Returns instance from a yaml filename or stdin
-        """
+        """Initialise from a yaml filename or stdin."""
         is_buffer = True
         try:
             if Path(source).exists():
                 is_buffer = False
-        except Exception as _:
+        except Exception as e:
+            print(e)
             assert isinstance(source, str), "Invalid source type."
         if is_buffer:
             params = safe_load(source)
         else:
@@ -100,86 +101,48 @@ class ParametersABC(ABC):
     @classmethod
     def default(cls, **kwargs):
+        """Initialise allowing the default parameters to be potentially replaced."""
         overriden_defaults = copy(cls._defaults)
         for k, v in kwargs.items():
             overriden_defaults[k] = v
         return cls.from_dict(overriden_defaults)
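For orientation, a minimal sketch of how a ParametersABC subclass is typically used; the class name and default values are assumptions for illustration, and from_yaml is assumed to load the file in the elided branch:

class ExampleParameters(ParametersABC):
    """Hypothetical parameters, for illustration only."""

    _defaults = {"window": 5, "channels": ["Brightfield"]}


params = ExampleParameters.default(window=10)  # override one default
params.to_yaml("params.yaml")  # save the attributes as yaml
restored = ExampleParameters.from_yaml("params.yaml")  # round-trip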
     def update(self, name: str, new_value):
-        """
-        Update values recursively
-        if name is a dictionary, replace data where existing found or add if not.
-        It warns against type changes.
-
-        If the existing structure under name is a dictionary,
-        it looks for the first occurrence and modifies it accordingly.
-
-        If a leaf node that is to be changed is a collection, it adds the new elements.
-        """
-        assert name not in (
-            "parameters",
-            "params",
-        ), "Attribute can't be named params or parameters"
-        if name in self.__dict__:
-            if check_type_recursive(getattr(self, name), new_value):
-                print("Warnings:Type changes are risky")
-            if isinstance(getattr(self, name), dict):
-                flattened = flatten(self.to_dict())
-                names_found = [k for k in flattened.keys() if name in k]
-                found_idx = [keys.index(name) for keys in names_found]
-                assert len(names_found), f"{name} not found as key."
-                keys = None
-                if len(names_found) > 1:
-                    for level in zip(found_idx, names_found):
-                        if level == min(found_idx):
-                            keys = level
-                            print(
-                                f"Warning: {name} was found in multiple keys. Selected {keys}"
-                            )
-                            break
-                else:
-                    keys = names_found.pop()
-                if keys:
-                    current_val = flattened.get(keys, None)
-                    # if isinstance(current_val, t.Collection):
-            elif isinstance(getattr(self, name), t.Collection):
-                add_to_collection(getattr(self, name), new_value)
-            elif isinstance(getattr(self, name), set):
-                pass  # TODO implement
-            new_d = getattr(self, name)
-            new_d.update(new_value)
-            setattr(self, name, new_d)
-        else:
-            setattr(self, name, new_value)
+        """Update a parameter in the nested dict of parameters."""
+        flat_params_dict = flatten(self.to_dict(), keep_empty_types=(dict,))
+        names_found = [
+            param for param in flat_params_dict.keys() if name in param
+        ]
+        if len(names_found) == 1:
+            keys = names_found.pop()
+            if type(flat_params_dict[keys]) is not type(new_value):
+                print("Warning:Changing type is risky.")
+            flat_params_dict[keys] = new_value
+            params_dict = unflatten(flat_params_dict)
+            # replace all old values
+            for key, value in params_dict.items():
+                setattr(self, key, value)
+        else:
+            print(f"Warning:{name} was neither recognised nor updated.")
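The rewritten update relies on flatten-dict's tuple-keyed round-trip; a self-contained sketch of that mechanism with illustrative values:

from flatten_dict import flatten, unflatten

nested = {"tiler": {"tile_size": 96}, "extraction": {"tree": {}}}
# keep_empty_types preserves empty dicts as leaves, as in update above
flat = flatten(nested, keep_empty_types=(dict,))
# flat == {("tiler", "tile_size"): 96, ("extraction", "tree"): {}}
flat[("tiler", "tile_size")] = 117
assert unflatten(flat)["tiler"]["tile_size"] == 117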
 def add_to_collection(
-    collection: t.Collection, value: t.Union[atomic, t.Collection]
+    collection: t.Collection, element: t.Union[atomic, t.Collection]
 ):
-    # Adds element(s) in place.
-    if not isinstance(value, t.Collection):
-        value = [value]
+    """Add elements to a collection, a list or set, in place."""
+    if not isinstance(element, t.Collection):
+        element = [element]
     if isinstance(collection, list):
-        collection += value
+        collection += element
     elif isinstance(collection, set):
-        collection.update(value)
+        collection.update(element)


 class ProcessABC(ABC):
     """
     Base class for processes.
-    Defines parameters as attributes and requires run method to be defined.
+
+    Define parameters as attributes and requires a run method.
     """

     def __init__(self, parameters):
@@ -190,8 +153,8 @@ class ProcessABC(ABC):
         """
         self._parameters = parameters
         # convert parameters to dictionary
-        # and then define each parameter as an attribute
         for k, v in parameters.to_dict().items():
+            # define each parameter as an attribute
             setattr(self, k, v)

     @property
@@ -202,32 +165,12 @@ class ProcessABC(ABC):
     def run(self):
         pass

-    def _log(self, message: str, level: str = "warning"):
-        # Log messages in the corresponding level
+    def log(self, message: str, level: str = "warning"):
+        """Log messages at the corresponding level."""
         logger = logging.getLogger("aliby")
         getattr(logger, level)(f"{self.__class__.__name__}: {message}")


-def check_type_recursive(val1, val2):
-    same_types = True
-    if not isinstance(val1, type(val2)) and not all(
-        type(x) in (Path, str) for x in (val1, val2)  # Ignore str->path
-    ):
-        return False
-    if not isinstance(val1, t.Iterable) and not isinstance(val2, t.Iterable):
-        return isinstance(val1, type(val2))
-    elif isinstance(val1, (tuple, list)) and isinstance(val2, (tuple, list)):
-        return bool(
-            sum([check_type_recursive(v1, v2) for v1, v2 in zip(val1, val2)])
-        )
-    elif isinstance(val1, dict) and isinstance(val2, dict):
-        if not len(val1) or not len(val2):
-            return False
-        for k in val2.keys():
-            same_types = same_types and check_type_recursive(val1[k], val2[k])
-        return same_types
-

 class StepABC(ProcessABC):
     """
     Base class that expands on ProcessABC to include tools used by Aliby steps.
@@ -243,11 +186,9 @@ class StepABC(ProcessABC):
     @timer
     def run_tp(self, tp: int, **kwargs):
-        """
-        Time and log the timing of a step.
-        """
+        """Time and log the timing of a step."""
         return self._run_tp(tp, **kwargs)

     def run(self):
         # Replace run with run_tp
-        raise Warning("Steps use run_tp instead of run")
+        raise Warning("Steps use run_tp instead of run.")
""" """
Tools to interact with h5 files and handle data consistently. Tools to interact with h5 files and handle data consistently.
""" """
import collections import collections
import logging import logging
import typing as t import typing as t
...@@ -23,20 +24,19 @@ class BridgeH5: ...@@ -23,20 +24,19 @@ class BridgeH5:
"""Initialise with the name of the h5 file.""" """Initialise with the name of the h5 file."""
self.filename = filename self.filename = filename
if flag is not None: if flag is not None:
self._hdf = h5py.File(filename, flag) self.hdf = h5py.File(filename, flag)
self._filecheck assert (
"cell_info" in self.hdf
), "Invalid file. No 'cell_info' found."
def _log(self, message: str, level: str = "warn"): def log(self, message: str, level: str = "warn"):
# Log messages in the corresponding level # Log messages in the corresponding level
logger = logging.getLogger("aliby") logger = logging.getLogger("aliby")
getattr(logger, level)(f"{self.__class__.__name__}: {message}") getattr(logger, level)(f"{self.__class__.__name__}: {message}")
def _filecheck(self):
assert "cell_info" in self._hdf, "Invalid file. No 'cell_info' found."
def close(self): def close(self):
"""Close the h5 file.""" """Close the h5 file."""
self._hdf.close() self.hdf.close()
@property @property
def meta_h5(self) -> t.Dict[str, t.Any]: def meta_h5(self) -> t.Dict[str, t.Any]:
...@@ -83,7 +83,7 @@ class BridgeH5: ...@@ -83,7 +83,7 @@ class BridgeH5:
def get_npairs_over_time(self, nstepsback=2): def get_npairs_over_time(self, nstepsback=2):
tree = self.cell_tree tree = self.cell_tree
npairs = [] npairs = []
for tp in self._hdf["cell_info"]["processed_timepoints"][()]: for tp in self.hdf["cell_info"]["processed_timepoints"][()]:
tmp_tree = { tmp_tree = {
k: {k2: v2 for k2, v2 in v.items() if k2 <= tp} k: {k2: v2 for k2, v2 in v.items() if k2 <= tp}
for k, v in tree.items() for k, v in tree.items()
...@@ -115,7 +115,7 @@ class BridgeH5: ...@@ -115,7 +115,7 @@ class BridgeH5:
---------- ----------
Nested dictionary where keys (or branches) are the upper levels and the leaves are the last element of :fields:. Nested dictionary where keys (or branches) are the upper levels and the leaves are the last element of :fields:.
""" """
zipped_info = (*zip(*[self._hdf["cell_info"][f][()] for f in fields]),) zipped_info = (*zip(*[self.hdf["cell_info"][f][()] for f in fields]),)
return recursive_groupsort(zipped_info) return recursive_groupsort(zipped_info)
......
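A short usage sketch for the now-public hdf handle; the file name is an assumption, and the default flag is assumed to open the file:

from agora.io.bridge import BridgeH5

bridge = BridgeH5("position001.h5")  # hypothetical h5 file
print(list(bridge.hdf["cell_info"].keys()))  # datasets written per cell
bridge.close()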
[Diff collapsed.]
@@ -6,17 +6,19 @@ import typing as t
 from functools import wraps


-def _first_arg_str_to_df(
+def _first_arg_str_to_raw_df(
     fn: t.Callable,
 ):
     """Enable Signal-like classes to convert strings to data sets."""

     @wraps(fn)
     def format_input(*args, **kwargs):
         cls = args[0]
         data = args[1]
         if isinstance(data, str):
-            # get data from h5 file
+            # get data from h5 file using Signal's get_raw
             data = cls.get_raw(data)
         # replace path in the undecorated function with data
         return fn(cls, data, *args[2:], **kwargs)

     return format_input
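A sketch of how this decorator is applied in a Signal-like class; the class, method, and return values are illustrative assumptions:

import pandas as pd

class MiniSignal:
    """Illustrative stand-in for Signal."""

    def get_raw(self, dataset: str) -> pd.DataFrame:
        # in aliby this reads the dataset from an h5 file
        return pd.DataFrame({0: [1.0, 2.0], 1: [3.0, 4.0]})

    @_first_arg_str_to_raw_df
    def mean_signal(self, data: pd.DataFrame) -> pd.Series:
        # data is always a DataFrame here, whether the caller passed
        # a dataset path such as "extraction/general/None/volume"
        # or an actual DataFrame
        return data.mean(axis=1)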
""" """
Anthology of interfaces fordispatch_metadata_parse different parsers and lack of them. Aliby decides on using different metadata parsers based on two elements:
1. The parameter given by PipelineParameters (either True/False or a string
ALIBY decides on using different metadata parsers based on two elements: pointing to the metadata file)
2. The available files in the root folder where images are found (either
1. The parameter given by PipelineParameters (Either True/False, or a string pointing to the metadata file) remote or locally).
2. The available files in the root folder where images are found (remote or locally)
If parameters is a string pointing to a metadata file, Aliby picks a parser
If parameters is a string pointing to a metadata file, ALIBY picks a parser based on the file format. based on the file format.
If parameters is True (as a boolean), ALIBY searches for any available file and uses the first valid one. If parameters is True, Aliby searches for any available file and uses the
If there are no metadata files, ALIBY requires indicating indices for tiler, segmentation and extraction. first valid one.
If there are no metadata files, Aliby requires indices in the tiff file names
for tiler, segmentation, and extraction.
WARNING: grammars depend on the directory structure of a local log-file_parser
repository.
""" """
 import glob
 import logging
+import numpy as np
 import os
 import typing as t
 from datetime import datetime
@@ -27,28 +32,32 @@ from logfile_parser.swainlab_parser import parse_from_swainlab_grammar


 class MetaData:
-    """Small metadata Process that loads log."""
+    """Metadata process that loads and parses log files."""

     def __init__(self, log_dir, store):
+        """Initialise with log-file directory and h5 location to write."""
         self.log_dir = log_dir
         self.store = store
         self.metadata_writer = Writer(self.store)

     def __getitem__(self, item):
+        """Load log and access item in resulting meta data dictionary."""
         return self.load_logs()[item]

     def load_logs(self):
-        # parsed_flattened = parse_logfiles(self.log_dir)
-        parsed_flattened = dispatch_metadata_parser(self.log_dir)
+        """Load log using a hierarchy of parsers."""
+        parsed_flattened = parse_metadata(self.log_dir)
         return parsed_flattened

     def run(self, overwrite=False):
+        """Load and parse logs and write to h5 file."""
         metadata_dict = self.load_logs()
         self.metadata_writer.write(
             path="/", meta=metadata_dict, overwrite=overwrite
         )

     def add_field(self, field_name, field_value, **kwargs):
+        """Write a field and its values to the h5 file."""
         self.metadata_writer.write(
             path="/",
             meta={field_name: field_value},
@@ -56,207 +65,187 @@ class MetaData:
         )

     def add_fields(self, fields_values: dict, **kwargs):
+        """Write a dict of fields and values to the h5 file."""
         for field, value in fields_values.items():
             self.add_field(field, value)
-# Paradigm: able to do something with all datatypes present in log files,
-# then pare down on what specific information is really useful later.
-
-# Needed because HDF5 attributes do not support dictionaries
-def flatten_dict(nested_dict, separator="/"):
-    """
-    Flattens nested dictionary. If empty return as-is.
-    """
-    flattened = {}
-    if nested_dict:
-        df = pd.json_normalize(nested_dict, sep=separator)
-        flattened = df.to_dict(orient="records")[0] or {}
-    return flattened
-
-
-# Needed because HDF5 attributes do not support datetime objects
-# Takes care of time zones & daylight saving
-def datetime_to_timestamp(time, locale="Europe/London"):
-    """
-    Convert datetime object to UNIX timestamp
-    """
-    return timezone(locale).localize(time).timestamp()
-
-
-def find_file(root_dir, regex):
-    file = [
-        f
-        for f in glob.glob(os.path.join(str(root_dir), regex))
-        if Path(f).name != "aliby.log"  # Skip filename reserved for aliby
-    ]
-    if len(file) > 1:
-        print(
-            "Warning:Metadata: More than one logfile found. Defaulting to first option."
-        )
-        file = [sorted(file)[0]]
-    if len(file) == 0:
-        logging.getLogger("aliby").log(
-            logging.WARNING, "Metadata: No valid swainlab .log found."
-        )
-    else:
-        return file[0]
-    return None
-
-
-# TODO: re-write this as a class if appropriate
-# WARNING: grammars depend on the directory structure of a locally installed
-# logfile_parser repo
-def parse_logfiles(
-    root_dir,
-    acq_grammar="multiDGUI_acq_format.json",
-    log_grammar="multiDGUI_log_format.json",
-):
-    """
-    Parse acq and log files depending on the grammar specified, then merge into
-    single dict.
-    """
-    # Both acq and log files contain useful information.
-    # ACQ_FILE = 'flavin_htb2_glucose_long_ramp_DelftAcq.txt'
-    # LOG_FILE = 'flavin_htb2_glucose_long_ramp_Delftlog.txt'
-    log_parser = Parser(log_grammar)
-    acq_parser = Parser(acq_grammar)
-    log_file = find_file(root_dir, "*log.txt")
-    acq_file = find_file(root_dir, "*[Aa]cq.txt")
-    parsed = {}
-    if log_file and acq_file:
-        with open(log_file, "r") as f:
-            log_parsed = log_parser.parse(f)
-        with open(acq_file, "r") as f:
-            acq_parsed = acq_parser.parse(f)
-        parsed = {**acq_parsed, **log_parsed}
-    for key, value in parsed.items():
-        if isinstance(value, datetime):
-            parsed[key] = datetime_to_timestamp(value)
-    parsed_flattened = flatten_dict(parsed)
-    for k, v in parsed_flattened.items():
-        if isinstance(v, list):
-            parsed_flattened[k] = [0 if el is None else el for el in v]
-    return parsed_flattened
-
-
-def get_meta_swainlab(parsed_metadata: dict):
-    """
-    Convert raw parsing of Swainlab logfile to the metadata interface.
-
-    Input:
-    --------
-    parsed_metadata: Dict[str, str or int or DataFrame or Dict]
-        default['general', 'image_config', 'device_properties', 'group_position', 'group_time', 'group_config']
-
-    Returns:
-    --------
-    Dictionary with metadata following the standard
-    """
-    channels = parsed_metadata["image_config"]["Image config"].values.tolist()
-    # nframes = int(parsed_metadata["group_time"]["frames"].max())
-    # return {"channels": channels, "nframes": nframes}
-    return {"channels": channels}
-
-
-def get_meta_from_legacy(parsed_metadata: dict):
-    result = parsed_metadata
-    result["channels"] = result["channels/channel"]
-    return result
-
-
-def parse_swainlab_metadata(filedir: t.Union[str, Path]):
-    """
-    Dispatcher function that determines which parser to use based on the file ending.
-
-    Input:
-    --------
-    filedir: Directory where the logfile is located.
-
-    Returns:
-    --------
-    Dictionary with minimal metadata
-    """
-    filedir = Path(filedir)
-    filepath = find_file(filedir, "*.log")
-    if filepath:
-        raw_parse = parse_from_swainlab_grammar(filepath)
-        minimal_meta = get_meta_swainlab(raw_parse)
-    else:
-        if filedir.is_file() or str(filedir).endswith(".zarr"):
-            filedir = filedir.parent
-        legacy_parse = parse_logfiles(filedir)
-        minimal_meta = (
-            get_meta_from_legacy(legacy_parse) if legacy_parse else {}
-        )
-    return minimal_meta
-
-
-def dispatch_metadata_parser(filepath: t.Union[str, Path]):
-    """
-    Function to dispatch different metadata parsers that convert logfiles into a
-    basic metadata dictionary. Currently only contains the swainlab log parsers.
-
-    Input:
-    --------
-    filepath: str existing file containing metadata, or folder containing naming conventions
-    """
-    parsed_meta = parse_swainlab_metadata(filepath)
-    if parsed_meta is None:
-        parsed_meta = dir_to_meta
-    return parsed_meta
-
-
-def dir_to_meta(path: Path, suffix="tiff"):
-    filenames = list(path.glob(f"*.{suffix}"))
-    try:
-        # Deduct order from filenames
-        dimorder = "".join(
-            map(lambda x: x[0], filenames[0].stem.split("_")[1:])
-        )
-        dim_value = list(
-            map(
-                lambda f: filename_to_dict_indices(f.stem),
-                path.glob("*.tiff"),
-            )
-        )
-        maxes = [max(map(lambda x: x[dim], dim_value)) for dim in dimorder]
-        mins = [min(map(lambda x: x[dim], dim_value)) for dim in dimorder]
-        _dim_shapes = [
-            max_val - min_val + 1 for max_val, min_val in zip(maxes, mins)
-        ]
-        meta = {
-            "size_" + dim: shape for dim, shape in zip(dimorder, _dim_shapes)
-        }
-    except Exception as e:
-        print(
-            f"Warning:Metadata: Cannot extract dimensions from filenames. Empty meta set {e}"
-        )
-        meta = {}
-    return meta
-
-
-def filename_to_dict_indices(stem: str):
-    return {
-        dim_number[0]: int(dim_number[1:])
-        for dim_number in stem.split("_")[1:]
-    }
+def parse_metadata(filedir: t.Union[str, Path]):
+    """
+    Dispatch different metadata parsers that convert logfiles into a dictionary.
+
+    Currently only contains the swainlab log parsers.
+
+    Parameters
+    --------
+    filepath: str
+        File containing metadata or folder containing naming conventions.
+    """
+    filedir = Path(filedir)
+    if filedir.is_file() or str(filedir).endswith(".zarr"):
+        # log file is in parent directory
+        filedir = filedir.parent
+    filepath = find_file(filedir, "*.log")
+    if filepath:
+        # new log files ending in .log
+        raw_parse = parse_from_swainlab_grammar(filepath)
+        minimal_meta = get_minimal_meta_swainlab(raw_parse)
+    else:
+        # legacy log files ending in .txt
+        legacy_parse = parse_legacy_logfiles(filedir)
+        minimal_meta = (
+            get_meta_from_legacy(legacy_parse) if legacy_parse else {}
+        )
+    if minimal_meta is None:
+        raise Exception("No metadata found.")
+    else:
+        return minimal_meta
+
+
+def find_file(root_dir, regex):
+    """Find files in a directory using regex."""
+    # ignore aliby.log files
+    file = [
+        f
+        for f in glob.glob(os.path.join(str(root_dir), regex))
+        if Path(f).name != "aliby.log"
+    ]
+    if len(file) == 0:
+        return None
+    elif len(file) > 1:
+        print(
+            "Warning:Metadata: More than one log file found."
+            " Defaulting to first option."
+        )
+        return sorted(file)[0]
+    else:
+        return file[0]
+
+
+def get_minimal_meta_swainlab(parsed_metadata: dict):
+    """
+    Extract channels from parsed metadata.
+
+    Parameters
+    --------
+    parsed_metadata: dict[str, str or int or DataFrame or Dict]
+        default['general', 'image_config', 'device_properties',
+        'group_position', 'group_time', 'group_config']
+
+    Returns
+    --------
+    Dict with channels metadata
+    """
+    channels_dict = find_channels_by_position(parsed_metadata["group_config"])
+    channels = parsed_metadata["image_config"]["Image config"].values.tolist()
+    parsed_ntps = parsed_metadata["group_time"]["frames"]
+    if type(parsed_ntps) is int:
+        ntps = parsed_ntps
+    else:
+        ntps = parsed_ntps.max()
+    parsed_tinterval = parsed_metadata["group_time"]["interval"]
+    if type(parsed_tinterval) is int:
+        timeinterval = parsed_tinterval
+    else:
+        timeinterval = parsed_tinterval.min()
+    minimal_meta = {
+        "channels_by_group": channels_dict,
+        "channels": channels,
+        "time_settings/ntimepoints": int(ntps),
+        "time_settings/timeinterval": int(timeinterval),
+    }
+    return minimal_meta
+
+
+def find_channels_by_position(meta):
+    """
+    Parse metadata to find the imaging channels for each group.
+
+    Return a dict with groups as keys and channels as values.
+    """
+    if isinstance(meta, pd.DataFrame):
+        imaging_channels = list(meta.columns)
+        channels_dict = {group: [] for group in meta.index}
+        for group in channels_dict:
+            for channel in imaging_channels:
+                if meta.loc[group, channel] is not None:
+                    channels_dict[group].append(channel)
+    elif isinstance(meta, dict) and "positions/posname" in meta:
+        channels_dict = {
+            position_name: [] for position_name in meta["positions/posname"]
+        }
+        imaging_channels = meta["channels"]
+        for i, position_name in enumerate(meta["positions/posname"]):
+            for imaging_channel in imaging_channels:
+                if (
+                    "positions/" + imaging_channel in meta
+                    and meta["positions/" + imaging_channel][i]
+                ):
+                    channels_dict[position_name].append(imaging_channel)
+    else:
+        channels_dict = {}
+    return channels_dict
+
+
+### legacy code for acq and log files ###
+def parse_legacy_logfiles(
+    root_dir,
+    acq_grammar="multiDGUI_acq_format.json",
+    log_grammar="multiDGUI_log_format.json",
+):
+    """
+    Parse acq and log files using the grammar specified.
+
+    Merge results into a single dict.
+    """
+    log_parser = Parser(log_grammar)
+    acq_parser = Parser(acq_grammar)
+    log_file = find_file(root_dir, "*log.txt")
+    acq_file = find_file(root_dir, "*[Aa]cq.txt")
+    # parse into a single dict
+    parsed = {}
+    if log_file and acq_file:
+        with open(log_file, "r") as f:
+            log_parsed = log_parser.parse(f)
+        with open(acq_file, "r") as f:
+            acq_parsed = acq_parser.parse(f)
+        parsed = {**acq_parsed, **log_parsed}
+    # convert data to having time stamps
+    for key, value in parsed.items():
+        if isinstance(value, datetime):
+            parsed[key] = datetime_to_timestamp(value)
+    # flatten dict
+    parsed_flattened = flatten_dict(parsed)
+    for k, v in parsed_flattened.items():
+        if isinstance(v, list):
+            # replace None with 0
+            parsed_flattened[k] = [0 if el is None else el for el in v]
+    return parsed_flattened
+
+
+def get_meta_from_legacy(parsed_metadata: dict):
+    """Fix naming convention for channels in legacy .txt log files."""
+    result = parsed_metadata
+    result["channels"] = result["channels/channel"]
+    return result
+
+
+def flatten_dict(nested_dict, separator="/"):
+    """
+    Flatten nested dictionary because h5 attributes cannot be dicts.
+
+    If empty return as-is.
+    """
+    flattened = {}
+    if nested_dict:
+        df = pd.json_normalize(nested_dict, sep=separator)
+        flattened = df.to_dict(orient="records")[0] or {}
+    return flattened
+
+
+def datetime_to_timestamp(time, locale="Europe/London"):
+    """Convert datetime object to UNIX timestamp."""
+    # h5 attributes do not support datetime objects
+    return timezone(locale).localize(time).timestamp()
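A usage sketch for the new single entry point; the folder name is an assumption, and the keys shown are those built by get_minimal_meta_swainlab for new-style logs:

from pathlib import Path

meta = parse_metadata(Path("experiment_folder"))  # hypothetical folder
print(meta["channels"])
print(meta["time_settings/ntimepoints"], meta["time_settings/timeinterval"])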
@@ -5,7 +5,7 @@ import h5py
 import numpy as np

 from agora.io.bridge import groupsort
-from agora.io.writer import load_attributes
+from agora.io.writer import load_meta


 class DynamicReader:
@@ -13,7 +13,7 @@ class DynamicReader:
     def __init__(self, file: str):
         self.file = file
-        self.metadata = load_attributes(file)
+        self.metadata = load_meta(file)


 class StateReader(DynamicReader):
......
[Diff collapsed.]
@@ -15,9 +15,10 @@ from agora.io.bridge import BridgeH5
 #################### Dynamic version ##################################


-def load_attributes(file: str, group="/"):
+def load_meta(file: str, group="/"):
     """
-    Load the metadata from an h5 file and convert to a dictionary, including the "parameters" field which is stored as YAML.
+    Load the metadata from an h5 file and convert to a dictionary, including
+    the "parameters" field which is stored as YAML.

     Parameters
     ----------
@@ -26,8 +27,9 @@ def load_meta(file: str, group="/"):
     group: str, optional
         The group in the h5 file from which to read the data
     """
-    # load the metadata, stored as attributes, from the h5 file and return as a dictionary
+    # load the metadata, stored as attributes, from the h5 file
     with h5py.File(file, "r") as f:
+        # return as a dict
         meta = dict(f[group].attrs.items())
     if "parameters" in meta:
         # convert from yaml format into dict
@@ -51,9 +53,9 @@ class DynamicWriter:
         self.file = file
         # the metadata is stored as attributes in the h5 file
         if Path(file).exists():
-            self.metadata = load_attributes(file)
+            self.metadata = load_meta(file)

-    def _log(self, message: str, level: str = "warn"):
+    def log(self, message: str, level: str = "warn"):
         # Log messages in the corresponding level
         logger = logging.getLogger("aliby")
         getattr(logger, level)(f"{self.__class__.__name__}: {message}")
@@ -102,9 +104,11 @@ class DynamicWriter:
                     maxshape=max_shape,
                     dtype=dtype,
                     compression=self.compression,
-                    compression_opts=self.compression_opts
-                    if self.compression is not None
-                    else None,
+                    compression_opts=(
+                        self.compression_opts
+                        if self.compression is not None
+                        else None
+                    ),
                 )
                 # write all data, signified by the empty tuple
                 hgroup[key][()] = data
@@ -172,7 +176,7 @@ class DynamicWriter:
                     # append or create new dataset
                     self._append(value, key, hgroup)
                 except Exception as e:
-                    self._log(
+                    self.log(
                         f"{key}:{value} could not be written: {e}", "error"
                     )
         # write metadata
@@ -448,7 +452,6 @@ class Writer(BridgeH5):
         """
         self.id_cache = {}
         with h5py.File(self.filename, "a") as f:
-            # Alan, haven't we already opened the h5 file through BridgeH5's init?
             if overwrite == "overwrite":  # TODO refactor overwriting
                 if path in f:
                     del f[path]
@@ -490,7 +493,12 @@ class Writer(BridgeH5):
     def write_meta(self, f: h5py.File, path: str, attr: str, data: Iterable):
         """Write metadata to an open h5 file."""
         obj = f.require_group(path)
-        obj.attrs[attr] = data
+        if type(data) is dict:
+            # necessary for channels_dict from find_channels_by_position
+            for key, vlist in data.items():
+                obj.attrs[attr + key] = vlist
+        else:
+            obj.attrs[attr] = data

     @staticmethod
     def write_arraylike(f: h5py.File, path: str, data: Iterable, **kwargs):
@@ -535,7 +543,6 @@ class Writer(BridgeH5):
             path + "values" if path.endswith("/") else path + "/values"
         )
         if path not in f:
-
             # create dataset and write data
             max_ncells = 2e5
             max_tps = 1e3
@@ -581,7 +588,6 @@ class Writer(BridgeH5):
             else:
                 f[path].attrs["columns"] = df.columns.tolist()
         else:
-
             # path exists
             dset = f[values_path]
@@ -589,7 +595,7 @@ class Writer(BridgeH5):
             new_tps = set(df.columns)
             if path + "/timepoint" in f:
                 new_tps = new_tps.difference(f[path + "/timepoint"][()])
-            df = df[new_tps]
+            df = df[list(new_tps)]
             if (
                 not hasattr(self, "id_cache")
@@ -618,9 +624,9 @@ class Writer(BridgeH5):
                 # sort indices for h5 indexing
                 incremental_existing = np.argsort(found_indices)
-                self.id_cache[df.index.nlevels][
-                    "found_indices"
-                ] = found_indices[incremental_existing]
+                self.id_cache[df.index.nlevels]["found_indices"] = (
+                    found_indices[incremental_existing]
+                )
                 self.id_cache[df.index.nlevels]["found_multi"] = found_multis[
                     incremental_existing
                 ]
......
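The write_meta change works around h5 attributes not accepting dicts; a self-contained sketch of the same pattern, with an illustrative file and attribute name:

import h5py

channels_dict = {"group1": ["Brightfield", "GFP"], "group2": ["Brightfield"]}
with h5py.File("example.h5", "a") as f:  # hypothetical file
    obj = f.require_group("/")
    # h5 attributes cannot store dicts, so write one attribute per key
    for key, vlist in channels_dict.items():
        obj.attrs["channels_by_group" + key] = vlist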
-#!/usr/bin/env jupyter
-"""
-Add general logging functions and decorators
-"""
 import logging
 from time import perf_counter


 def timer(func):
-    # Log duration of a function into aliby logfile
+    """Log duration of a function into the aliby log file."""
     def wrap_func(*args, **kwargs):
         t1 = perf_counter()
         result = func(*args, **kwargs)
......
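The rest of this file is collapsed in the diff; a minimal sketch of how such a timing decorator typically completes, with an assumed log format:

import logging
from functools import wraps
from time import perf_counter


def timer(func):
    """Log duration of a function into the aliby log file."""

    @wraps(func)
    def wrap_func(*args, **kwargs):
        t1 = perf_counter()
        result = func(*args, **kwargs)
        # illustrative message; the committed format is in the collapsed lines
        logging.getLogger("aliby").debug(
            f"{func.__name__} took {perf_counter() - t1:.3f}s"
        )
        return result

    return wrap_func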
-#!/usr/bin/env jupyter
-"""
-Convert some types to others
-"""
-
-
-def _str_to_int(x: str or None):
-    """
-    Cast string as int if possible. If Nonetype return None.
-    """
-    if x is not None:
-        try:
-            return int(x)
-        except:
-            return x
-#!/usr/bin/env jupyter
-"""
-Utilities based on association are used to efficiently acquire indices of tracklets with some kind of relationship.
-This can be:
-    - Cells that are to be merged
-    - Cells that have a linear relationship
-"""
-
 import numpy as np
-import typing as t
+import pandas as pd

+# data type to link together trap and cell ids
+i_dtype = {"names": ["trap_id", "cell_id"], "formats": [np.int64, np.int64]}

-def validate_association(
-    association: np.ndarray,
-    indices: np.ndarray,
-    match_column: t.Optional[int] = None,
-) -> t.Tuple[np.ndarray, np.ndarray]:
-    """Select rows from the first array that are present in both.
-    We use casting for fast multiindexing, generalising for lineage dynamics
-
-    Parameters
-    ----------
-    association : np.ndarray
-        2-D array where columns are (trap, mother, daughter) or 3-D array where
-        dimensions are (X,trap,2), containing tuples ((trap,mother), (trap,daughter))
-        across the 3rd dimension.
-    indices : np.ndarray
-        2-D array where each column is a different level. This should not include mother_label.
-    match_column: int
-        int indicating a specific column is required to match (i.e.
-        0-1 for target-source when trying to merge tracklets or mother-bud for lineage)
-        must be present in indices. If it is false one match suffices for the resultant indices
-        vector to be True.
-
-    Returns
-    -------
-    np.ndarray
-        1-D boolean array indicating valid merge events.
-    np.ndarray
-        1-D boolean array indicating indices with an association relationship.
-
-    Examples
-    --------
-    >>> import numpy as np
-    >>> from agora.utils.indexing import validate_association
-    >>> merges = np.array(range(12)).reshape(3,2,2)
-    >>> indices = np.array(range(6)).reshape(3,2)
-    >>> print(merges); print(indices)
-    [[[ 0  1]
-      [ 2  3]]
-     [[ 4  5]
-      [ 6  7]]
-     [[ 8  9]
-      [10 11]]]
-    [[0 1]
-     [2 3]
-     [4 5]]
-    >>> valid_associations, valid_indices = validate_association(merges, indices)
-    >>> print(valid_associations, valid_indices)
-    [ True False False] [ True  True False]
-    """
-    if association.ndim == 2:
-        # Reshape into 3-D array for broadcasting if neded
-        # association = np.stack(
-        #     (association[:, [0, 1]], association[:, [0, 2]]), axis=1
-        # )
-        association = _assoc_indices_to_3d(association)
-    # Compare existing association with available indices
-    # Swap trap and label axes for the association array to correctly cast
-    valid_ndassociation = association[..., None] == indices.T[None, ...]
-    # Broadcasting is confusing (but efficient):
-    # First we check the dimension across trap and cell id, to ensure both match
-    valid_cell_ids = valid_ndassociation.all(axis=2)
-    if match_column is None:
-        # Then we check the merge tuples to check which cases have both target and source
-        valid_association = valid_cell_ids.any(axis=2).all(axis=1)
-        # Finally we check the dimension that crosses all indices, to ensure the pair
-        # is present in a valid merge event.
-        valid_indices = (
-            valid_ndassociation[valid_association].all(axis=2).any(axis=(0, 1))
-        )
-    else:  # We fetch specific indices if we aim for the ones with one present
-        valid_indices = valid_cell_ids[:, match_column].any(axis=0)
-        # Valid association then becomes a boolean array, true means that there is a
-        # match (match_column) between that cell and the index
-        valid_association = (
-            valid_cell_ids[:, match_column] & valid_indices
-        ).any(axis=1)
-    return valid_association, valid_indices
+def validate_lineage(
+    lineage: np.ndarray,
+    indices: np.ndarray,
+    how: str = "families",
+):
+    """
+    Identify mother-bud pairs both in lineage and a Signal's indices.
+
+    We expect the lineage information to be unique: a bud should not have
+    two mothers.
+
+    Lineage is returned with buds assigned only to their first mother if they
+    have multiple.
+
+    Parameters
+    ----------
+    lineage : np.ndarray
+        2D array of lineage associations where columns are
+        (trap, mother, daughter)
+        or
+        a 3D array, which is an array of 2 X 2 arrays comprising
+        [[trap_id, mother_label], [trap_id, daughter_label]].
+    indices : np.ndarray
+        A 2D array of cell indices from a Signal, (trap_id, cell_label).
+        This array should not include mother_label.
+    how: str
+        If "mothers", matches indicate mothers from mother-bud pairs;
+        If "daughters", matches indicate daughters from mother-bud pairs;
+        If "families", matches indicate mothers and daughters in mother-bud pairs.
+
+    Returns
+    -------
+    valid_lineage: boolean np.ndarray
+        1D array indicating matched elements in lineage.
+    valid_indices: boolean np.ndarray
+        1D array indicating matched elements in indices.
+    lineage: np.ndarray
+        Any bud already having a mother that is assigned to another has that
+        second assignment discarded.
+
+    Examples
+    --------
+    >>> import numpy as np
+    >>> from agora.utils.indexing import validate_lineage
+    >>> lineage = np.array([ [[0, 1], [0, 3]], [[0, 1], [0, 4]], [[0, 1], [0, 6]], [[0, 4], [0, 7]] ])
+    >>> indices = np.array([ [0, 1], [0, 2], [0, 3]])
+    >>> valid_lineage, valid_indices, lineage = validate_lineage(lineage, indices)
+    >>> print(valid_lineage)
+    array([ True, False, False, False])
+    >>> print(valid_indices)
+    array([ True, False, True])
+
+    and
+
+    >>> lineage = np.array([[[0,3], [0,1]], [[0,2], [0,4]]])
+    >>> indices = np.array([[0,1], [0,2], [0,3]])
+    >>> valid_lineage, valid_indices, lineage = validate_lineage(lineage, indices)
+    >>> print(valid_lineage)
+    array([ True, False])
+    >>> print(valid_indices)
+    array([ True, False, True])
+    """
+    if lineage.ndim == 2:
+        # [trap, mother, daughter] becomes [[trap, mother], [trap, daughter]]
+        lineage = assoc_indices_to_3d(lineage)
+        invert_lineage = True
+    if how == "mothers":
+        c_index = 0
+    elif how == "daughters":
+        c_index = 1
+    # if buds have two mothers, pick the first one
+    lineage = lineage[
+        ~pd.DataFrame(lineage[:, 1, :]).duplicated().values, :, :
+    ]
+    # find valid lineage
+    valid_lineages = index_isin(lineage, indices)
+    if how == "families":
+        # both mother and bud must be in indices
+        valid_lineage = valid_lineages.all(axis=1)
+    else:
+        valid_lineage = valid_lineages[:, c_index, :]
+    flat_valid_lineage = valid_lineage.flatten()
+    # find valid indices
+    selected_lineages = lineage[flat_valid_lineage, ...]
+    if how == "families":
+        # select only pairs of mother and bud indices
+        valid_indices = index_isin(indices, selected_lineages)
+    else:
+        valid_indices = index_isin(indices, selected_lineages[:, c_index, :])
+    flat_valid_indices = valid_indices.flatten()
+    # put the corrected lineage in the right format
+    if invert_lineage:
+        lineage = assoc_indices_to_2d(lineage)
+    return flat_valid_lineage, flat_valid_indices, lineage


+def index_isin(x: np.ndarray, y: np.ndarray) -> np.ndarray:
+    """
+    Find those elements of x that are in y.
+
+    Both arrays must be arrays of integer indices,
+    such as (trap_id, cell_id).
+    """
+    x = np.ascontiguousarray(x, dtype=np.int64)
+    y = np.ascontiguousarray(y, dtype=np.int64)
+    xv = x.view(i_dtype)
+    inboth = np.intersect1d(xv, y.view(i_dtype))
+    x_bool = np.isin(xv, inboth)
+    return x_bool
-def _assoc_indices_to_3d(ndarray: np.ndarray):
+def assoc_indices_to_3d(ndarray: np.ndarray):
     """
-    Convert the last column to a new row while repeating all previous indices.
-    This is useful when converting a signal multiindex before comparing association.
+    Convert the last column to a new row and repeat first column's values.
+
+    For example: [trap, mother, daughter] becomes
+    [[trap, mother], [trap, daughter]].

-    Assumes the input array has shape (N,3)
+    Assumes the input array has shape (N,3).
     """
     result = ndarray
     if len(ndarray) and ndarray.ndim > 1:
-        if ndarray.shape[1] == 3:  # Faster indexing for single positions
+        # faster indexing for single positions
+        if ndarray.shape[1] == 3:
             result = np.transpose(
                 np.hstack((ndarray[:, [0]], ndarray)).reshape(-1, 2, 2),
                 axes=[0, 2, 1],
             )
-        else:  # 20% slower but more general indexing
+        else:
+            # 20% slower but more general indexing
             columns = np.arange(ndarray.shape[1])
             result = np.stack(
                 (
                     ndarray[:, np.delete(columns, -1)],
@@ -132,21 +150,11 @@ def assoc_indices_to_3d(ndarray: np.ndarray):
     return result
-def _3d_index_to_2d(array: np.ndarray):
-    """
-    Opposite to _assoc_indices_to_3d.
-    """
+def assoc_indices_to_2d(array: np.ndarray):
+    """Convert indices to 2d."""
     result = array
     if len(array):
         result = np.concatenate(
             (array[:, 0, :], array[:, 1, 1, np.newaxis]), axis=1
         )
     return result
-
-
-def compare_indices(x: np.ndarray, y: np.ndarray) -> np.ndarray:
-    """
-    Fetch two 2-D indices and return a binary 2-D matrix
-    where a True value links two cells where all cells are the same
-    """
-    return (x[..., None] == y.T[None, ...]).all(axis=1)
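The structured-dtype view behind the new index_isin lets each (trap_id, cell_id) pair compare as a single scalar; a standalone sketch:

import numpy as np

i_dtype = {"names": ["trap_id", "cell_id"], "formats": [np.int64, np.int64]}
x = np.ascontiguousarray([[0, 1], [0, 2], [1, 1]], dtype=np.int64)
y = np.ascontiguousarray([[0, 2], [1, 1]], dtype=np.int64)
# viewing rows as records compares whole pairs at once
inboth = np.intersect1d(x.view(i_dtype), y.view(i_dtype))
print(np.isin(x.view(i_dtype), inboth).flatten())  # [False  True  True]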
@@ -6,7 +6,6 @@ import numpy as np
 import pandas as pd
 from sklearn.cluster import KMeans

-from agora.utils.indexing import validate_association

 index_row = t.Tuple[str, str, int, int]
@@ -86,16 +85,19 @@ def bidirectional_retainment_filter(
     daughters_thresh: int = 7,
 ) -> pd.DataFrame:
     """
-    Retrieve families where mothers are present for more than a fraction of the experiment, and daughters for longer than some number of time-points.
+    Retrieve families where mothers are present for more than a fraction
+    of the experiment and daughters for longer than some number of
+    time-points.

     Parameters
     ----------
     df: pd.DataFrame
         Data
     mothers_thresh: float
-        Minimum fraction of experiment's total duration for which mothers must be present.
+        Minimum fraction of experiment's total duration for which mothers
+        must be present.
     daughters_thresh: int
-        Minimum number of time points for which daughters must be observed
+        Minimum number of time points for which daughters must be observed.
     """
     # daughters
     all_daughters = df.loc[df.index.get_level_values("mother_label") > 0]
@@ -170,6 +172,7 @@ def slices_from_spans(spans: t.Tuple[int], df: pd.DataFrame) -> t.List[slice]:
 def drop_mother_label(index: pd.MultiIndex) -> np.ndarray:
+    """Remove mother_label level from a MultiIndex."""
     no_mother_label = index
     if "mother_label" in index.names:
         no_mother_label = index.droplevel("mother_label")
......
-#!/usr/bin/env python3
-
-import re
-import typing as t
-
-import numpy as np
-import pandas as pd
-
-from agora.io.bridge import groupsort
-from itertools import groupby
-
-
-def mb_array_to_dict(mb_array: np.ndarray):
-    """
-    Convert a lineage ndarray (trap, mother_id, daughter_id)
-    into a dictionary of lists ( mother_id ->[daughters_ids] )
-    """
-    return {
-        (trap, mo): [(trap, d[0]) for d in daughters]
-        for trap, mo_da in groupsort(mb_array).items()
-        for mo, daughters in groupsort(mo_da).items()
-    }
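For reference, the mapping the removed helper produced, rebuilt in plain Python under the assumption that groupsort groups rows by their first column:

import numpy as np

mb_array = np.array([[1, 2, 3], [1, 2, 5], [2, 4, 6]])  # (trap, mother, daughter)
lineage = {}
for trap, mother, daughter in mb_array:
    lineage.setdefault((int(trap), int(mother)), []).append(
        (int(trap), int(daughter))
    )
# {(1, 2): [(1, 3), (1, 5)], (2, 4): [(2, 6)]}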
@@ -3,90 +3,161 @@
 Functions to efficiently merge rows in DataFrames.
 """
 import typing as t
-from copy import copy

 import numpy as np
 import pandas as pd
 from utils_find_1st import cmp_larger, find_1st

-from agora.utils.indexing import compare_indices, validate_association
+from agora.utils.indexing import index_isin


+def group_merges(merges: np.ndarray) -> t.List[t.Tuple]:
+    """
+    Convert merges into a list of merges for traps requiring multiple
+    merges and then for traps requiring single merges.
+    """
+    left_tracks = merges[:, 0]
+    right_tracks = merges[:, 1]
+    # find traps requiring multiple merges
+    linr = merges[index_isin(left_tracks, right_tracks).flatten(), :]
+    rinl = merges[index_isin(right_tracks, left_tracks).flatten(), :]
+    # make unique and order merges for each trap
+    multi_merge = np.unique(np.concatenate((linr, rinl)), axis=0)
+    # find traps requiring a single merge
+    single_merge = merges[
+        ~index_isin(merges, multi_merge).all(axis=1).flatten(), :
+    ]
+    # convert to lists of arrays
+    single_merge_list = [[sm] for sm in single_merge]
+    multi_merge_list = [
+        multi_merge[multi_merge[:, 0, 0] == trap_id, ...]
+        for trap_id in np.unique(multi_merge[:, 0, 0])
+    ]
+    res = [*multi_merge_list, *single_merge_list]
+    return res
+
+
+def merge_lineage(
+    lineage: np.ndarray, merges: np.ndarray
+) -> (np.ndarray, np.ndarray):
+    """
+    Use merges to update lineage information.
+
+    Check if merging causes any buds to have multiple mothers and discard
+    those incorrect merges.
+
+    Return updated lineage and merge arrays.
+    """
+    flat_lineage = lineage.reshape(-1, 2)
+    bud_mother_dict = {
+        tuple(bud): mother for bud, mother in zip(lineage[:, 1], lineage[:, 0])
+    }
+    left_tracks = merges[:, 0]
+    # find left tracks that are in lineages
+    valid_lineages = index_isin(flat_lineage, left_tracks).flatten()
+    # group into multi- and then single merges
+    grouped_merges = group_merges(merges)
+    # perform merges
+    if valid_lineages.any():
+        # indices of each left track -> indices of rightmost right track
+        replacement_dict = {
+            tuple(contig_pair[0]): merge[-1][1]
+            for merge in grouped_merges
+            for contig_pair in merge
+        }
+        # if both key and value are buds, they must have the same mother
+        buds = lineage[:, 1]
+        incorrect_merges = [
+            key
+            for key in replacement_dict
+            if np.any(index_isin(buds, replacement_dict[key]).flatten())
+            and np.any(index_isin(buds, key).flatten())
+            and not np.array_equal(
+                bud_mother_dict[key],
+                bud_mother_dict[tuple(replacement_dict[key])],
+            )
+        ]
+        if incorrect_merges:
+            # reassign incorrect merges so that they have no effect
+            for key in incorrect_merges:
+                replacement_dict[key] = key
+            # find only correct merges
+            new_merges = merges[
+                ~index_isin(
+                    merges[:, 0], np.array(incorrect_merges)
+                ).flatten(),
+                ...,
+            ]
+        else:
+            new_merges = merges
+        # correct lineage information
+        # replace mother or bud index with index of rightmost track
+        flat_lineage[valid_lineages] = [
+            replacement_dict[tuple(index)]
+            for index in flat_lineage[valid_lineages]
+        ]
+    else:
+        new_merges = merges
+    # reverse flattening
+    new_lineage = flat_lineage.reshape(-1, 2, 2)
+    # remove any duplicates
+    new_lineage = np.unique(new_lineage, axis=0)
+    return new_lineage, new_merges
-def apply_merges(data: pd.DataFrame, merges: np.ndarray):
-    """Split data in two, one subset for rows relevant for merging and one
-    without them. It uses an array of source tracklets and target tracklets
-    to efficiently merge them.
-
-    Parameters
-    ----------
-    data : pd.DataFrame
-        Input DataFrame.
-    merges : np.ndarray
-        3-D ndarray where dimensions are (X,2,2): nmerges, source-target
-        pair and single-cell identifiers, respectively.
-
-    Examples
-    --------
-    FIXME: Add docs.
-    """
-    indices = data.index
-    if "mother_label" in indices.names:
-        indices = indices.droplevel("mother_label")
-    valid_merges, indices = validate_association(
-        merges, np.array(list(indices))
-    )
-    # Assign non-merged
-    merged = data.loc[~indices]
-    # Implement the merges and drop source rows.
-    # TODO Use matrices to perform merges in batch
-    # for ecficiency
-    if valid_merges.any():
-        to_merge = data.loc[indices]
-        targets, sources = zip(*merges[valid_merges])
-        for source, target in zip(sources, targets):
-            target = tuple(target)
-            to_merge.loc[target] = join_tracks_pair(
-                to_merge.loc[target].values,
-                to_merge.loc[tuple(source)].values,
-            )
-        to_merge.drop(map(tuple, sources), inplace=True)
-        merged = pd.concat((merged, to_merge), names=data.index.names)
-    return merged
+def apply_merges(data: pd.DataFrame, merges: np.ndarray):
+    """
+    Generate a new data frame containing merged tracks.
+
+    Parameters
+    ----------
+    data : pd.DataFrame
+        A Signal data frame.
+    merges : np.ndarray
+        An array of pairs of (trap, cell) indices to merge.
+    """
+    indices = data.index
+    if "mother_label" in indices.names:
+        indices = indices.droplevel("mother_label")
+    indices = np.array(list(indices))
+    # merges in the data frame's indices
+    valid_merges = index_isin(merges, indices).all(axis=1).flatten()
+    # corresponding indices for the data frame in merges
+    selected_merges = merges[valid_merges, ...]
+    valid_indices = index_isin(indices, selected_merges).flatten()
+    # data not requiring merging
+    merged = data.loc[~valid_indices]
+    # merge tracks
+    if valid_merges.any():
+        to_merge = data.loc[valid_indices].copy()
+        left_indices = merges[valid_merges, 0]
+        right_indices = merges[valid_merges, 1]
+        # join left track with right track
+        for left_index, right_index in zip(left_indices, right_indices):
+            to_merge.loc[tuple(left_index)] = join_two_tracks(
+                to_merge.loc[tuple(left_index)].values,
+                to_merge.loc[tuple(right_index)].values,
+            )
+        # drop indices for right tracks
+        to_merge.drop(map(tuple, right_indices), inplace=True)
+        # add to data not requiring merges
+        merged = pd.concat((merged, to_merge), names=data.index.names)
+    return merged


-def join_tracks_pair(target: np.ndarray, source: np.ndarray) -> np.ndarray:
-    """
-    Join two tracks and return the new value of the target.
-    """
-    target_copy = target
-    end = find_1st(target_copy[::-1], 0, cmp_larger)
-    target_copy[-end:] = source[-end:]
-    return target_copy
+def join_two_tracks(
+    left_track: np.ndarray, right_track: np.ndarray
+) -> np.ndarray:
+    """Join two tracks and return the new one."""
+    new_track = left_track.copy()
+    # find last positive element by inverting track
+    end = find_1st(left_track[::-1], 0, cmp_larger)
+    # merge tracks into one
+    new_track[-end:] = right_track[-end:]
+    return new_track
-def group_merges(merges: np.ndarray) -> t.List[t.Tuple]:
-    # Return a list where the cell is present as source and target
-    # (multimerges)
-    sources_targets = compare_indices(merges[:, 0, :], merges[:, 1, :])
-    is_multimerge = sources_targets.any(axis=0) | sources_targets.any(axis=1)
-    is_monomerge = ~is_multimerge
-    multimerge_subsets = union_find(zip(*np.where(sources_targets)))
-    merge_groups = [merges[np.array(tuple(x))] for x in multimerge_subsets]
-    sorted_merges = list(map(sort_association, merge_groups))
-    # Ensure that source and target are at the edges
-    return [
-        *sorted_merges,
-        *[[event] for event in merges[is_monomerge]],
-    ]
+
+##################################################################
 def union_find(lsts):
@@ -120,27 +191,3 @@ def sort_association(array: np.ndarray):
     [res.append(x) for x in np.flip(order).flatten() if x not in res]
     sorted_array = array[np.array(res)]
     return sorted_array
-
-
-def merge_association(
-    association: np.ndarray, merges: np.ndarray
-) -> np.ndarray:
-    grouped_merges = group_merges(merges)
-    flat_indices = association.reshape(-1, 2)
-    comparison_mat = compare_indices(merges[:, 0], flat_indices)
-    valid_indices = comparison_mat.any(axis=0)
-    if valid_indices.any():  # Where valid, perform transformation
-        replacement_d = {}
-        for dataset in grouped_merges:
-            for k in dataset:
-                replacement_d[tuple(k[0])] = dataset[-1][1]
-        flat_indices[valid_indices] = [
-            replacement_d[tuple(i)] for i in flat_indices[valid_indices]
-        ]
-    merged_indices = flat_indices.reshape(-1, 2, 2)
-    return merged_indices
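A worked example of the track join used by apply_merges: find_1st locates the last non-zero entry of the left track, counted from the end, so the right track overwrites everything after it:

import numpy as np
from utils_find_1st import cmp_larger, find_1st

left = np.array([1.0, 2.0, 3.0, 0.0, 0.0])  # left track ends at index 2
right = np.array([0.0, 0.0, 0.0, 4.0, 5.0])  # right track starts at index 3
end = find_1st(left[::-1], 0, cmp_larger)  # two trailing zeros -> end == 2
joined = left.copy()
joined[-end:] = right[-end:]
print(joined)  # [1. 2. 3. 4. 5.]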
""" """
Orchestration module and network mid-level interfaces. Orchestration module and network mid-level interfaces.
""" """
from .version import __version__
...@@ -22,18 +22,16 @@ from requests.exceptions import HTTPError, Timeout ...@@ -22,18 +22,16 @@ from requests.exceptions import HTTPError, Timeout
################### Dask Methods ################################ ################### Dask Methods ################################
def format_segmentation(segmentation, tp): def format_segmentation(segmentation, tp):
"""Format a single timepoint into a dictionary. """
Format BABY's results from a single time point into a dictionary.
Parameters Parameters
------------ ------------
segmentation: list segmentation: list
A list of results, each result is the output of the crawler, which is JSON-encoded A list of results, each result is the output of BABY
crawler, which is JSON-encoded.
tp: int tp: int
the time point considered The time point.
Returns
--------
A dictionary containing the formatted results of BABY
""" """
# Segmentation is a list of dictionaries, ordered by trap # Segmentation is a list of dictionaries, ordered by trap
# Add trap information # Add trap information
...@@ -204,6 +202,7 @@ def choose_model_from_params( ...@@ -204,6 +202,7 @@ def choose_model_from_params(
------- -------
model_name : str model_name : str
""" """
# cameras prime95 has become sCMOS and evolve has EMCCD
valid_models = list(modelsets().keys()) valid_models = list(modelsets().keys())
# Apply modelset filter if specified # Apply modelset filter if specified
......