Compare revisions

Changes are shown as if the source revision was being merged into the target revision.

Source

Target
  • swain-lab/aliby/aliby-mirror
  • swain-lab/aliby/alibylite
Commits on Source (150)
Showing changes with 5643 additions and 7114 deletions
[Diff collapsed: the source diff is too large to display; view the blob instead.]
pyproject.toml:

 [tool.poetry]
-name = "aliby"
-version = "0.1.64"
+name = "alibylite"
+version = "0.0.1"
 description = "Process and analyse live-cell imaging data"
-authors = ["Alan Munoz <alan.munoz@ed.ac.uk>"]
+authors = ["Alan Munoz", "Peter Swain <peter.swain@ed.ac.uk>"]
 packages = [
     { include = "aliby", from="src" },
     { include = "extraction", from="src" },
@@ -12,136 +12,39 @@ packages = [
 ]
 readme = "README.md"

-[tool.poetry.scripts]
-aliby-run = "aliby.bin.run:run"
-aliby-annotate = "aliby.bin.annotate:annotate"
-aliby-visualise = "aliby.bin.visualise:napari_overlay"
-
 [build-system]
 requires = ["setuptools", "poetry-core>=1.0.0"]
 build-backend = "poetry.core.masonry.api"

 [tool.poetry.dependencies]
 python = ">=3.8, <3.11"
-PyYAML = "^6.0"
-flatten-dict = "^0.4.2"
-gaussianprocessderivatives = "^0.1.5"
 numpy = ">=1.21.6"
-Bottleneck = "^1.3.5"
-opencv-python = "^4.7.0.72"
-pathos = "^0.2.8" # Lambda-friendly multithreading
-p-tqdm = "^1.3.3"
-pandas = ">=1.3.3"
-py-find-1st = "^1.1.5" # Fast indexing
-scikit-learn = ">=1.0.2" # Used for an extraction metric
+pandas = ">=2.0.3"
+scikit-learn = ">=1.0.2, <1.3"
 scipy = ">=1.7.3"
-# Pipeline + I/O
-dask = "^2021.12.0"
-imageio = "2.8.0" # For image-visualisation utilities
-requests-toolbelt = "^0.9.1"
 scikit-image = ">=0.18.1"
-tqdm = "^4.62.3" # progress bars
-xmltodict = "^0.13.0" # read ome-tiff metadata
-zarr = "^2.14.0"
-GitPython = "^3.1.27"
-h5py = "2.10" # File I/O
-# Networking
-omero-py = { version = ">=5.6.2", optional = true } # contact omero server
-# Baby segmentation
-aliby-baby = {version = "^0.1.17", optional=true}
+bottleneck = ">=1.3.5"
+dask = ">=2021.12.0"
+flatten-dict = ">=0.4.2"
+h5py = ">=3.8.0"
+more-itertools = ">=10.2.0"
+pathos = ">=0.2.8"
+pyyaml = ">=6.0.1"
+py-find-1st = ">=1.1.6"
+tqdm = ">=4.62.3"
+xmltodict = ">=0.13.0"
+zarr = ">=2.14.0"
+tensorflow-io-gcs-filesystem = "0.34.0"
+chardet = "^5.2.0"
+grpcio = "1.62.2"
+tensorflow = "2.13.1"
+baby-seg = ">=0.30.4"
+omero-py = { version = ">=5.6.2", optional = true }

-# Postprocessing
-[tool.poetry.group.pp.dependencies]
-leidenalg = "^0.8.8"
-more-itertools = "^8.12.0"
-pycatch22 = "^0.4.2"
-
-[tool.poetry.group.pp]
-optional = true
-
-[tool.poetry.group.dev]
-optional = true
-
-[tool.poetry.group.dev.dependencies]
-black = "^22.6.0"
-mypy = "^0.930"
-numpydoc = "^1.3.1"
-isort = "^5.10.1"
-jupyter = "^1.0.0"
-flake8 = "^4.0.1"
-pyright = "^1.1.258"
-pre-commit = "^2.20.0"
-seaborn = "^0.11.2"
-debugpy = "^1.6.3"
-coverage = "^7.0.4"
-jupytext = "^1.14.4"
-grid-strategy = "^0.0.1"
-readchar = "^4.0.3"
-ipdb = "^0.13.11"
-
-[tool.poetry.group.docs]
-optional = true
-
-[tool.poetry.group.docs.dependencies]
-Sphinx = "^5.2.0"
-sphinx-rtd-theme = "^1.0.0"
-sphinx-autodoc-typehints = "^1.19.2"
-myst-parser = "^0.18.0"
-
-[tool.poetry.group.test]
-optional = true
-
-[tool.poetry.group.test.dependencies]
-pytest = "^6.2.5"
-
-[tool.poetry.group.utils]
-optional = true
-
-# Dependency groups can only be used by a poetry installation, not pip
-[tool.poetry.group.utils.dependencies]
-napari = {version = ">=0.4.16", optional=true}
-Torch = {version = "^1.13.1", optional=true}
-pytorch-lightning = {version = "^1.9.3", optional=true}
-torchvision = {version = "^0.14.1", optional=true}
-trio = {version = "^0.22.0", optional=true}
-grid-strategy = {version = "^0.0.1", optional=true}
-
-[tool.poetry.extras]
-omero = ["omero-py"]
-baby = ["aliby-baby"]

 [tool.black]
 line-length = 79
-target-version = ['py38']
-include = '\.pyi?$'
-extend-exclude = '''
-/(
-  \.git
-  | \.hg
-  | \.mypy_cache
-  | \.tox
-  | \.venv
-  | _build
-  | buck-out
-  | build
-  | dist
-)/
-'''
-
-[tool.isort]
-profile = "black"
-multi_line_output = 3
-line_length = 79
-include_trailing_comma = true
-
-[tool.pytest.ini_options]
-minversion = "6.0"
-addopts = "-ra -q"
-testpaths = [
-    "tests",
-]
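The slimmed-down dependency list keeps omero-py as the only optional dependency. A minimal sketch of the guarded-import pattern such an optional dependency implies; the check and error message are illustrative assumptions, not taken from this diff:

import importlib.util

def require_omero():
    """Raise a helpful error if the optional omero-py package is missing."""
    if importlib.util.find_spec("omero") is None:  # provided by omero-py
        raise ImportError(
            "omero-py is not installed; it is an optional dependency of alibylite"
        )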
@@ -7,26 +7,24 @@ from pathlib import Path
 from time import perf_counter
 from typing import Union

-from flatten_dict import flatten
+from flatten_dict import flatten, unflatten
 from yaml import dump, safe_load

-from agora.logging import timer
+from agora.logging_timer import timer

 atomic = t.Union[int, float, str, bool]


 class ParametersABC(ABC):
     """
-    Defines parameters as attributes and allows parameters to
-    be converted to either a dictionary or to yaml.
+    Define parameters typically for a step in the pipeline.
+
+    Outputs can be either a dict or yaml.

     No attribute should be called "parameters"!
     """

     def __init__(self, **kwargs):
-        """
-        Defines parameters as attributes
-        """
+        """Define parameters as attributes."""
         assert (
             "parameters" not in kwargs
         ), "No attribute should be named parameters"
@@ -35,8 +33,9 @@ class ParametersABC(ABC):
     def to_dict(self, iterable="null") -> t.Dict:
         """
-        Recursive function to return a nested dictionary of the
-        attributes of the class instance.
+        Return a nested dictionary of the attributes of the class instance.
+
+        Use recursion.
         """
         if isinstance(iterable, dict):
             if any(
@@ -47,9 +46,11 @@ class ParametersABC(ABC):
                 ]
             ):
                 return {
-                    k: v.to_dict()
-                    if hasattr(v, "to_dict")
-                    else self.to_dict(v)
+                    k: (
+                        v.to_dict()
+                        if hasattr(v, "to_dict")
+                        else self.to_dict(v)
+                    )
                     for k, v in iterable.items()
                 }
             else:
@@ -62,7 +63,8 @@ class ParametersABC(ABC):
     def to_yaml(self, path: Union[Path, str] = None):
         """
-        Returns a yaml stream of the attributes of the class instance.
+        Return a yaml stream of the attributes of the class instance.
+
         If path is provided, the yaml stream is saved there.

         Parameters
@@ -77,20 +79,19 @@ class ParametersABC(ABC):
     @classmethod
     def from_dict(cls, d: dict):
+        """Initialise from a dict of parameters."""
         return cls(**d)

     @classmethod
     def from_yaml(cls, source: Union[Path, str]):
-        """
-        Returns instance from a yaml filename or stdin
-        """
+        """Initialise from a yaml filename or stdin."""
         is_buffer = True
         try:
             if Path(source).exists():
                 is_buffer = False
-        except Exception as _:
+        except Exception as e:
+            print(e)
             assert isinstance(source, str), "Invalid source type."
         if is_buffer:
             params = safe_load(source)
         else:
@@ -100,86 +101,48 @@ class ParametersABC(ABC):
     @classmethod
     def default(cls, **kwargs):
+        """Initialise allowing the default parameters to be potentially replaced."""
         overriden_defaults = copy(cls._defaults)
         for k, v in kwargs.items():
             overriden_defaults[k] = v
         return cls.from_dict(overriden_defaults)
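For orientation, a minimal sketch of how a ParametersABC subclass is typically used; the class name and default values are assumptions for illustration, and from_yaml is assumed to load the file in the elided branch:

class ExampleParameters(ParametersABC):
    """Hypothetical parameters, for illustration only."""

    _defaults = {"window": 5, "channels": ["Brightfield"]}


params = ExampleParameters.default(window=10)  # override one default
params.to_yaml("params.yaml")  # save the attributes as yaml
restored = ExampleParameters.from_yaml("params.yaml")  # round-trip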
     def update(self, name: str, new_value):
-        """
-        Update values recursively
-        if name is a dictionary, replace data where existing found or add if not.
-        It warns against type changes.
-
-        If the existing structure under name is a dictionary,
-        it looks for the first occurrence and modifies it accordingly.
-
-        If a leaf node that is to be changed is a collection, it adds the new elements.
-        """
-        assert name not in (
-            "parameters",
-            "params",
-        ), "Attribute can't be named params or parameters"
-        if name in self.__dict__:
-            if check_type_recursive(getattr(self, name), new_value):
-                print("Warnings:Type changes are risky")
-            if isinstance(getattr(self, name), dict):
-                flattened = flatten(self.to_dict())
-                names_found = [k for k in flattened.keys() if name in k]
-                found_idx = [keys.index(name) for keys in names_found]
-                assert len(names_found), f"{name} not found as key."
-                keys = None
-                if len(names_found) > 1:
-                    for level in zip(found_idx, names_found):
-                        if level == min(found_idx):
-                            keys = level
-                            print(
-                                f"Warning: {name} was found in multiple keys. Selected {keys}"
-                            )
-                            break
-                else:
-                    keys = names_found.pop()
-                if keys:
-                    current_val = flattened.get(keys, None)
-                    # if isinstance(current_val, t.Collection):
-            elif isinstance(getattr(self, name), t.Collection):
-                add_to_collection(getattr(self, name), new_value)
-            elif isinstance(getattr(self, name), set):
-                pass  # TODO implement
-            new_d = getattr(self, name)
-            new_d.update(new_value)
-            setattr(self, name, new_d)
-        else:
-            setattr(self, name, new_value)
+        """Update a parameter in the nested dict of parameters."""
+        flat_params_dict = flatten(self.to_dict(), keep_empty_types=(dict,))
+        names_found = [
+            param for param in flat_params_dict.keys() if name in param
+        ]
+        if len(names_found) == 1:
+            keys = names_found.pop()
+            if type(flat_params_dict[keys]) is not type(new_value):
+                print("Warning:Changing type is risky.")
+            flat_params_dict[keys] = new_value
+            params_dict = unflatten(flat_params_dict)
+            # replace all old values
+            for key, value in params_dict.items():
+                setattr(self, key, value)
+        else:
+            print(f"Warning:{name} was neither recognised nor updated.")
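The rewritten update relies on flatten-dict's tuple-keyed round-trip; a self-contained sketch of that mechanism with illustrative values:

from flatten_dict import flatten, unflatten

nested = {"tiler": {"tile_size": 96}, "extraction": {"tree": {}}}
# keep_empty_types preserves empty dicts as leaves, as in update above
flat = flatten(nested, keep_empty_types=(dict,))
# flat == {("tiler", "tile_size"): 96, ("extraction", "tree"): {}}
flat[("tiler", "tile_size")] = 117
assert unflatten(flat)["tiler"]["tile_size"] == 117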
 def add_to_collection(
-    collection: t.Collection, value: t.Union[atomic, t.Collection]
+    collection: t.Collection, element: t.Union[atomic, t.Collection]
 ):
-    # Adds element(s) in place.
-    if not isinstance(value, t.Collection):
-        value = [value]
+    """Add elements to a collection, a list or set, in place."""
+    if not isinstance(element, t.Collection):
+        element = [element]
     if isinstance(collection, list):
-        collection += value
+        collection += element
     elif isinstance(collection, set):
-        collection.update(value)
+        collection.update(element)


 class ProcessABC(ABC):
     """
     Base class for processes.
-    Defines parameters as attributes and requires run method to be defined.
+
+    Define parameters as attributes and requires a run method.
     """

     def __init__(self, parameters):
@@ -190,8 +153,8 @@ class ProcessABC(ABC):
         """
         self._parameters = parameters
         # convert parameters to dictionary
-        # and then define each parameter as an attribute
         for k, v in parameters.to_dict().items():
+            # define each parameter as an attribute
             setattr(self, k, v)

     @property
@@ -202,32 +165,12 @@ class ProcessABC(ABC):
     def run(self):
         pass

-    def _log(self, message: str, level: str = "warning"):
-        # Log messages in the corresponding level
+    def log(self, message: str, level: str = "warning"):
+        """Log messages at the corresponding level."""
         logger = logging.getLogger("aliby")
         getattr(logger, level)(f"{self.__class__.__name__}: {message}")


-def check_type_recursive(val1, val2):
-    same_types = True
-    if not isinstance(val1, type(val2)) and not all(
-        type(x) in (Path, str) for x in (val1, val2)  # Ignore str->path
-    ):
-        return False
-    if not isinstance(val1, t.Iterable) and not isinstance(val2, t.Iterable):
-        return isinstance(val1, type(val2))
-    elif isinstance(val1, (tuple, list)) and isinstance(val2, (tuple, list)):
-        return bool(
-            sum([check_type_recursive(v1, v2) for v1, v2 in zip(val1, val2)])
-        )
-    elif isinstance(val1, dict) and isinstance(val2, dict):
-        if not len(val1) or not len(val2):
-            return False
-        for k in val2.keys():
-            same_types = same_types and check_type_recursive(val1[k], val2[k])
-        return same_types
-

 class StepABC(ProcessABC):
     """
     Base class that expands on ProcessABC to include tools used by Aliby steps.
@@ -243,11 +186,9 @@ class StepABC(ProcessABC):
     @timer
     def run_tp(self, tp: int, **kwargs):
-        """
-        Time and log the timing of a step.
-        """
+        """Time and log the timing of a step."""
         return self._run_tp(tp, **kwargs)

     def run(self):
         # Replace run with run_tp
-        raise Warning("Steps use run_tp instead of run")
+        raise Warning("Steps use run_tp instead of run.")
""" """
Tools to interact with h5 files and handle data consistently. Tools to interact with h5 files and handle data consistently.
""" """
import collections import collections
import logging import logging
import typing as t import typing as t
...@@ -23,20 +24,19 @@ class BridgeH5: ...@@ -23,20 +24,19 @@ class BridgeH5:
"""Initialise with the name of the h5 file.""" """Initialise with the name of the h5 file."""
self.filename = filename self.filename = filename
if flag is not None: if flag is not None:
self._hdf = h5py.File(filename, flag) self.hdf = h5py.File(filename, flag)
self._filecheck assert (
"cell_info" in self.hdf
), "Invalid file. No 'cell_info' found."
def _log(self, message: str, level: str = "warn"): def log(self, message: str, level: str = "warn"):
# Log messages in the corresponding level # Log messages in the corresponding level
logger = logging.getLogger("aliby") logger = logging.getLogger("aliby")
getattr(logger, level)(f"{self.__class__.__name__}: {message}") getattr(logger, level)(f"{self.__class__.__name__}: {message}")
def _filecheck(self):
assert "cell_info" in self._hdf, "Invalid file. No 'cell_info' found."
def close(self): def close(self):
"""Close the h5 file.""" """Close the h5 file."""
self._hdf.close() self.hdf.close()
@property @property
def meta_h5(self) -> t.Dict[str, t.Any]: def meta_h5(self) -> t.Dict[str, t.Any]:
...@@ -83,7 +83,7 @@ class BridgeH5: ...@@ -83,7 +83,7 @@ class BridgeH5:
def get_npairs_over_time(self, nstepsback=2): def get_npairs_over_time(self, nstepsback=2):
tree = self.cell_tree tree = self.cell_tree
npairs = [] npairs = []
for tp in self._hdf["cell_info"]["processed_timepoints"][()]: for tp in self.hdf["cell_info"]["processed_timepoints"][()]:
tmp_tree = { tmp_tree = {
k: {k2: v2 for k2, v2 in v.items() if k2 <= tp} k: {k2: v2 for k2, v2 in v.items() if k2 <= tp}
for k, v in tree.items() for k, v in tree.items()
...@@ -115,7 +115,7 @@ class BridgeH5: ...@@ -115,7 +115,7 @@ class BridgeH5:
---------- ----------
Nested dictionary where keys (or branches) are the upper levels and the leaves are the last element of :fields:. Nested dictionary where keys (or branches) are the upper levels and the leaves are the last element of :fields:.
""" """
zipped_info = (*zip(*[self._hdf["cell_info"][f][()] for f in fields]),) zipped_info = (*zip(*[self.hdf["cell_info"][f][()] for f in fields]),)
return recursive_groupsort(zipped_info) return recursive_groupsort(zipped_info)
......
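A short usage sketch for the now-public hdf handle; the file name is an assumption, and the default flag is assumed to open the file:

from agora.io.bridge import BridgeH5

bridge = BridgeH5("position001.h5")  # hypothetical h5 file
print(list(bridge.hdf["cell_info"].keys()))  # datasets written per cell
bridge.close()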
[Diff collapsed.]
@@ -6,17 +6,19 @@ import typing as t
 from functools import wraps


-def _first_arg_str_to_df(
+def _first_arg_str_to_raw_df(
     fn: t.Callable,
 ):
     """Enable Signal-like classes to convert strings to data sets."""

     @wraps(fn)
     def format_input(*args, **kwargs):
         cls = args[0]
         data = args[1]
         if isinstance(data, str):
-            # get data from h5 file
+            # get data from h5 file using Signal's get_raw
             data = cls.get_raw(data)
         # replace path in the undecorated function with data
         return fn(cls, data, *args[2:], **kwargs)

     return format_input
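A sketch of how this decorator is applied in a Signal-like class; the class, method, and return values are illustrative assumptions:

import pandas as pd

class MiniSignal:
    """Illustrative stand-in for Signal."""

    def get_raw(self, dataset: str) -> pd.DataFrame:
        # in aliby this reads the dataset from an h5 file
        return pd.DataFrame({0: [1.0, 2.0], 1: [3.0, 4.0]})

    @_first_arg_str_to_raw_df
    def mean_signal(self, data: pd.DataFrame) -> pd.Series:
        # data is always a DataFrame here, whether the caller passed
        # a dataset path such as "extraction/general/None/volume"
        # or an actual DataFrame
        return data.mean(axis=1)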
""" """
Anthology of interfaces fordispatch_metadata_parse different parsers and lack of them. Aliby decides on using different metadata parsers based on two elements:
1. The parameter given by PipelineParameters (either True/False or a string
ALIBY decides on using different metadata parsers based on two elements: pointing to the metadata file)
2. The available files in the root folder where images are found (either
1. The parameter given by PipelineParameters (Either True/False, or a string pointing to the metadata file) remote or locally).
2. The available files in the root folder where images are found (remote or locally)
If parameters is a string pointing to a metadata file, Aliby picks a parser
If parameters is a string pointing to a metadata file, ALIBY picks a parser based on the file format. based on the file format.
If parameters is True (as a boolean), ALIBY searches for any available file and uses the first valid one. If parameters is True, Aliby searches for any available file and uses the
If there are no metadata files, ALIBY requires indicating indices for tiler, segmentation and extraction. first valid one.
If there are no metadata files, Aliby requires indices in the tiff file names
for tiler, segmentation, and extraction.
WARNING: grammars depend on the directory structure of a local log-file_parser
repository.
""" """
 import glob
 import logging
+import numpy as np
 import os
 import typing as t
 from datetime import datetime
@@ -27,28 +32,32 @@ from logfile_parser.swainlab_parser import parse_from_swainlab_grammar


 class MetaData:
-    """Small metadata Process that loads log."""
+    """Metadata process that loads and parses log files."""

     def __init__(self, log_dir, store):
+        """Initialise with log-file directory and h5 location to write."""
         self.log_dir = log_dir
         self.store = store
         self.metadata_writer = Writer(self.store)

     def __getitem__(self, item):
+        """Load log and access item in resulting meta data dictionary."""
         return self.load_logs()[item]

     def load_logs(self):
-        # parsed_flattened = parse_logfiles(self.log_dir)
-        parsed_flattened = dispatch_metadata_parser(self.log_dir)
+        """Load log using a hierarchy of parsers."""
+        parsed_flattened = parse_metadata(self.log_dir)
         return parsed_flattened

     def run(self, overwrite=False):
+        """Load and parse logs and write to h5 file."""
         metadata_dict = self.load_logs()
         self.metadata_writer.write(
             path="/", meta=metadata_dict, overwrite=overwrite
         )

     def add_field(self, field_name, field_value, **kwargs):
+        """Write a field and its values to the h5 file."""
         self.metadata_writer.write(
             path="/",
             meta={field_name: field_value},
@@ -56,207 +65,187 @@ class MetaData:
         )

     def add_fields(self, fields_values: dict, **kwargs):
+        """Write a dict of fields and values to the h5 file."""
         for field, value in fields_values.items():
             self.add_field(field, value)
-# Paradigm: able to do something with all datatypes present in log files,
-# then pare down on what specific information is really useful later.
-
-# Needed because HDF5 attributes do not support dictionaries
-def flatten_dict(nested_dict, separator="/"):
-    """
-    Flattens nested dictionary. If empty return as-is.
-    """
-    flattened = {}
-    if nested_dict:
-        df = pd.json_normalize(nested_dict, sep=separator)
-        flattened = df.to_dict(orient="records")[0] or {}
-    return flattened
-
-
-# Needed because HDF5 attributes do not support datetime objects
-# Takes care of time zones & daylight saving
-def datetime_to_timestamp(time, locale="Europe/London"):
-    """
-    Convert datetime object to UNIX timestamp
-    """
-    return timezone(locale).localize(time).timestamp()
-
-
-def find_file(root_dir, regex):
-    file = [
-        f
-        for f in glob.glob(os.path.join(str(root_dir), regex))
-        if Path(f).name != "aliby.log"  # Skip filename reserved for aliby
-    ]
-    if len(file) > 1:
-        print(
-            "Warning:Metadata: More than one logfile found. Defaulting to first option."
-        )
-        file = [sorted(file)[0]]
-    if len(file) == 0:
-        logging.getLogger("aliby").log(
-            logging.WARNING, "Metadata: No valid swainlab .log found."
-        )
-    else:
-        return file[0]
-    return None
-
-
-# TODO: re-write this as a class if appropriate
-# WARNING: grammars depend on the directory structure of a locally installed
-# logfile_parser repo
-def parse_logfiles(
-    root_dir,
-    acq_grammar="multiDGUI_acq_format.json",
-    log_grammar="multiDGUI_log_format.json",
-):
-    """
-    Parse acq and log files depending on the grammar specified, then merge into
-    single dict.
-    """
-    # Both acq and log files contain useful information.
-    # ACQ_FILE = 'flavin_htb2_glucose_long_ramp_DelftAcq.txt'
-    # LOG_FILE = 'flavin_htb2_glucose_long_ramp_Delftlog.txt'
-    log_parser = Parser(log_grammar)
-    acq_parser = Parser(acq_grammar)
-    log_file = find_file(root_dir, "*log.txt")
-    acq_file = find_file(root_dir, "*[Aa]cq.txt")
-    parsed = {}
-    if log_file and acq_file:
-        with open(log_file, "r") as f:
-            log_parsed = log_parser.parse(f)
-        with open(acq_file, "r") as f:
-            acq_parsed = acq_parser.parse(f)
-        parsed = {**acq_parsed, **log_parsed}
-    for key, value in parsed.items():
-        if isinstance(value, datetime):
-            parsed[key] = datetime_to_timestamp(value)
-    parsed_flattened = flatten_dict(parsed)
-    for k, v in parsed_flattened.items():
-        if isinstance(v, list):
-            parsed_flattened[k] = [0 if el is None else el for el in v]
-    return parsed_flattened
-
-
-def get_meta_swainlab(parsed_metadata: dict):
-    """
-    Convert raw parsing of Swainlab logfile to the metadata interface.
-
-    Input:
-    --------
-    parsed_metadata: Dict[str, str or int or DataFrame or Dict]
-        default['general', 'image_config', 'device_properties', 'group_position', 'group_time', 'group_config']
-
-    Returns:
-    --------
-    Dictionary with metadata following the standard
-    """
-    channels = parsed_metadata["image_config"]["Image config"].values.tolist()
-    # nframes = int(parsed_metadata["group_time"]["frames"].max())
-    # return {"channels": channels, "nframes": nframes}
-    return {"channels": channels}
-
-
-def get_meta_from_legacy(parsed_metadata: dict):
-    result = parsed_metadata
-    result["channels"] = result["channels/channel"]
-    return result
-
-
-def parse_swainlab_metadata(filedir: t.Union[str, Path]):
-    """
-    Dispatcher function that determines which parser to use based on the file ending.
-
-    Input:
-    --------
-    filedir: Directory where the logfile is located.
-
-    Returns:
-    --------
-    Dictionary with minimal metadata
-    """
-    filedir = Path(filedir)
-    filepath = find_file(filedir, "*.log")
-    if filepath:
-        raw_parse = parse_from_swainlab_grammar(filepath)
-        minimal_meta = get_meta_swainlab(raw_parse)
-    else:
-        if filedir.is_file() or str(filedir).endswith(".zarr"):
-            filedir = filedir.parent
-        legacy_parse = parse_logfiles(filedir)
-        minimal_meta = (
-            get_meta_from_legacy(legacy_parse) if legacy_parse else {}
-        )
-    return minimal_meta
-
-
-def dispatch_metadata_parser(filepath: t.Union[str, Path]):
-    """
-    Function to dispatch different metadata parsers that convert logfiles into a
-    basic metadata dictionary. Currently only contains the swainlab log parsers.
-
-    Input:
-    --------
-    filepath: str existing file containing metadata, or folder containing naming conventions
-    """
-    parsed_meta = parse_swainlab_metadata(filepath)
-    if parsed_meta is None:
-        parsed_meta = dir_to_meta
-    return parsed_meta
-
-
-def dir_to_meta(path: Path, suffix="tiff"):
-    filenames = list(path.glob(f"*.{suffix}"))
-    try:
-        # Deduct order from filenames
-        dimorder = "".join(
-            map(lambda x: x[0], filenames[0].stem.split("_")[1:])
-        )
-        dim_value = list(
-            map(
-                lambda f: filename_to_dict_indices(f.stem),
-                path.glob("*.tiff"),
-            )
-        )
-        maxes = [max(map(lambda x: x[dim], dim_value)) for dim in dimorder]
-        mins = [min(map(lambda x: x[dim], dim_value)) for dim in dimorder]
-        _dim_shapes = [
-            max_val - min_val + 1 for max_val, min_val in zip(maxes, mins)
-        ]
-        meta = {
-            "size_" + dim: shape for dim, shape in zip(dimorder, _dim_shapes)
-        }
-    except Exception as e:
-        print(
-            f"Warning:Metadata: Cannot extract dimensions from filenames. Empty meta set {e}"
-        )
-        meta = {}
-    return meta
-
-
-def filename_to_dict_indices(stem: str):
-    return {
-        dim_number[0]: int(dim_number[1:])
-        for dim_number in stem.split("_")[1:]
-    }
+def parse_metadata(filedir: t.Union[str, Path]):
+    """
+    Dispatch different metadata parsers that convert logfiles into a dictionary.
+
+    Currently only contains the swainlab log parsers.
+
+    Parameters
+    --------
+    filepath: str
+        File containing metadata or folder containing naming conventions.
+    """
+    filedir = Path(filedir)
+    if filedir.is_file() or str(filedir).endswith(".zarr"):
+        # log file is in parent directory
+        filedir = filedir.parent
+    filepath = find_file(filedir, "*.log")
+    if filepath:
+        # new log files ending in .log
+        raw_parse = parse_from_swainlab_grammar(filepath)
+        minimal_meta = get_minimal_meta_swainlab(raw_parse)
+    else:
+        # legacy log files ending in .txt
+        legacy_parse = parse_legacy_logfiles(filedir)
+        minimal_meta = (
+            get_meta_from_legacy(legacy_parse) if legacy_parse else {}
+        )
+    if minimal_meta is None:
+        raise Exception("No metadata found.")
+    else:
+        return minimal_meta
+
+
+def find_file(root_dir, regex):
+    """Find files in a directory using regex."""
+    # ignore aliby.log files
+    file = [
+        f
+        for f in glob.glob(os.path.join(str(root_dir), regex))
+        if Path(f).name != "aliby.log"
+    ]
+    if len(file) == 0:
+        return None
+    elif len(file) > 1:
+        print(
+            "Warning:Metadata: More than one log file found."
+            " Defaulting to first option."
+        )
+        return sorted(file)[0]
+    else:
+        return file[0]
+
+
+def get_minimal_meta_swainlab(parsed_metadata: dict):
+    """
+    Extract channels from parsed metadata.
+
+    Parameters
+    --------
+    parsed_metadata: dict[str, str or int or DataFrame or Dict]
+        default['general', 'image_config', 'device_properties',
+        'group_position', 'group_time', 'group_config']
+
+    Returns
+    --------
+    Dict with channels metadata
+    """
+    channels_dict = find_channels_by_position(parsed_metadata["group_config"])
+    channels = parsed_metadata["image_config"]["Image config"].values.tolist()
+    parsed_ntps = parsed_metadata["group_time"]["frames"]
+    if type(parsed_ntps) is int:
+        ntps = parsed_ntps
+    else:
+        ntps = parsed_ntps.max()
+    parsed_tinterval = parsed_metadata["group_time"]["interval"]
+    if type(parsed_tinterval) is int:
+        timeinterval = parsed_tinterval
+    else:
+        timeinterval = parsed_tinterval.min()
+    minimal_meta = {
+        "channels_by_group": channels_dict,
+        "channels": channels,
+        "time_settings/ntimepoints": int(ntps),
+        "time_settings/timeinterval": int(timeinterval),
+    }
+    return minimal_meta
+
+
+def find_channels_by_position(meta):
+    """
+    Parse metadata to find the imaging channels for each group.
+
+    Return a dict with groups as keys and channels as values.
+    """
+    if isinstance(meta, pd.DataFrame):
+        imaging_channels = list(meta.columns)
+        channels_dict = {group: [] for group in meta.index}
+        for group in channels_dict:
+            for channel in imaging_channels:
+                if meta.loc[group, channel] is not None:
+                    channels_dict[group].append(channel)
+    elif isinstance(meta, dict) and "positions/posname" in meta:
+        channels_dict = {
+            position_name: [] for position_name in meta["positions/posname"]
+        }
+        imaging_channels = meta["channels"]
+        for i, position_name in enumerate(meta["positions/posname"]):
+            for imaging_channel in imaging_channels:
+                if (
+                    "positions/" + imaging_channel in meta
+                    and meta["positions/" + imaging_channel][i]
+                ):
+                    channels_dict[position_name].append(imaging_channel)
+    else:
+        channels_dict = {}
+    return channels_dict
+
+
+### legacy code for acq and log files ###
+def parse_legacy_logfiles(
+    root_dir,
+    acq_grammar="multiDGUI_acq_format.json",
+    log_grammar="multiDGUI_log_format.json",
+):
+    """
+    Parse acq and log files using the grammar specified.
+
+    Merge results into a single dict.
+    """
+    log_parser = Parser(log_grammar)
+    acq_parser = Parser(acq_grammar)
+    log_file = find_file(root_dir, "*log.txt")
+    acq_file = find_file(root_dir, "*[Aa]cq.txt")
+    # parse into a single dict
+    parsed = {}
+    if log_file and acq_file:
+        with open(log_file, "r") as f:
+            log_parsed = log_parser.parse(f)
+        with open(acq_file, "r") as f:
+            acq_parsed = acq_parser.parse(f)
+        parsed = {**acq_parsed, **log_parsed}
+    # convert data to having time stamps
+    for key, value in parsed.items():
+        if isinstance(value, datetime):
+            parsed[key] = datetime_to_timestamp(value)
+    # flatten dict
+    parsed_flattened = flatten_dict(parsed)
+    for k, v in parsed_flattened.items():
+        if isinstance(v, list):
+            # replace None with 0
+            parsed_flattened[k] = [0 if el is None else el for el in v]
+    return parsed_flattened
+
+
+def get_meta_from_legacy(parsed_metadata: dict):
+    """Fix naming convention for channels in legacy .txt log files."""
+    result = parsed_metadata
+    result["channels"] = result["channels/channel"]
+    return result
+
+
+def flatten_dict(nested_dict, separator="/"):
+    """
+    Flatten nested dictionary because h5 attributes cannot be dicts.
+
+    If empty return as-is.
+    """
+    flattened = {}
+    if nested_dict:
+        df = pd.json_normalize(nested_dict, sep=separator)
+        flattened = df.to_dict(orient="records")[0] or {}
+    return flattened
+
+
+def datetime_to_timestamp(time, locale="Europe/London"):
+    """Convert datetime object to UNIX timestamp."""
+    # h5 attributes do not support datetime objects
+    return timezone(locale).localize(time).timestamp()
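A usage sketch for the new single entry point; the folder name is an assumption, and the keys shown are those built by get_minimal_meta_swainlab for new-style logs:

from pathlib import Path

meta = parse_metadata(Path("experiment_folder"))  # hypothetical folder
print(meta["channels"])
print(meta["time_settings/ntimepoints"], meta["time_settings/timeinterval"])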
@@ -5,7 +5,7 @@ import h5py
 import numpy as np

 from agora.io.bridge import groupsort
-from agora.io.writer import load_attributes
+from agora.io.writer import load_meta


 class DynamicReader:
@@ -13,7 +13,7 @@ class DynamicReader:
     def __init__(self, file: str):
         self.file = file
-        self.metadata = load_attributes(file)
+        self.metadata = load_meta(file)


 class StateReader(DynamicReader):
......
[Diff collapsed.]
@@ -15,9 +15,10 @@ from agora.io.bridge import BridgeH5
 #################### Dynamic version ##################################


-def load_attributes(file: str, group="/"):
+def load_meta(file: str, group="/"):
     """
-    Load the metadata from an h5 file and convert to a dictionary, including the "parameters" field which is stored as YAML.
+    Load the metadata from an h5 file and convert to a dictionary, including
+    the "parameters" field which is stored as YAML.

     Parameters
     ----------
@@ -26,8 +27,9 @@ def load_meta(file: str, group="/"):
     group: str, optional
         The group in the h5 file from which to read the data
     """
-    # load the metadata, stored as attributes, from the h5 file and return as a dictionary
+    # load the metadata, stored as attributes, from the h5 file
     with h5py.File(file, "r") as f:
+        # return as a dict
         meta = dict(f[group].attrs.items())
     if "parameters" in meta:
         # convert from yaml format into dict
@@ -51,9 +53,9 @@ class DynamicWriter:
         self.file = file
         # the metadata is stored as attributes in the h5 file
         if Path(file).exists():
-            self.metadata = load_attributes(file)
+            self.metadata = load_meta(file)

-    def _log(self, message: str, level: str = "warn"):
+    def log(self, message: str, level: str = "warn"):
         # Log messages in the corresponding level
         logger = logging.getLogger("aliby")
         getattr(logger, level)(f"{self.__class__.__name__}: {message}")
@@ -102,9 +104,11 @@ class DynamicWriter:
                     maxshape=max_shape,
                     dtype=dtype,
                     compression=self.compression,
-                    compression_opts=self.compression_opts
-                    if self.compression is not None
-                    else None,
+                    compression_opts=(
+                        self.compression_opts
+                        if self.compression is not None
+                        else None
+                    ),
                 )
                 # write all data, signified by the empty tuple
                 hgroup[key][()] = data
@@ -172,7 +176,7 @@ class DynamicWriter:
                     # append or create new dataset
                     self._append(value, key, hgroup)
                 except Exception as e:
-                    self._log(
+                    self.log(
                         f"{key}:{value} could not be written: {e}", "error"
                     )
         # write metadata
@@ -448,7 +452,6 @@ class Writer(BridgeH5):
         """
         self.id_cache = {}
         with h5py.File(self.filename, "a") as f:
-            # Alan, haven't we already opened the h5 file through BridgeH5's init?
             if overwrite == "overwrite":  # TODO refactor overwriting
                 if path in f:
                     del f[path]
@@ -490,7 +493,12 @@ class Writer(BridgeH5):
     def write_meta(self, f: h5py.File, path: str, attr: str, data: Iterable):
         """Write metadata to an open h5 file."""
         obj = f.require_group(path)
-        obj.attrs[attr] = data
+        if type(data) is dict:
+            # necessary for channels_dict from find_channels_by_position
+            for key, vlist in data.items():
+                obj.attrs[attr + key] = vlist
+        else:
+            obj.attrs[attr] = data

     @staticmethod
     def write_arraylike(f: h5py.File, path: str, data: Iterable, **kwargs):
@@ -535,7 +543,6 @@ class Writer(BridgeH5):
             path + "values" if path.endswith("/") else path + "/values"
         )
         if path not in f:
-
             # create dataset and write data
             max_ncells = 2e5
             max_tps = 1e3
@@ -581,7 +588,6 @@ class Writer(BridgeH5):
             else:
                 f[path].attrs["columns"] = df.columns.tolist()
         else:
-
             # path exists
             dset = f[values_path]
@@ -589,7 +595,7 @@ class Writer(BridgeH5):
             new_tps = set(df.columns)
             if path + "/timepoint" in f:
                 new_tps = new_tps.difference(f[path + "/timepoint"][()])
-            df = df[new_tps]
+            df = df[list(new_tps)]
             if (
                 not hasattr(self, "id_cache")
@@ -618,9 +624,9 @@ class Writer(BridgeH5):
                 # sort indices for h5 indexing
                 incremental_existing = np.argsort(found_indices)
-                self.id_cache[df.index.nlevels][
-                    "found_indices"
-                ] = found_indices[incremental_existing]
+                self.id_cache[df.index.nlevels]["found_indices"] = (
+                    found_indices[incremental_existing]
+                )
                 self.id_cache[df.index.nlevels]["found_multi"] = found_multis[
                     incremental_existing
                 ]
......
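The write_meta change works around h5 attributes not accepting dicts; a self-contained sketch of the same pattern, with an illustrative file and attribute name:

import h5py

channels_dict = {"group1": ["Brightfield", "GFP"], "group2": ["Brightfield"]}
with h5py.File("example.h5", "a") as f:  # hypothetical file
    obj = f.require_group("/")
    # h5 attributes cannot store dicts, so write one attribute per key
    for key, vlist in channels_dict.items():
        obj.attrs["channels_by_group" + key] = vlist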
-#!/usr/bin/env jupyter
-"""
-Add general logging functions and decorators
-"""
 import logging
 from time import perf_counter


 def timer(func):
-    # Log duration of a function into aliby logfile
+    """Log duration of a function into the aliby log file."""
     def wrap_func(*args, **kwargs):
         t1 = perf_counter()
         result = func(*args, **kwargs)
......
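The rest of this file is collapsed in the diff; a minimal sketch of how such a timing decorator typically completes, with an assumed log format:

import logging
from functools import wraps
from time import perf_counter


def timer(func):
    """Log duration of a function into the aliby log file."""

    @wraps(func)
    def wrap_func(*args, **kwargs):
        t1 = perf_counter()
        result = func(*args, **kwargs)
        # illustrative message; the committed format is in the collapsed lines
        logging.getLogger("aliby").debug(
            f"{func.__name__} took {perf_counter() - t1:.3f}s"
        )
        return result

    return wrap_func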
-#!/usr/bin/env jupyter
-"""
-Convert some types to others
-"""
-
-
-def _str_to_int(x: str or None):
-    """
-    Cast string as int if possible. If Nonetype return None.
-    """
-    if x is not None:
-        try:
-            return int(x)
-        except:
-            return x
-#!/usr/bin/env jupyter
-"""
-Utilities based on association are used to efficiently acquire indices of tracklets with some kind of relationship.
-This can be:
-    - Cells that are to be merged
-    - Cells that have a linear relationship
-"""
-
 import numpy as np
-import typing as t
+import pandas as pd

+# data type to link together trap and cell ids
+i_dtype = {"names": ["trap_id", "cell_id"], "formats": [np.int64, np.int64]}

-def validate_association(
-    association: np.ndarray,
-    indices: np.ndarray,
-    match_column: t.Optional[int] = None,
-) -> t.Tuple[np.ndarray, np.ndarray]:
-    """Select rows from the first array that are present in both.
-    We use casting for fast multiindexing, generalising for lineage dynamics
-
-    Parameters
-    ----------
-    association : np.ndarray
-        2-D array where columns are (trap, mother, daughter) or 3-D array where
-        dimensions are (X,trap,2), containing tuples ((trap,mother), (trap,daughter))
-        across the 3rd dimension.
-    indices : np.ndarray
-        2-D array where each column is a different level. This should not include mother_label.
-    match_column: int
-        int indicating a specific column is required to match (i.e.
-        0-1 for target-source when trying to merge tracklets or mother-bud for lineage)
-        must be present in indices. If it is false one match suffices for the resultant indices
-        vector to be True.
-
-    Returns
-    -------
-    np.ndarray
-        1-D boolean array indicating valid merge events.
-    np.ndarray
-        1-D boolean array indicating indices with an association relationship.
-
-    Examples
-    --------
-    >>> import numpy as np
-    >>> from agora.utils.indexing import validate_association
-    >>> merges = np.array(range(12)).reshape(3,2,2)
-    >>> indices = np.array(range(6)).reshape(3,2)
-    >>> print(merges); print(indices)
-    [[[ 0  1]
-      [ 2  3]]
-     [[ 4  5]
-      [ 6  7]]
-     [[ 8  9]
-      [10 11]]]
-    [[0 1]
-     [2 3]
-     [4 5]]
-    >>> valid_associations, valid_indices = validate_association(merges, indices)
-    >>> print(valid_associations, valid_indices)
-    [ True False False] [ True  True False]
-    """
-    if association.ndim == 2:
-        # Reshape into 3-D array for broadcasting if neded
-        # association = np.stack(
-        #     (association[:, [0, 1]], association[:, [0, 2]]), axis=1
-        # )
-        association = _assoc_indices_to_3d(association)
-    # Compare existing association with available indices
-    # Swap trap and label axes for the association array to correctly cast
-    valid_ndassociation = association[..., None] == indices.T[None, ...]
-    # Broadcasting is confusing (but efficient):
-    # First we check the dimension across trap and cell id, to ensure both match
-    valid_cell_ids = valid_ndassociation.all(axis=2)
-    if match_column is None:
-        # Then we check the merge tuples to check which cases have both target and source
-        valid_association = valid_cell_ids.any(axis=2).all(axis=1)
-        # Finally we check the dimension that crosses all indices, to ensure the pair
-        # is present in a valid merge event.
-        valid_indices = (
-            valid_ndassociation[valid_association].all(axis=2).any(axis=(0, 1))
-        )
-    else:  # We fetch specific indices if we aim for the ones with one present
-        valid_indices = valid_cell_ids[:, match_column].any(axis=0)
-        # Valid association then becomes a boolean array, true means that there is a
-        # match (match_column) between that cell and the index
-        valid_association = (
-            valid_cell_ids[:, match_column] & valid_indices
-        ).any(axis=1)
-    return valid_association, valid_indices
+def validate_lineage(
+    lineage: np.ndarray,
+    indices: np.ndarray,
+    how: str = "families",
+):
+    """
+    Identify mother-bud pairs both in lineage and a Signal's indices.
+
+    We expect the lineage information to be unique: a bud should not have
+    two mothers.
+
+    Lineage is returned with buds assigned only to their first mother if they
+    have multiple.
+
+    Parameters
+    ----------
+    lineage : np.ndarray
+        2D array of lineage associations where columns are
+        (trap, mother, daughter)
+        or
+        a 3D array, which is an array of 2 X 2 arrays comprising
+        [[trap_id, mother_label], [trap_id, daughter_label]].
+    indices : np.ndarray
+        A 2D array of cell indices from a Signal, (trap_id, cell_label).
+        This array should not include mother_label.
+    how: str
+        If "mothers", matches indicate mothers from mother-bud pairs;
+        If "daughters", matches indicate daughters from mother-bud pairs;
+        If "families", matches indicate mothers and daughters in mother-bud pairs.
+
+    Returns
+    -------
+    valid_lineage: boolean np.ndarray
+        1D array indicating matched elements in lineage.
+    valid_indices: boolean np.ndarray
+        1D array indicating matched elements in indices.
+    lineage: np.ndarray
+        Any bud already having a mother that is assigned to another has that
+        second assignment discarded.
+
+    Examples
+    --------
+    >>> import numpy as np
+    >>> from agora.utils.indexing import validate_lineage
+    >>> lineage = np.array([ [[0, 1], [0, 3]], [[0, 1], [0, 4]], [[0, 1], [0, 6]], [[0, 4], [0, 7]] ])
+    >>> indices = np.array([ [0, 1], [0, 2], [0, 3]])
+    >>> valid_lineage, valid_indices, lineage = validate_lineage(lineage, indices)
+    >>> print(valid_lineage)
+    array([ True, False, False, False])
+    >>> print(valid_indices)
+    array([ True, False, True])
+
+    and
+
+    >>> lineage = np.array([[[0,3], [0,1]], [[0,2], [0,4]]])
+    >>> indices = np.array([[0,1], [0,2], [0,3]])
+    >>> valid_lineage, valid_indices, lineage = validate_lineage(lineage, indices)
+    >>> print(valid_lineage)
+    array([ True, False])
+    >>> print(valid_indices)
+    array([ True, False, True])
+    """
+    if lineage.ndim == 2:
+        # [trap, mother, daughter] becomes [[trap, mother], [trap, daughter]]
+        lineage = assoc_indices_to_3d(lineage)
+        invert_lineage = True
+    if how == "mothers":
+        c_index = 0
+    elif how == "daughters":
+        c_index = 1
+    # if buds have two mothers, pick the first one
+    lineage = lineage[
+        ~pd.DataFrame(lineage[:, 1, :]).duplicated().values, :, :
+    ]
+    # find valid lineage
+    valid_lineages = index_isin(lineage, indices)
+    if how == "families":
+        # both mother and bud must be in indices
+        valid_lineage = valid_lineages.all(axis=1)
+    else:
+        valid_lineage = valid_lineages[:, c_index, :]
+    flat_valid_lineage = valid_lineage.flatten()
+    # find valid indices
+    selected_lineages = lineage[flat_valid_lineage, ...]
+    if how == "families":
+        # select only pairs of mother and bud indices
+        valid_indices = index_isin(indices, selected_lineages)
+    else:
+        valid_indices = index_isin(indices, selected_lineages[:, c_index, :])
+    flat_valid_indices = valid_indices.flatten()
+    # put the corrected lineage in the right format
+    if invert_lineage:
+        lineage = assoc_indices_to_2d(lineage)
+    return flat_valid_lineage, flat_valid_indices, lineage


+def index_isin(x: np.ndarray, y: np.ndarray) -> np.ndarray:
+    """
+    Find those elements of x that are in y.
+
+    Both arrays must be arrays of integer indices,
+    such as (trap_id, cell_id).
+    """
+    x = np.ascontiguousarray(x, dtype=np.int64)
+    y = np.ascontiguousarray(y, dtype=np.int64)
+    xv = x.view(i_dtype)
+    inboth = np.intersect1d(xv, y.view(i_dtype))
+    x_bool = np.isin(xv, inboth)
+    return x_bool
-def _assoc_indices_to_3d(ndarray: np.ndarray):
+def assoc_indices_to_3d(ndarray: np.ndarray):
     """
-    Convert the last column to a new row while repeating all previous indices.
-    This is useful when converting a signal multiindex before comparing association.
+    Convert the last column to a new row and repeat first column's values.
+
+    For example: [trap, mother, daughter] becomes
+    [[trap, mother], [trap, daughter]].

-    Assumes the input array has shape (N,3)
+    Assumes the input array has shape (N,3).
     """
     result = ndarray
     if len(ndarray) and ndarray.ndim > 1:
-        if ndarray.shape[1] == 3:  # Faster indexing for single positions
+        # faster indexing for single positions
+        if ndarray.shape[1] == 3:
             result = np.transpose(
                 np.hstack((ndarray[:, [0]], ndarray)).reshape(-1, 2, 2),
                 axes=[0, 2, 1],
             )
-        else:  # 20% slower but more general indexing
+        else:
+            # 20% slower but more general indexing
             columns = np.arange(ndarray.shape[1])
             result = np.stack(
                 (
                     ndarray[:, np.delete(columns, -1)],
@@ -132,21 +150,11 @@ def assoc_indices_to_3d(ndarray: np.ndarray):
     return result
-def _3d_index_to_2d(array: np.ndarray):
-    """
-    Opposite to _assoc_indices_to_3d.
-    """
+def assoc_indices_to_2d(array: np.ndarray):
+    """Convert indices to 2d."""
     result = array
     if len(array):
         result = np.concatenate(
             (array[:, 0, :], array[:, 1, 1, np.newaxis]), axis=1
         )
     return result
-
-
-def compare_indices(x: np.ndarray, y: np.ndarray) -> np.ndarray:
-    """
-    Fetch two 2-D indices and return a binary 2-D matrix
-    where a True value links two cells where all cells are the same
-    """
-    return (x[..., None] == y.T[None, ...]).all(axis=1)
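The structured-dtype view behind the new index_isin lets each (trap_id, cell_id) pair compare as a single scalar; a standalone sketch:

import numpy as np

i_dtype = {"names": ["trap_id", "cell_id"], "formats": [np.int64, np.int64]}
x = np.ascontiguousarray([[0, 1], [0, 2], [1, 1]], dtype=np.int64)
y = np.ascontiguousarray([[0, 2], [1, 1]], dtype=np.int64)
# viewing rows as records compares whole pairs at once
inboth = np.intersect1d(x.view(i_dtype), y.view(i_dtype))
print(np.isin(x.view(i_dtype), inboth).flatten())  # [False  True  True]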
@@ -6,7 +6,6 @@ import numpy as np
 import pandas as pd
 from sklearn.cluster import KMeans

-from agora.utils.indexing import validate_association

 index_row = t.Tuple[str, str, int, int]
@@ -86,16 +85,19 @@ def bidirectional_retainment_filter(
     daughters_thresh: int = 7,
 ) -> pd.DataFrame:
     """
-    Retrieve families where mothers are present for more than a fraction of the experiment, and daughters for longer than some number of time-points.
+    Retrieve families where mothers are present for more than a fraction
+    of the experiment and daughters for longer than some number of
+    time-points.

     Parameters
     ----------
     df: pd.DataFrame
         Data
     mothers_thresh: float
-        Minimum fraction of experiment's total duration for which mothers must be present.
+        Minimum fraction of experiment's total duration for which mothers
+        must be present.
     daughters_thresh: int
-        Minimum number of time points for which daughters must be observed
+        Minimum number of time points for which daughters must be observed.
     """
     # daughters
     all_daughters = df.loc[df.index.get_level_values("mother_label") > 0]
@@ -170,6 +172,7 @@ def slices_from_spans(spans: t.Tuple[int], df: pd.DataFrame) -> t.List[slice]:
 def drop_mother_label(index: pd.MultiIndex) -> np.ndarray:
+    """Remove mother_label level from a MultiIndex."""
     no_mother_label = index
     if "mother_label" in index.names:
         no_mother_label = index.droplevel("mother_label")
......
-#!/usr/bin/env python3
-
-import re
-import typing as t
-
-import numpy as np
-import pandas as pd
-
-from agora.io.bridge import groupsort
-from itertools import groupby
-
-
-def mb_array_to_dict(mb_array: np.ndarray):
-    """
-    Convert a lineage ndarray (trap, mother_id, daughter_id)
-    into a dictionary of lists ( mother_id ->[daughters_ids] )
-    """
-    return {
-        (trap, mo): [(trap, d[0]) for d in daughters]
-        for trap, mo_da in groupsort(mb_array).items()
-        for mo, daughters in groupsort(mo_da).items()
-    }
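For reference, the mapping the removed helper produced, rebuilt in plain Python under the assumption that groupsort groups rows by their first column:

import numpy as np

mb_array = np.array([[1, 2, 3], [1, 2, 5], [2, 4, 6]])  # (trap, mother, daughter)
lineage = {}
for trap, mother, daughter in mb_array:
    lineage.setdefault((int(trap), int(mother)), []).append(
        (int(trap), int(daughter))
    )
# {(1, 2): [(1, 3), (1, 5)], (2, 4): [(2, 6)]}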
@@ -3,90 +3,161 @@
 Functions to efficiently merge rows in DataFrames.
 """
 import typing as t
-from copy import copy

 import numpy as np
 import pandas as pd
 from utils_find_1st import cmp_larger, find_1st

-from agora.utils.indexing import compare_indices, validate_association
+from agora.utils.indexing import index_isin


+def group_merges(merges: np.ndarray) -> t.List[t.Tuple]:
+    """
+    Convert merges into a list of merges for traps requiring multiple
+    merges and then for traps requiring single merges.
+    """
+    left_tracks = merges[:, 0]
+    right_tracks = merges[:, 1]
+    # find traps requiring multiple merges
+    linr = merges[index_isin(left_tracks, right_tracks).flatten(), :]
+    rinl = merges[index_isin(right_tracks, left_tracks).flatten(), :]
+    # make unique and order merges for each trap
+    multi_merge = np.unique(np.concatenate((linr, rinl)), axis=0)
+    # find traps requiring a single merge
+    single_merge = merges[
+        ~index_isin(merges, multi_merge).all(axis=1).flatten(), :
+    ]
+    # convert to lists of arrays
+    single_merge_list = [[sm] for sm in single_merge]
+    multi_merge_list = [
+        multi_merge[multi_merge[:, 0, 0] == trap_id, ...]
+        for trap_id in np.unique(multi_merge[:, 0, 0])
+    ]
+    res = [*multi_merge_list, *single_merge_list]
+    return res
+
+
+def merge_lineage(
+    lineage: np.ndarray, merges: np.ndarray
+) -> (np.ndarray, np.ndarray):
+    """
+    Use merges to update lineage information.
+
+    Check if merging causes any buds to have multiple mothers and discard
+    those incorrect merges.
+
+    Return updated lineage and merge arrays.
+    """
+    flat_lineage = lineage.reshape(-1, 2)
+    bud_mother_dict = {
+        tuple(bud): mother for bud, mother in zip(lineage[:, 1], lineage[:, 0])
+    }
+    left_tracks = merges[:, 0]
+    # find left tracks that are in lineages
+    valid_lineages = index_isin(flat_lineage, left_tracks).flatten()
+    # group into multi- and then single merges
+    grouped_merges = group_merges(merges)
+    # perform merges
+    if valid_lineages.any():
+        # indices of each left track -> indices of rightmost right track
+        replacement_dict = {
+            tuple(contig_pair[0]): merge[-1][1]
+            for merge in grouped_merges
+            for contig_pair in merge
+        }
+        # if both key and value are buds, they must have the same mother
+        buds = lineage[:, 1]
+        incorrect_merges = [
+            key
+            for key in replacement_dict
+            if np.any(index_isin(buds, replacement_dict[key]).flatten())
+            and np.any(index_isin(buds, key).flatten())
+            and not np.array_equal(
+                bud_mother_dict[key],
+                bud_mother_dict[tuple(replacement_dict[key])],
+            )
+        ]
+        if incorrect_merges:
+            # reassign incorrect merges so that they have no effect
+            for key in incorrect_merges:
+                replacement_dict[key] = key
+            # find only correct merges
+            new_merges = merges[
+                ~index_isin(
+                    merges[:, 0], np.array(incorrect_merges)
+                ).flatten(),
+                ...,
+            ]
+        else:
+            new_merges = merges
+        # correct lineage information
+        # replace mother or bud index with index of rightmost track
+        flat_lineage[valid_lineages] = [
+            replacement_dict[tuple(index)]
+            for index in flat_lineage[valid_lineages]
+        ]
+    else:
+        new_merges = merges
+    # reverse flattening
+    new_lineage = flat_lineage.reshape(-1, 2, 2)
+    # remove any duplicates
+    new_lineage = np.unique(new_lineage, axis=0)
+    return new_lineage, new_merges
-def apply_merges(data: pd.DataFrame, merges: np.ndarray):
-    """Split data in two, one subset for rows relevant for merging and one
-    without them. It uses an array of source tracklets and target tracklets
-    to efficiently merge them.
-
-    Parameters
-    ----------
-    data : pd.DataFrame
-        Input DataFrame.
-    merges : np.ndarray
-        3-D ndarray where dimensions are (X,2,2): nmerges, source-target
-        pair and single-cell identifiers, respectively.
-
-    Examples
-    --------
-    FIXME: Add docs.
-    """
-    indices = data.index
-    if "mother_label" in indices.names:
-        indices = indices.droplevel("mother_label")
-    valid_merges, indices = validate_association(
-        merges, np.array(list(indices))
-    )
-    # Assign non-merged
-    merged = data.loc[~indices]
-    # Implement the merges and drop source rows.
-    # TODO Use matrices to perform merges in batch
-    # for ecficiency
-    if valid_merges.any():
-        to_merge = data.loc[indices]
-        targets, sources = zip(*merges[valid_merges])
-        for source, target in zip(sources, targets):
-            target = tuple(target)
-            to_merge.loc[target] = join_tracks_pair(
-                to_merge.loc[target].values,
-                to_merge.loc[tuple(source)].values,
-            )
-        to_merge.drop(map(tuple, sources), inplace=True)
-        merged = pd.concat((merged, to_merge), names=data.index.names)
-    return merged
+def apply_merges(data: pd.DataFrame, merges: np.ndarray):
+    """
+    Generate a new data frame containing merged tracks.
+
+    Parameters
+    ----------
+    data : pd.DataFrame
+        A Signal data frame.
+    merges : np.ndarray
+        An array of pairs of (trap, cell) indices to merge.
+    """
+    indices = data.index
+    if "mother_label" in indices.names:
+        indices = indices.droplevel("mother_label")
+    indices = np.array(list(indices))
+    # merges in the data frame's indices
+    valid_merges = index_isin(merges, indices).all(axis=1).flatten()
+    # corresponding indices for the data frame in merges
+    selected_merges = merges[valid_merges, ...]
+    valid_indices = index_isin(indices, selected_merges).flatten()
+    # data not requiring merging
+    merged = data.loc[~valid_indices]
+    # merge tracks
+    if valid_merges.any():
+        to_merge = data.loc[valid_indices].copy()
+        left_indices = merges[valid_merges, 0]
+        right_indices = merges[valid_merges, 1]
+        # join left track with right track
+        for left_index, right_index in zip(left_indices, right_indices):
+            to_merge.loc[tuple(left_index)] = join_two_tracks(
+                to_merge.loc[tuple(left_index)].values,
+                to_merge.loc[tuple(right_index)].values,
+            )
+        # drop indices for right tracks
+        to_merge.drop(map(tuple, right_indices), inplace=True)
+        # add to data not requiring merges
+        merged = pd.concat((merged, to_merge), names=data.index.names)
+    return merged


-def join_tracks_pair(target: np.ndarray, source: np.ndarray) -> np.ndarray:
-    """
-    Join two tracks and return the new value of the target.
-    """
-    target_copy = target
-    end = find_1st(target_copy[::-1], 0, cmp_larger)
-    target_copy[-end:] = source[-end:]
-    return target_copy
+def join_two_tracks(
+    left_track: np.ndarray, right_track: np.ndarray
+) -> np.ndarray:
+    """Join two tracks and return the new one."""
+    new_track = left_track.copy()
+    # find last positive element by inverting track
+    end = find_1st(left_track[::-1], 0, cmp_larger)
+    # merge tracks into one
+    new_track[-end:] = right_track[-end:]
+    return new_track
-def group_merges(merges: np.ndarray) -> t.List[t.Tuple]:
-    # Return a list where the cell is present as source and target
-    # (multimerges)
-    sources_targets = compare_indices(merges[:, 0, :], merges[:, 1, :])
-    is_multimerge = sources_targets.any(axis=0) | sources_targets.any(axis=1)
-    is_monomerge = ~is_multimerge
-    multimerge_subsets = union_find(zip(*np.where(sources_targets)))
-    merge_groups = [merges[np.array(tuple(x))] for x in multimerge_subsets]
-    sorted_merges = list(map(sort_association, merge_groups))
-    # Ensure that source and target are at the edges
-    return [
-        *sorted_merges,
-        *[[event] for event in merges[is_monomerge]],
-    ]
+
+##################################################################
 def union_find(lsts):
@@ -120,27 +191,3 @@ def sort_association(array: np.ndarray):
     [res.append(x) for x in np.flip(order).flatten() if x not in res]
     sorted_array = array[np.array(res)]
     return sorted_array
-
-
-def merge_association(
-    association: np.ndarray, merges: np.ndarray
-) -> np.ndarray:
-    grouped_merges = group_merges(merges)
-    flat_indices = association.reshape(-1, 2)
-    comparison_mat = compare_indices(merges[:, 0], flat_indices)
-    valid_indices = comparison_mat.any(axis=0)
-    if valid_indices.any():  # Where valid, perform transformation
-        replacement_d = {}
-        for dataset in grouped_merges:
-            for k in dataset:
-                replacement_d[tuple(k[0])] = dataset[-1][1]
-        flat_indices[valid_indices] = [
-            replacement_d[tuple(i)] for i in flat_indices[valid_indices]
-        ]
-    merged_indices = flat_indices.reshape(-1, 2, 2)
-    return merged_indices
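A worked example of the track join used by apply_merges: find_1st locates the last non-zero entry of the left track, counted from the end, so the right track overwrites everything after it:

import numpy as np
from utils_find_1st import cmp_larger, find_1st

left = np.array([1.0, 2.0, 3.0, 0.0, 0.0])  # left track ends at index 2
right = np.array([0.0, 0.0, 0.0, 4.0, 5.0])  # right track starts at index 3
end = find_1st(left[::-1], 0, cmp_larger)  # two trailing zeros -> end == 2
joined = left.copy()
joined[-end:] = right[-end:]
print(joined)  # [1. 2. 3. 4. 5.]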
""" """
Orchestration module and network mid-level interfaces. Orchestration module and network mid-level interfaces.
""" """
from .version import __version__
...@@ -22,18 +22,16 @@ from requests.exceptions import HTTPError, Timeout ...@@ -22,18 +22,16 @@ from requests.exceptions import HTTPError, Timeout
################### Dask Methods ################################ ################### Dask Methods ################################
def format_segmentation(segmentation, tp): def format_segmentation(segmentation, tp):
"""Format a single timepoint into a dictionary. """
Format BABY's results from a single time point into a dictionary.
Parameters Parameters
------------ ------------
segmentation: list segmentation: list
A list of results, each result is the output of the crawler, which is JSON-encoded A list of results, each result is the output of BABY
crawler, which is JSON-encoded.
tp: int tp: int
the time point considered The time point.
Returns
--------
A dictionary containing the formatted results of BABY
""" """
# Segmentation is a list of dictionaries, ordered by trap # Segmentation is a list of dictionaries, ordered by trap
# Add trap information # Add trap information
...@@ -204,6 +202,7 @@ def choose_model_from_params( ...@@ -204,6 +202,7 @@ def choose_model_from_params(
------- -------
model_name : str model_name : str
""" """
# cameras prime95 has become sCMOS and evolve has EMCCD
valid_models = list(modelsets().keys()) valid_models = list(modelsets().keys())
# Apply modelset filter if specified # Apply modelset filter if specified
......