diff --git a/src/agora/abc.py b/src/agora/abc.py
index 19281c94157750407f9414ceb2c31a187d7524e9..6bdb93b854eb7062e360da042b3cc686a0d88f82 100644
--- a/src/agora/abc.py
+++ b/src/agora/abc.py
@@ -24,9 +24,7 @@ class ParametersABC(ABC):
     """

     def __init__(self, **kwargs):
-        """
-        Define parameters as attributes.
-        """
+        """Define parameters as attributes."""
         assert (
             "parameters" not in kwargs
         ), "No attribute should be named parameters"
@@ -35,8 +33,9 @@ class ParametersABC(ABC):

     def to_dict(self, iterable="null") -> t.Dict:
         """
-        Recursive function to return a nested dictionary of the
-        attributes of the class instance.
+        Return a nested dictionary of the attributes of the class instance.
+
+        Uses recursion.
         """
         if isinstance(iterable, dict):
             if any(
@@ -62,7 +61,8 @@ class ParametersABC(ABC):

     def to_yaml(self, path: Union[Path, str] = None):
         """
-        Returns a yaml stream of the attributes of the class instance.
+        Return a yaml stream of the attributes of the class instance.
+
         If path is provided, the yaml stream is saved there.

         Parameters
@@ -81,9 +81,7 @@ class ParametersABC(ABC):

     @classmethod
     def from_yaml(cls, source: Union[Path, str]):
-        """
-        Returns instance from a yaml filename or stdin
-        """
+        """Return instance from a yaml filename or stdin."""
         is_buffer = True
         try:
             if Path(source).exists():
@@ -107,7 +105,8 @@ class ParametersABC(ABC):

     def update(self, name: str, new_value):
         """
-        Update values recursively
+        Update values recursively.
+
         if name is a dictionary, replace data where existing found or add if not.
         It warns against type changes.

@@ -179,7 +178,8 @@ def add_to_collection(
 class ProcessABC(ABC):
     """
     Base class for processes.
-    Defines parameters as attributes and requires run method to be defined.
+
+    Define parameters as attributes and require a run method.
     """

     def __init__(self, parameters):
diff --git a/src/agora/io/metadata.py b/src/agora/io/metadata.py
index 211d849620809639968fd37908e9b02ca736cc70..8d94986fb38d4a3dca93d8edabb9dc424da493b7 100644
--- a/src/agora/io/metadata.py
+++ b/src/agora/io/metadata.py
@@ -66,7 +66,7 @@ class MetaData:
 # Needed because HDF5 attributes do not support dictionaries
 def flatten_dict(nested_dict, separator="/"):
     """
-    Flattens nested dictionary. If empty return as-is.
+    Flatten nested dictionary. If empty return as-is.
     """
     flattened = {}
     if nested_dict:
@@ -79,9 +79,7 @@ def flatten_dict(nested_dict, separator="/"):
 # Needed because HDF5 attributes do not support datetime objects
 # Takes care of time zones & daylight saving
 def datetime_to_timestamp(time, locale="Europe/London"):
-    """
-    Convert datetime object to UNIX timestamp
-    """
+    """Convert datetime object to UNIX timestamp."""
     return timezone(locale).localize(time).timestamp()


@@ -189,7 +187,6 @@ def parse_swainlab_metadata(filedir: t.Union[str, Path]):
         Dictionary with minimal metadata
     """
     filedir = Path(filedir)
-
     filepath = find_file(filedir, "*.log")
     if filepath:
         raw_parse = parse_from_swainlab_grammar(filepath)
@@ -201,24 +198,23 @@ def parse_swainlab_metadata(filedir: t.Union[str, Path]):
         minimal_meta = (
             get_meta_from_legacy(legacy_parse) if legacy_parse else {}
         )
-
     return minimal_meta


 def dispatch_metadata_parser(filepath: t.Union[str, Path]):
     """
-    Function to dispatch different metadata parsers that convert logfiles into a
-    basic metadata dictionary. Currently only contains the swainlab log parsers.
+    Dispatch different metadata parsers that convert logfiles into a dictionary.
+
+    Currently only contains the swainlab log parsers.
     Input:
     --------
-    filepath: str existing file containing metadata, or folder containing naming conventions
+    filepath: str existing file containing metadata, or folder containing naming
+    conventions
     """
     parsed_meta = parse_swainlab_metadata(filepath)
-
     if parsed_meta is None:
         parsed_meta = dir_to_meta(filepath)
-
     return parsed_meta
diff --git a/src/agora/utils/indexing.py b/src/agora/utils/indexing.py
index 503d589c8492cef63d38898bfc8bd05207cc901d..789ca5a0423fd84733f7721ff6c35a411b2c5aaa 100644
--- a/src/agora/utils/indexing.py
+++ b/src/agora/utils/indexing.py
@@ -1,10 +1,9 @@
 #!/usr/bin/env jupyter
 """
-Utilities based on association are used to efficiently acquire indices of
-tracklets with some kind of relationship.
+Utilities based on association are used to efficiently acquire indices of tracklets with some kind of relationship.
 This can be:
-    - Cells that are to be merged.
-    - Cells that have a lineage relationship.
+    - Cells that are to be merged
+    - Cells that have a lineage relationship
 """

 import numpy as np
@@ -16,123 +15,113 @@ def validate_association(
     indices: np.ndarray,
     match_column: t.Optional[int] = None,
 ) -> t.Tuple[np.ndarray, np.ndarray]:
-    """
-    Identify matches between two arrays by comparing rows.
-
-    We match lineage data on mother-bud pairs with all the cells identified to specialise to only those cells in mother-bud pairs.
-
-    We use broadcasting for speed.
-
-    Both a mother and bud in association must be in indices.
-
-    Parameters
-    ----------
-    association : np.ndarray
-        2D array of lineage associations where columns are (trap, mother, daughter)
-        or
-        a 3D array, which is an array of 2 X 2 arrays comprising [[trap_id, mother_label], [trap_id, daughter_label]].
-    indices : np.ndarray
-        A 2D array where each column is a different level, such as (trap_id, cell_label), which typically is an index of a Signal
-        dataframe. This array should not include mother_label.
-    match_column: int
-        If 0, matches indicate mothers from mother-bud pairs;
-        If 1, matches indicate daughters from mother-bud pairs;
-        If None, matches indicate either mothers or daughters in mother-bud pairs.
-
-    Returns
-    -------
-    valid_association: boolean np.ndarray
-        1D array indicating elements in association with matches.
-    valid_indices: boolean np.ndarray
-        1D array indicating elements in indices with matches.
-
-    Examples
-    --------
-    >>> import numpy as np
-    >>> from agora.utils.indexing import validate_association
-
-    >>> association = np.array([ [[0, 1], [0, 3]], [[0, 1], [0, 4]], [[0, 1], [0, 6]], [[0, 4], [0, 7]] ])
-    >>> indices = np.array([ [0, 1], [0, 2], [0, 3]])
-    >>> print(indices.T)
-
-    >>> valid_association, valid_indices = validate_association(association, indices)
-
-    >>> print(valid_association)
-    array([ True, False, False, False])
-    >>> print(valid_indices)
-    array([ True, False, True])
-
-    and
-
-    >>> association = np.array([[[0,3], [0,1]], [[0,2], [0,4]]])
-    >>> indices = np.array([[0,1], [0,2], [0,3]])
-    >>> valid_association, valid_indices = validate_association(association, indices)
-    >>> print(valid_association)
-    array([ True, False])
-    >>> print(valid_indices)
-    array([ True, False, True])
-    """
+
+    """Select rows from the first array that are present in both.
+    We use casting for fast multiindexing, generalising for lineage dynamics.
+
+
+    Parameters
+    ----------
+    association : np.ndarray
+        2-D array where columns are (trap, mother, daughter) or 3-D array where
+        dimensions are (X,trap,2), containing tuples ((trap,mother), (trap,daughter))
+        across the 3rd dimension.
+    indices : np.ndarray
+        2-D array where each column is a different level. This should not include mother_label.
+    match_column: int
+        int indicating a specific column is required to match (i.e.
+        0-1 for target-source when trying to merge tracklets or mother-bud for lineage)
+        must be present in indices. If it is None, one match suffices for the resultant indices
+        vector to be True.
+
+    Returns
+    -------
+    np.ndarray
+        1-D boolean array indicating valid merge events.
+    np.ndarray
+        1-D boolean array indicating indices with an association relationship.
+
+    Examples
+    --------
+
+    >>> import numpy as np
+    >>> from agora.utils.indexing import validate_association
+    >>> merges = np.array(range(12)).reshape(3,2,2)
+    >>> indices = np.array(range(6)).reshape(3,2)
+
+    >>> print(merges); print(indices)
+    [[[ 0  1]
+      [ 2  3]]
+
+     [[ 4  5]
+      [ 6  7]]
+
+     [[ 8  9]
+      [10 11]]]
+
+    [[0 1]
+     [2 3]
+     [4 5]]
+
+    >>> valid_associations, valid_indices = validate_association(merges, indices)
+    >>> print(valid_associations, valid_indices)
+    [ True False False] [ True  True False]
+    """
     if association.ndim == 2:
-        # reshape into 3D array for broadcasting
-        # for each trap, [trap, mother, daughter] becomes
-        # [[trap, mother], [trap, daughter]]
+        # Reshape into 3-D array for broadcasting if needed
+        # association = np.stack(
+        #     (association[:, [0, 1]], association[:, [0, 2]]), axis=1
+        # )
         association = _assoc_indices_to_3d(association)
-    # use broadcasting to compare association with indices
-    # swap trap and cell_label axes for correct broadcasting
-    indicesT = indices.T
-    # compare each of [[trap, mother], [trap, daughter]] for all traps
-    # in association with [trap, cell_label] for all traps in indices
-    valid_ndassociation = (
-        association[..., np.newaxis] == indicesT[np.newaxis, ...]
-    )
-    # find matches in association
-    ###
-    # make True comparisons have both trap_ids and cell labels matching
+
+    # Compare existing association with available indices
+    # Swap trap and label axes for the association array to correctly cast
+    valid_ndassociation = association[..., None] == indices.T[None, ...]
+
+    # Broadcasting is confusing (but efficient):
+    # First we check the dimension across trap and cell id, to ensure both match
     valid_cell_ids = valid_ndassociation.all(axis=2)
+
     if match_column is None:
-        # make True comparisons match at least one row in indices
-        va_intermediate = valid_cell_ids.any(axis=2)
-        # make True comparisons have both mother and bud matching rows in indices
-        valid_association = va_intermediate.all(axis=1)
-    else:
-        # match_column selects mothers if 0 and daughters if 1
-        # make True match at least one row in indices
-        valid_association = valid_cell_ids[:, match_column].any(axis=1)
-    # find matches in indices
-    ###
-    # make True comparisons have a validated association for both the mother and bud
-    # make True comparisons have both trap_ids and cell labels matching
-    valid_cell_ids_va = valid_ndassociation[valid_association].all(axis=2)
-    if match_column is None:
-        # make True comparisons match either a mother or a bud in association
-        valid_indices = valid_cell_ids_va.any(axis=1)[0]
-    else:
-        valid_indices = valid_cell_ids_va[:, match_column][0]
+        # Then we check the merge tuples to check which cases have both target and source
+        valid_association = valid_cell_ids.any(axis=2).all(axis=1)
+
+        # Finally we check the dimension that crosses all indices, to ensure the pair
+        # is present in a valid merge event.
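+        # (shape bookkeeping: valid_ndassociation is (N, 2, 2, M) for N
+        # associations, 2 pair members, 2 levels (trap, label) and M index
+        # rows; all(axis=2) requires both levels to match, and
+        # any(axis=(0, 1)) keeps every index row seen in some valid pair)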
+        valid_indices = (
+            valid_ndassociation[valid_association].all(axis=2).any(axis=(0, 1))
+        )
+    else:  # We fetch specific indices if we aim for the ones with one present
+        valid_indices = valid_cell_ids[:, match_column].any(axis=0)
+        # Valid association then becomes a boolean array, true means that there is a
+        # match (match_column) between that cell and the index
+        valid_association = (
+            valid_cell_ids[:, match_column] & valid_indices
+        ).any(axis=1)
+
     return valid_association, valid_indices


 def _assoc_indices_to_3d(ndarray: np.ndarray):
     """
-    Reorganise an array of shape (N, 3) into one of shape (N, 2, 2).
+    Convert the last column to a new row while repeating all previous indices.

-    Reorganise an array so that the last entry of each row is removed and generates a new row. This new row retains all other entries of the original row.
+    This is useful when converting a signal multiindex before comparing association.

-    Example:
-    [ [0, 1, 3], [0, 1, 4] ]
-    becomes
-    [ [[0, 1], [0, 3]], [[0, 1], [0, 4]] ]
+    Assumes the input array has shape (N,3)
     """
     result = ndarray
     if len(ndarray) and ndarray.ndim > 1:
-        if ndarray.shape[1] == 3:
-            # faster indexing for single positions
+        if ndarray.shape[1] == 3:  # Faster indexing for single positions
             result = np.transpose(
                 np.hstack((ndarray[:, [0]], ndarray)).reshape(-1, 2, 2),
                 axes=[0, 2, 1],
             )
-        else:
-            # 20% slower, but more general indexing
+        else:  # 20% slower but more general indexing
             columns = np.arange(ndarray.shape[1])
+
             result = np.stack(
                 (
                     ndarray[:, np.delete(columns, -1)],
@@ -144,7 +133,9 @@ def _assoc_indices_to_3d(ndarray: np.ndarray):


 def _3d_index_to_2d(array: np.ndarray):
-    """Revert switch from _assoc_indices_to_3d."""
+    """
+    Opposite to _assoc_indices_to_3d.
+    """
     result = array
     if len(array):
         result = np.concatenate(
@@ -155,8 +146,7 @@ def _3d_index_to_2d(array: np.ndarray):

 def compare_indices(x: np.ndarray, y: np.ndarray) -> np.ndarray:
     """
-    Compare two 2D arrays using broadcasting.
-
-    Return a binary array where a True value links two cells where all cells are the same.
+    Fetch two 2-D indices and return a binary 2-D matrix
+    where a True value links two cells where all cells are the same
     """
-    return (x[..., np.newaxis] == y.T[np.newaxis, ...]).all(axis=1)
+    return (x[..., None] == y.T[None, ...]).all(axis=1)
diff --git a/src/agora/utils/indexing_new.py b/src/agora/utils/indexing_new.py
new file mode 100644
index 0000000000000000000000000000000000000000..7c749088c3276d95ccb9c54b8d6b7b99ea6b4fc9
--- /dev/null
+++ b/src/agora/utils/indexing_new.py
@@ -0,0 +1,195 @@
+#!/usr/bin/env jupyter
+"""
+Utilities based on association are used to efficiently acquire indices of
+tracklets with some kind of relationship.
+This can be:
+    - Cells that are to be merged.
+    - Cells that have a lineage relationship.
+"""
+
+import numpy as np
+import typing as t
+
+
+def validate_association_new(
+    association: np.ndarray,
+    indices: np.ndarray,
+    match_column: t.Optional[int] = None,
+) -> t.Tuple[np.ndarray, np.ndarray]:
+    """
+    Identify matches between two arrays by comparing rows.
+
+    We match lineage data on mother-bud pairs with all the cells identified to specialise to only those cells in mother-bud pairs.
+
+    We use broadcasting for speed.
+
+    Both a mother and bud in association must be in indices.
+
+    Parameters
+    ----------
+    association : np.ndarray
+        2D array of lineage associations where columns are (trap, mother, daughter)
+        or
+        a 3D array, which is an array of 2 X 2 arrays comprising [[trap_id, mother_label], [trap_id, daughter_label]].
+    indices : np.ndarray
+        A 2D array where each column is a different level, such as (trap_id, cell_label), which typically is an index of a Signal
+        dataframe. This array should not include mother_label.
+    match_column: int
+        If 0, matches indicate mothers from mother-bud pairs;
+        If 1, matches indicate daughters from mother-bud pairs;
+        If None, matches indicate either mothers or daughters in mother-bud pairs.
+
+    Returns
+    -------
+    valid_association: boolean np.ndarray
+        1D array indicating elements in association with matches.
+    valid_indices: boolean np.ndarray
+        1D array indicating elements in indices with matches.
+
+    Examples
+    --------
+    >>> import numpy as np
+    >>> from agora.utils.indexing import validate_association
+
+    >>> association = np.array([ [[0, 1], [0, 3]], [[0, 1], [0, 4]], [[0, 1], [0, 6]], [[0, 4], [0, 7]] ])
+    >>> indices = np.array([ [0, 1], [0, 2], [0, 3]])
+    >>> print(indices.T)
+
+    >>> valid_association, valid_indices = validate_association(association, indices)
+
+    >>> print(valid_association)
+    array([ True, False, False, False])
+    >>> print(valid_indices)
+    array([ True, False, True])
+
+    and
+
+    >>> association = np.array([[[0,3], [0,1]], [[0,2], [0,4]]])
+    >>> indices = np.array([[0,1], [0,2], [0,3]])
+    >>> valid_association, valid_indices = validate_association(association, indices)
+    >>> print(valid_association)
+    array([ True, False])
+    >>> print(valid_indices)
+    array([ True, False, True])
+    """
+    if association.ndim == 2:
+        # reshape into 3D array for broadcasting
+        # for each trap, [trap, mother, daughter] becomes
+        # [[trap, mother], [trap, daughter]]
+        association = _assoc_indices_to_3d(association)
+    # use broadcasting to compare association with indices
+    # swap trap and cell_label axes for correct broadcasting
+    indicesT = indices.T
+    # compare each of [[trap, mother], [trap, daughter]] for all traps
+    # in association with [trap, cell_label] for all traps in indices
+    valid_ndassociation = (
+        association[..., np.newaxis] == indicesT[np.newaxis, ...]
+    )
+    # find matches in association
+    ###
+    # make True comparisons have both trap_ids and cell labels matching
+    valid_cell_ids = valid_ndassociation.all(axis=2)
+    if match_column is None:
+        # make True comparisons match at least one row in indices
+        va_intermediate = valid_cell_ids.any(axis=2)
+        # make True comparisons have both mother and bud matching rows in indices
+        valid_association = va_intermediate.all(axis=1)
+    else:
+        # match_column selects mothers if 0 and daughters if 1
+        # make True match at least one row in indices
+        valid_association = valid_cell_ids[:, match_column].any(axis=1)
+    # find matches in indices
+    ###
+    # make True comparisons have a validated association for both the mother and bud
+    # make True comparisons have both trap_ids and cell labels matching
+    valid_cell_ids_va = valid_ndassociation[valid_association].all(axis=2)
+    if match_column is None:
+        # make True comparisons match either a mother or a bud in association
+        valid_indices = valid_cell_ids_va.any(axis=1)[0]
+    else:
+        valid_indices = valid_cell_ids_va[:, match_column][0]
+
+    # Alan's working code
+    # Compare existing association with available indices
+    # Swap trap and label axes for the association array to correctly cast
+    valid_ndassociation_a = association[..., None] == indices.T[None, ...]
+    # Broadcasting is confusing (but efficient):
+    # First we check the dimension across trap and cell id, to ensure both match
+    valid_cell_ids_a = valid_ndassociation_a.all(axis=2)
+    if match_column is None:
+        # Then we check the merge tuples to check which cases have both target and source
+        valid_association_a = valid_cell_ids_a.any(axis=2).all(axis=1)
+
+        # Finally we check the dimension that crosses all indices, to ensure the pair
+        # is present in a valid merge event.
+        valid_indices_a = (
+            valid_ndassociation_a[valid_association_a]
+            .all(axis=2)
+            .any(axis=(0, 1))
+        )
+    else:  # We fetch specific indices if we aim for the ones with one present
+        valid_indices_a = valid_cell_ids_a[:, match_column].any(axis=0)
+        # Valid association then becomes a boolean array, true means that there is a
+        # match (match_column) between that cell and the index
+        valid_association_a = (
+            valid_cell_ids_a[:, match_column] & valid_indices_a
+        ).any(axis=1)
+
+    # check that both implementations agree
+    assert np.array_equal(valid_association, valid_association_a), "valid_association error"
+    assert np.array_equal(valid_indices, valid_indices_a), "valid_indices error"
+
+    return valid_association, valid_indices
+
+
+def _assoc_indices_to_3d(ndarray: np.ndarray):
+    """
+    Reorganise an array of shape (N, 3) into one of shape (N, 2, 2).
+
+    Reorganise an array so that the last entry of each row is removed
+    and generates a new row. This new row retains all other entries of
+    the original row.
+
+    Example:
+    [ [0, 1, 3], [0, 1, 4] ]
+    becomes
+    [ [[0, 1], [0, 3]], [[0, 1], [0, 4]] ]
+    """
+    result = ndarray
+    if len(ndarray) and ndarray.ndim > 1:
+        if ndarray.shape[1] == 3:
+            # faster indexing for single positions
+            result = np.transpose(
+                np.hstack((ndarray[:, [0]], ndarray)).reshape(-1, 2, 2),
+                axes=[0, 2, 1],
+            )
+        else:
+            # 20% slower, but more general indexing
+            columns = np.arange(ndarray.shape[1])
+            result = np.stack(
+                (
+                    ndarray[:, np.delete(columns, -1)],
+                    ndarray[:, np.delete(columns, -2)],
+                ),
+                axis=1,
+            )
+    return result
+
+
+def _3d_index_to_2d(array: np.ndarray):
+    """Revert switch from _assoc_indices_to_3d."""
+    result = array
+    if len(array):
+        result = np.concatenate(
+            (array[:, 0, :], array[:, 1, 1, np.newaxis]), axis=1
+        )
+    return result
+
+
+def compare_indices(x: np.ndarray, y: np.ndarray) -> np.ndarray:
+    """
+    Compare two 2D arrays using broadcasting.
+
+    Return a binary array where a True value links two cells where
+    all cells are the same.
+    """
+    return (x[..., np.newaxis] == y.T[np.newaxis, ...]).all(axis=1)
diff --git a/src/agora/utils/merge.py b/src/agora/utils/merge.py
index edd65b31f0e2366d8b0eeb5cf8e21a97c24efff8..442bdf0e399e07116713355858680241628296bb 100644
--- a/src/agora/utils/merge.py
+++ b/src/agora/utils/merge.py
@@ -13,8 +13,11 @@ from agora.utils.indexing import compare_indices, validate_association


 def apply_merges(data: pd.DataFrame, merges: np.ndarray):
-    """Split data in two, one subset for rows relevant for merging and one
-    without them. It uses an array of source tracklets and target tracklets
+    """
+    Split data in two, one subset for rows relevant for merging and one
+    without them.
+
+    Use an array of source tracklets and target tracklets
     to efficiently merge them.

     Parameters
diff --git a/src/aliby/io/dataset.py b/src/aliby/io/dataset.py
index 30f2cd6f490db4ed24048f4017d518fe252297f4..28ba59d1b9d690a073785d145b3ad4dc27535eca 100644
--- a/src/aliby/io/dataset.py
+++ b/src/aliby/io/dataset.py
@@ -54,7 +54,7 @@ class DatasetLocalABC(ABC):
     Abstract Base class to find local files, either OME-XML or raw images.
     """

-    _valid_suffixes = ("tiff", "png", "zarr")
+    _valid_suffixes = ("tiff", "png", "zarr", "tif")
     _valid_meta_suffixes = ("txt", "log")

     def __init__(self, dpath: t.Union[str, Path], *args, **kwargs):
diff --git a/src/aliby/io/image.py b/src/aliby/io/image.py
index 282c236140e3198d63fdf2833453e9b9fb540f3e..5af04ca679d92fe9d40ca33f029b9553f42a9dc2 100644
--- a/src/aliby/io/image.py
+++ b/src/aliby/io/image.py
@@ -30,14 +30,14 @@ from agora.io.metadata import dir_to_meta, dispatch_metadata_parser


 def get_examples_dir():
-    """Get examples directory which stores dummy image for tiler"""
+    """Get examples directory that stores dummy image for tiler."""
     return files("aliby").parent.parent / "examples" / "tiler"


 def instantiate_image(
     source: t.Union[str, int, t.Dict[str, str], Path], **kwargs
 ):
-    """Wrapper to instatiate the appropiate image
+    """Wrapper to instantiate the appropriate image

     Parameters
     ----------
@@ -55,26 +55,26 @@ def instantiate_image(


 def dispatch_image(source: t.Union[str, int, t.Dict[str, str], Path]):
-    """
-    Wrapper to pick the appropiate Image class depending on the source of data.
- """ + """Pick the appropriate Image class depending on the source of data.""" if isinstance(source, (int, np.int64)): from aliby.io.omero import Image - instatiator = Image + instantiator = Image elif isinstance(source, dict) or ( isinstance(source, (str, Path)) and Path(source).is_dir() ): if Path(source).suffix == ".zarr": - instatiator = ImageZarr + instantiator = ImageZarr else: - instatiator = ImageDir + instantiator = ImageDir + elif isinstance(source, Path) and source.is_file(): + # my addition + instantiator = ImageLocalOME elif isinstance(source, str) and Path(source).is_file(): - instatiator = ImageLocalOME + instantiator = ImageLocalOME else: raise Exception(f"Invalid data source at {source}") - - return instatiator + return instantiator class BaseLocalImage(ABC): @@ -82,6 +82,7 @@ class BaseLocalImage(ABC): Base Image class to set path and provide context management method. """ + # default image order _default_dimorder = "tczyx" def __init__(self, path: t.Union[str, Path]): @@ -98,8 +99,7 @@ class BaseLocalImage(ABC): return False def rechunk_data(self, img): - # Format image using x and y size from metadata. - + """Format image using x and y size from metadata.""" self._rechunked_img = da.rechunk( img, chunks=( @@ -145,16 +145,16 @@ class ImageLocalOME(BaseLocalImage): in which a multidimensional tiff image contains the metadata. """ - def __init__(self, path: str, dimorder=None): + def __init__(self, path: str, dimorder=None, **kwargs): super().__init__(path) self._id = str(path) + self.set_meta(str(path)) - def set_meta(self): + def set_meta(self, path): meta = dict() try: with TiffFile(path) as f: self._meta = xmltodict.parse(f.ome_metadata)["OME"] - for dim in self.dimorder: meta["size_" + dim.lower()] = int( self._meta["Image"]["Pixels"]["@Size" + dim] @@ -165,21 +165,19 @@ class ImageLocalOME(BaseLocalImage): ] meta["name"] = self._meta["Image"]["@Name"] meta["type"] = self._meta["Image"]["Pixels"]["@Type"] - - except Exception as e: # Images not in OMEXML - + except Exception as e: + # images not in OMEXML print("Warning:Metadata not found: {}".format(e)) print( - f"Warning: No dimensional info provided. Assuming {self._default_dimorder}" + "Warning: No dimensional info provided. " + f"Assuming {self._default_dimorder}" ) - - # Mark non-existent dimensions for padding + # mark non-existent dimensions for padding self.base = self._default_dimorder # self.ids = [self.index(i) for i in dimorder] - - self._dimorder = base - + self._dimorder = self.base self._meta = meta + # self._meta["name"] = Path(path).name.split(".")[0] @property def name(self): @@ -246,7 +244,7 @@ class ImageDir(BaseLocalImage): It inherits from BaseLocalImage so we only override methods that are critical. Assumptions: - - One folders per position. + - One folder per position. - Images are flat. - Channel, Time, z-stack and the others are determined by filenames. - Provides Dimorder as it is set in the filenames, or expects order during instatiation @@ -318,7 +316,7 @@ class ImageZarr(BaseLocalImage): print(f"Could not add size info to metadata: {e}") def get_data_lazy(self) -> da.Array: - """Return 5D dask array. 
-        For lazy-loading local multidimensional zarr files"""
+        """Return 5D dask array for lazy-loading local multidimensional zarr files."""
         return self._img

     def add_size_to_meta(self):
diff --git a/src/aliby/pipeline.py b/src/aliby/pipeline.py
index 144b4fb8fcdbaa20e488bc780e0a01868eb8f4c6..6e37bac203f9283223e633a3b8c5c157289a1694 100644
--- a/src/aliby/pipeline.py
+++ b/src/aliby/pipeline.py
@@ -154,6 +154,7 @@ class PipelineParameters(ParametersABC):
         defaults["tiler"]["backup_ref_channel"] = backup_ref_channel

         defaults["baby"] = BabyParameters.default(**baby).to_dict()
+        # why are BabyParameters here as an alternative?
         defaults["extraction"] = (
             exparams_from_meta(meta_d)
             or BabyParameters.default(**extraction).to_dict()
@@ -432,6 +433,7 @@ class Pipeline(ProcessABC):
             if process_from["extraction"] < tps:
                 # TODO Move this parameter validation into Extractor
                 av_channels = set((*steps["tiler"].channels, "general"))
+                # overwrite extraction specified by PipelineParameters !!
                 config["extraction"]["tree"] = {
                     k: v
                     for k, v in config["extraction"]["tree"].items()
@@ -681,12 +683,12 @@ class Pipeline(ProcessABC):
             for i, step in enumerate(self.step_sequence, 1)
         }

-        # Set up
+        # set up
         directory = config["general"]["directory"]
-
         trackers_state: t.List[np.ndarray] = []
         with dispatch_image(image_id)(image_id, **self.server_info) as image:
             filename = Path(f"{directory}/{image.name}.h5")
+            # load metadata
             meta = MetaData(directory, filename)
             from_start = True if np.any(ow.values()) else False
             # remove existing file if overwriting
diff --git a/src/aliby/tile/tiler.py b/src/aliby/tile/tiler.py
index b0769e1d22ef6306162b18b49af696a7ef55cdb7..3ea9817f773e49376d95be49119ccca7170ae40c 100644
--- a/src/aliby/tile/tiler.py
+++ b/src/aliby/tile/tiler.py
@@ -1,17 +1,29 @@
 """
 Tiler: Divides images into smaller tiles.

-The tasks of the Tiler are selecting regions of interest, or tiles, of images - with one trap per tile, correcting for the drift of the microscope stage over time, and handling errors and bridging between the image data and Aliby’s image-processing steps.
+The tasks of the Tiler are selecting regions of interest, or tiles, of
+images - with one trap per tile, correcting for the drift of the microscope
+stage over time, and handling errors and bridging between the image data
+and Aliby’s image-processing steps.

 Tiler subclasses deal with either network connections or local files.

-To find tiles, we use a two-step process: we analyse the bright-field image to produce the template of a trap, and we fit this template to the image to find the tiles' centres.
+To find tiles, we use a two-step process: we analyse the bright-field image
+to produce the template of a trap, and we fit this template to the image to
+find the tiles' centres.

-We use texture-based segmentation (entropy) to split the image into foreground -- cells and traps -- and background, which we then identify with an Otsu filter. Two methods are used to produce a template trap from these regions: pick the trap with the smallest minor axis length and average over all validated traps.
+We use texture-based segmentation (entropy) to split the image into
+foreground -- cells and traps -- and background, which we then identify with
+an Otsu filter. Two methods are used to produce a template trap from these
+regions: pick the trap with the smallest minor axis length and average over
+all validated traps.
-A peak-identifying algorithm recovers the x and y-axis location of traps in the original image, and we choose the approach to template that identifies the most tiles.
+A peak-identifying algorithm recovers the x and y-axis location of traps in
+the original image, and we choose the approach to template that identifies
+the most tiles.

-The experiment is stored as an array with a standard indexing order of (Time, Channels, Z-stack, X, Y).
+The experiment is stored as an array with a standard indexing order of
+(Time, Channels, Z-stack, X, Y).
 """
 import logging
 import re
@@ -593,7 +605,10 @@ class Tiler(StepABC):

     def get_channel_index(self, channel: str or int) -> int or None:
         """
-        Find index for channel using regex. Returns the first matched string.
+        Find index for channel using regex.
+
+        Return the first matched string.
+
         If self.channels is integers (no image metadata) it returns None.
         If channel is integer

@@ -602,10 +617,8 @@ class Tiler(StepABC):
         channel: string or int
             The channel or index to be used.
         """
-
         if all(map(lambda x: isinstance(x, int), self.channels)):
             channel = channel if isinstance(channel, int) else None
-
         if isinstance(channel, str):
             channel = find_channel_index(self.channels, channel)
         return channel
diff --git a/src/extraction/core/functions/defaults.py b/src/extraction/core/functions/defaults.py
index e159842bad00ac61b22c177e1215319c2d9b460a..d4741ca46495488ca31a7c0ddb46405d0e25972a 100644
--- a/src/extraction/core/functions/defaults.py
+++ b/src/extraction/core/functions/defaults.py
@@ -1,10 +1,9 @@
 # File with defaults for ease of use
-import re
 import typing as t
 from pathlib import Path
+
 import h5py

-# should we move these functions here?
 from aliby.tile.tiler import find_channel_name


@@ -59,6 +58,7 @@ def exparams_from_meta(
         for ch in extant_fluorescence_ch:
             base["tree"][ch] = default_reduction_metrics
         base["sub_bg"] = extant_fluorescence_ch
+        # additional extraction defaults if the channels are available

     if "ph" in extras:
         # SWAINLAB specific names
diff --git a/src/postprocessor/core/processor.py b/src/postprocessor/core/processor.py
index 32d87175fcc946618fca9833c21528620a2c4b4a..76be13cd9f7e6e339e2a5d02570ac5c21cccc8f8 100644
--- a/src/postprocessor/core/processor.py
+++ b/src/postprocessor/core/processor.py
@@ -229,7 +229,7 @@ class PostProcessor(ProcessABC):
                 self.run_process(dataset, process, loaded_process)

     def run_process(self, dataset, process, loaded_process):
-        """Run process on a single dataset and write the result."""
+        """Run process to obtain a single dataset and write the result."""
         # define signal
         if isinstance(dataset, list):
             # multisignal process
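
For reference, the broadcasting trick shared by validate_association and
validate_association_new above can be reproduced outside the codebase. The
following is a minimal sketch in plain numpy, reusing the example values from
the docstring; the variable names hits and cell_hits are illustrative and do
not appear in the patch:

import numpy as np

# mother-bud pairs as (trap, label) tuples: shape (N, 2, 2)
association = np.array(
    [[[0, 1], [0, 3]], [[0, 1], [0, 4]], [[0, 1], [0, 6]], [[0, 4], [0, 7]]]
)
# signal index rows as (trap, label): shape (M, 2)
indices = np.array([[0, 1], [0, 2], [0, 3]])

# broadcast (N, 2, 2, 1) against (2, M) to get (N, 2, 2, M)
hits = association[..., np.newaxis] == indices.T[np.newaxis, ...]
# a cell matches an index row when trap and label both agree: (N, 2, M)
cell_hits = hits.all(axis=2)
# a pair is valid when mother and bud each match some index row
valid_association = cell_hits.any(axis=2).all(axis=1)
# an index row is valid when it appears in any valid pair
valid_indices = cell_hits[valid_association].any(axis=(0, 1))

print(valid_association)  # [ True False False False]
print(valid_indices)  # [ True False  True]

The output matches the docstring example: only the first mother-bud pair has
both members among the index rows, and only index rows (0, 1) and (0, 3) take
part in that valid pair.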