diff --git a/docs/source/specifications/metadata.org b/docs/source/specifications/metadata.org index 6618730c2dfb78e133a64cb62a07a9fd8b90c93a..c7dc02b06c1e879b66dabfdfc8d8653575fbcc0f 100644 --- a/docs/source/specifications/metadata.org +++ b/docs/source/specifications/metadata.org @@ -4,7 +4,7 @@ Draft for recommended metadata for images to provide a standard interface for al * Essential data - DimensionOrder: str - Order of dimensions (e.g., CTZYX for Channel, Time, Z,Y,X) + Order of dimensions (e.g., TCZYX for Time, Channel, Z,Y,X) - PixelSize: float Size of pixel, useful for segmentation. - Channels: List[str] diff --git a/examples/tiler/pypipeline_unit_test_00_000001_Brightfield_003.tif b/examples/tiler/pypipeline_unit_test_00_000001_Brightfield_003.tif new file mode 100755 index 0000000000000000000000000000000000000000..f813c15a9c8aef8a6e629ff17e460aef4acdb630 Binary files /dev/null and b/examples/tiler/pypipeline_unit_test_00_000001_Brightfield_003.tif differ diff --git a/src/aliby/io/image.py b/src/aliby/io/image.py index 033cae2574d86197a50dc57c3c1bee296d523dcc..57283dc493fc226f013fb96c6e7c7d03abd05147 100644 --- a/src/aliby/io/image.py +++ b/src/aliby/io/image.py @@ -1,8 +1,22 @@ #!/usr/bin/env python3 +""" +Image: Loads images and registers them. + +Image instances load images from a specified directory into an object that +also contains image properties such as name and metadata. Pixels from images +are stored in dask arrays; the standard way is to store them in 5-dimensional +arrays: T(ime point), C(channel), Z(-stack), Y, X. + +This module consists of a base Image class (BaseLocalImage). ImageLocalOME +handles local OMERO images. ImageDir handles cases in which images are split +into directories, with each time point and channel having its own image file. +ImageDummy is a dummy class for silent failure testing. 
+""" import typing as t -from abc import ABC, abstractproperty +from abc import ABC, abstractmethod, abstractproperty from datetime import datetime +from importlib_resources import files from pathlib import Path, PosixPath import dask.array as da @@ -13,6 +27,11 @@ from tifffile import TiffFile from agora.io.metadata import dir_to_meta +def get_examples_dir(): + """Get examples directory which stores dummy image for tiler""" + return files("aliby").parent.parent / "examples" / "tiler" + + def get_image_class(source: t.Union[str, int, t.Dict[str, str], PosixPath]): """ Wrapper to pick the appropiate Image class depending on the source of data. @@ -35,10 +54,10 @@ def get_image_class(source: t.Union[str, int, t.Dict[str, str], PosixPath]): class BaseLocalImage(ABC): """ - Base class to set path and provide context management method. + Base Image class to set path and provide context management method. """ - _default_dimorder = "tczxy" + _default_dimorder = "tczyx" def __init__(self, path: t.Union[str, PosixPath]): # If directory, assume contents are naturally sorted @@ -47,6 +66,12 @@ class BaseLocalImage(ABC): def __enter__(self): return self + def __exit__(self, *exc): + for e in exc: + if e is not None: + print(e) + return False + def rechunk_data(self, img): # Format image using x and y size from metadata. @@ -62,6 +87,10 @@ class BaseLocalImage(ABC): ) return self._rechunked_img + @abstractmethod + def get_data_lazy(self) -> da.Array: + pass + @abstractproperty def name(self): pass @@ -74,23 +103,128 @@ class BaseLocalImage(ABC): def data(self): return self.get_data_lazy() - def __enter__(self): - return self - - def __exit__(self, *exc): - for e in exc: - if e is not None: - print(e) - return False - @property def metadata(self): return self._meta +class ImageDummy(BaseLocalImage): + """ + Dummy Image class. + + ImageDummy mimics the other Image classes in such a way that it is accepted + by Tiler. 
The purpose of this class is for testing, in particular, + identifying silent failures. If something goes wrong, we should be able to + know whether it is because of bad parameters or bad input data. + + For the purposes of testing parameters, ImageDummy assumes that we already + know the tiler parameters before Image instances are instantiated. This is + true for a typical pipeline run. + """ + + def __init__(self, tiler_parameters: dict): + """Builds image instance + + Parameters + ---------- + tiler_parameters : dict + Tiler parameters, in dict form. Following + aliby.tile.tiler.TilerParameters, the keys are: "tile_size" (size of + tile), "ref_channel" (reference channel for tiling), and "ref_z" + (reference z-stack, 0 to choose a default). + """ + self.ref_channel = tiler_parameters["ref_channel"] + self.ref_z = tiler_parameters["ref_z"] + + # Goal: make Tiler happy. + @staticmethod + def pad_array( + image_array: da.Array, + dim: int, + n_empty_slices: int, + image_position: int = 0, + ): + """Extends a dimension in a dask array and pads with zeros + + Extends a dimension in a dask array that has existing content, then pads + with zeros. + + Parameters + ---------- + image_array : da.Array + Input dask array + dim : int + Dimension in which to extend the dask array. + n_empty_slices : int + Number of empty slices to extend the dask array by, in the specified + dimension/axis. + image_position : int + Position within the new dimension to place the input arary, default 0 + (the beginning). + + Examples + -------- + ``` + extended_array = pad_array( + my_da_array, dim = 2, n_empty_slices = 4, image_position = 1) + ``` + Extends a dask array called `my_da_array` in the 3rd dimension + (dimensions start from 0) by 4 slices, filled with zeros. 
And puts the + original content in slice 1 of the 3rd dimension + """ + # Concats zero arrays with same dimensions as image_array, and puts + # image_array as first element in list of arrays to be concatenated + zeros_array = da.zeros_like(image_array) + return da.concatenate( + [ + *([zeros_array] * image_position), + image_array, + *([zeros_array] * (n_empty_slices - image_position)), + ], + axis=dim, + ) + + # Logic: We want to return a image instance + def get_data_lazy(self) -> da.Array: + """Return 5D dask array. For lazy-loading multidimensional tiff files. Dummy image.""" + examples_dir = get_examples_dir() + # TODO: Make this robust to having multiple TIFF images, one for each z-section, + # all falling under the same "pypipeline_unit_test_00_000001_Brightfield_*.tif" + # naming scheme. The aim is to create a multidimensional dask array that stores + # the z-stacks. + img_filename = "pypipeline_unit_test_00_000001_Brightfield_003.tif" + img_path = examples_dir / img_filename + # img is a dask array has three dimensions: z, x, y + # TODO: Write a test to confirm this: If everything worked well, + # z = 1, x = 1200, y = 1200 + img = imread(str(img_path)) + # Adds t & c dimensions + img = da.reshape( + img, (1, 1, img.shape[-3], img.shape[-2], img.shape[-1]) + ) + # Pads t, c, and z dimensions + img = self.pad_array( + img, dim=0, n_empty_slices=199 + ) # 200 timepoints total + img = self.pad_array(img, dim=1, n_empty_slices=2) # 3 channels + img = self.pad_array( + img, dim=2, n_empty_slices=4, image_position=self.ref_z + ) # 5 z-stacks + return img + + def name(self): + pass + + def dimorder(self): + pass + + class ImageLocalOME(BaseLocalImage): """ - Fetch image from OMEXML data format, in which a multidimensional tiff image contains the metadata. + Local OMERO Image class. + + This is a derivative Image class. It fetches an image from OMEXML data format, + in which a multidimensional tiff image contains the metadata. 
""" def __init__(self, path: str, dimorder=None): @@ -190,7 +324,7 @@ class ImageDir(BaseLocalImage): """ Image class for the case in which all images are split in one or multiple folders with time-points and channels as independent files. - It inherits from Imagelocal so we only override methods that are critical. + It inherits from BaseLocalImage so we only override methods that are critical. Assumptions: - One folders per position. diff --git a/src/aliby/tile/tiler.py b/src/aliby/tile/tiler.py index fea4aaad961e265dcb04d059b960a9bf314f8403..a3216e9eb7f0987238161a977249b4c74a1bb4dc 100644 --- a/src/aliby/tile/tiler.py +++ b/src/aliby/tile/tiler.py @@ -15,7 +15,7 @@ One key method is Tiler.run. The image-processing is performed by traps/segment_traps. -The experiment is stored as an array with a standard indexing order of (Time, Channels, Z-stack, Y, X). +The experiment is stored as an array with a standard indexing order of (Time, Channels, Z-stack, X, Y). """ import re import typing as t @@ -30,7 +30,7 @@ from skimage.registration import phase_cross_correlation from agora.abc import ParametersABC, StepABC from agora.io.writer import BridgeH5 -from aliby.io.image import ImageLocalOME, ImageDir +from aliby.io.image import ImageLocalOME, ImageDir, ImageDummy from aliby.tile.traps import segment_traps @@ -247,6 +247,44 @@ class Tiler(StepABC): self.tile_size = self.tile_size or min(self.image.shape[-2:]) + @classmethod + def dummy(cls, parameters: dict): + """ + Instantiate dummy Tiler from dummy image + + If image.dimorder exists dimensions are saved in that order. + Otherwise default to "tczyx". 
+ + Parameters + ---------- + parameters: dictionary output of an instance of TilerParameters + """ + imgdmy_obj = ImageDummy(parameters) + dummy_image = imgdmy_obj.get_data_lazy() + dummy_omero_metadata = ( # Default to "tczyx" if image.dimorder is None + { + f"size_{dim}": dim_size + for dim, dim_size in zip( + imgdmy_obj.dimorder or "tczyx", dummy_image.shape + ) + }, + ) + dummy_omero_metadata.update( + { + "channels": [ + parameters["ref_channel"], + *(["nil"] * (dummy_omero_metadata["size_c"] - 1)), + ], + "name": "", + } + ) + + return cls( + imgdmy_obj.data, + dummy_omero_metadata, + TilerParameters.from_dict(parameters), + ) + @classmethod def from_image(cls, image, parameters: TilerParameters): """ @@ -321,15 +359,10 @@ class Tiler(StepABC): @property def shape(self): """ - Returns properties of the time-lapse experiment - no of channels - no of time points - no of z stacks - no of pixels in y direction - no of pixels in z direction + Returns properties of the time-lapse as shown by self.image.shape + """ - c, t, z, y, x = self.image.shape - return (c, t, x, y, z) + return self.image.shape @property def n_processed(self): diff --git a/src/extraction/core/extractor.py b/src/extraction/core/extractor.py index f854dc802abf4e358edf64d28b9d63007e3b6a8d..36d03cb8c814de84e1fec6cc8a4e667733abf980 100644 --- a/src/extraction/core/extractor.py +++ b/src/extraction/core/extractor.py @@ -261,7 +261,7 @@ class Extractor(StepABC): channel_ids = None if z is None: # gets the tiles data via tiler - z: t.List[int] = list(range(self.tiler.shape[-1])) + z: t.List[int] = list(range(self.tiler.shape[-3])) tiles = ( self.tiler.get_tiles_timepoint( tp, channels=channel_ids, z=z, **kwargs diff --git a/tests/aliby/network/test_tiler.py b/tests/aliby/network/test_tiler.py index 52187b58ee4db91d11856bf7cdd6584adf4c1b6d..1ec1755c11ee519c9bdcd1556ed331dfb2d97e49 100644 --- a/tests/aliby/network/test_tiler.py +++ b/tests/aliby/network/test_tiler.py @@ -19,6 +19,12 @@ def 
#!/usr/bin/env python3
"""Tests for aliby.io.image.ImageDummy."""
import numpy as np
import dask.array as da
import pytest

from aliby.io.image import ImageDummy

tiler_parameters = {"tile_size": 117, "ref_channel": "Brightfield", "ref_z": 0}

sample_da = da.from_array(np.array([[1, 2], [3, 4]]))
# Make it 5-dimensional
sample_da = da.reshape(
    sample_da, (1, 1, 1, sample_da.shape[-2], sample_da.shape[-1])
)


@pytest.mark.parametrize("sample_da", [sample_da])
@pytest.mark.parametrize("dim", [2])
@pytest.mark.parametrize("n_empty_slices", [4])
@pytest.mark.parametrize("image_position", [1])
def test_pad_array(sample_da, dim, n_empty_slices, image_position):
    """Test ImageDummy.pad_array() method"""
    # create object
    imgdmy = ImageDummy(tiler_parameters)
    # pads array
    padded_da = imgdmy.pad_array(
        sample_da,
        dim=dim,
        n_empty_slices=n_empty_slices,
        image_position=image_position,
    )
    # Evaluate the lazy array once and reuse the result.
    padded = padded_da.compute()

    # Index `image_position` along `dim` and take everything along the other
    # axes. The index must be a tuple: NumPy no longer treats a list of
    # slices/ints as a multidimensional index.
    ix = tuple(
        image_position if axis == dim else slice(None)
        for axis in range(padded.ndim)
    )

    # Checks that original image array is there and is at the correct index
    assert np.array_equal(padded[ix], sample_da.compute()[0])
    # Checks that the additional axis is extended correctly
    assert padded.shape[dim] == n_empty_slices + 1