diff --git a/src/agora/io/metadata.py b/src/agora/io/metadata.py index 025075024756d98ba907ddfbe753dc0946dad286..8842bc96cd71abb4002399b948256019adf0398d 100644 --- a/src/agora/io/metadata.py +++ b/src/agora/io/metadata.py @@ -69,7 +69,7 @@ def flatten_dict(nested_dict, separator="/"): """ df = pd.json_normalize(nested_dict, sep=separator) - flattened = df.to_dict(orient="records") or {} + flattened = df.to_dict(orient="records")[0] or {} return flattened @@ -90,7 +90,7 @@ def find_file(root_dir, regex): ) file = [sorted(file)[0]] if len(file) == 0: - print("Warning:Metadata: No valid logfile found.") + print("Warning:Metadata: No valid swainlab .log found.") else: return file[0] return None diff --git a/src/aliby/io/dataset.py b/src/aliby/io/dataset.py index e0f3b968856153cbb2cecb856722fc126efcfe3e..62366d0f4446b98b449ffd6afbb34b42b6128501 100644 --- a/src/aliby/io/dataset.py +++ b/src/aliby/io/dataset.py @@ -1,26 +1,35 @@ #!/usr/bin/env python3 +""" +Dataset is a group of classes to manage multiple types of experiments: + - Remote experiments on an OMERO server + - Local experiments in a multidimensional OME-TIFF image containing the metadata + - Local experiments in a directory containing multiple positions in independent images with or without metadata +""" +import os import shutil +import time import typing as t +from abc import ABC, abstractproperty, abstractmethod from pathlib import Path, PosixPath from typing import Union import omero -from agora.io.bridge import BridgeH5 -from aliby.io.image import ImageLocal +from agora.io.bridge import BridgeH5 +from aliby.io.image import ImageLocalOME from aliby.io.omero import BridgeOmero -class DatasetLocal: - """Load a dataset from a folder - - We use a given image of a dataset to obtain the metadata, for we cannot expect folders to contain it straight away. - +class DatasetLocalABC(ABC): + """ + Abstract Base class to fetch local files, either OME-XML or raw images. 
""" + _valid_suffixes = ("tiff", "png") + _valid_meta_suffixes = ("txt", "log") + def __init__(self, dpath: Union[str, PosixPath], *args, **kwargs): - self.fpath = Path(dpath) - assert len(self.get_images()), "No tif files found" + self.path = Path(dpath) def __enter__(self): return self @@ -30,36 +39,99 @@ class DatasetLocal: @property def dataset(self): - return self.fpath + return self.path @property def name(self): - return self.fpath.name + return self.path.name @property def unique_name(self): - return self.fpath.name + return self.path.name - @property + @abstractproperty def date(self): - return ImageLocal(list(self.get_images().values())[0]).date - - def get_images(self): - return {f.name: str(f) for f in self.fpath.glob("*.tif")} + pass @property def files(self): if not hasattr(self, "_files"): self._files = { - f: f for f in self.fpath.rglob("*") if str(f).endswith(".txt") + f: f + for f in self.path.rglob("*") + if any( + str(f).endswith(suffix) + for suffix in self._valid_meta_suffixes + ) } return self._files def cache_logs(self, root_dir): + # Copy metadata files to results folder for name, annotation in self.files.items(): shutil.copy(annotation, root_dir / name.name) return True + @abstractmethod + def get_images(self): + # Return location of images and their unique names + pass + + +class DatasetLocalDir(DatasetLocalABC): + """ + Organise an entire dataset, composed of multiple images, as a directory containing directories with individual files. + It relies on ImageDir to manage images. 
+ """ + + def __init__(self, dpath: Union[str, PosixPath], *args, **kwargs): + super().__init__(dpath) + + @property + def date(self): + # Use folder modification time, for cases where metadata is minimal + return time.strftime( + "%Y%m%d", time.strptime(time.ctime(os.path.getmtime(self.path))) + ) + + def get_images(self): + return [ + folder + for folder in self.path.glob("*/") + if any( + path + for suffix in self._valid_meta_suffixes + for path in folder.glob(f"*.{suffix}") + ) + ] + + +class DatasetLocalOME(DatasetLocalABC): + """Load a dataset from a folder + + We use a given image of a dataset to obtain the metadata, + as we cannot expect folders to contain this information. + + It uses the standard OME-TIFF file format. + """ + + def __init__(self, dpath: Union[str, PosixPath], *args, **kwargs): + super().__init__(dpath) + assert len(self.get_images()), "No .tiff files found" + + @property + def date(self): + # Access the date from the metadata of the first position + return ImageLocalOME(list(self.get_images().values())[0]).date + + def get_images(self): + # Fetches all valid formats and overwrites if duplicates with different suffix + return { + f.name: str(f) + for suffix in self._valid_suffixes + for f in self.path.glob(f"*.{suffix}") + } + class Dataset(BridgeOmero): def __init__(self, expt_id, **server_info): diff --git a/src/aliby/io/image.py b/src/aliby/io/image.py index 35d454d798751b4f3baecdb83d5849f523d8e60e..816c9e13d6de8f525f5bd3ca58ef1cc1d48ab426 100644 --- a/src/aliby/io/image.py +++ b/src/aliby/io/image.py @@ -1,19 +1,20 @@ #!/usr/bin/env python3 import typing as t +from abc import ABC, abstractproperty from datetime import datetime from pathlib import Path, PosixPath import dask.array as da import numpy as np import xmltodict -from agora.io.bridge import BridgeH5 from dask import delayed from dask.array.image import imread from omero.model import enums as omero_enums from tifffile import TiffFile from yaml import safe_load +from agora.io.bridge 
import BridgeH5 from agora.io.metadata import dir_to_meta from aliby.io.omero import BridgeOmero @@ -41,14 +42,14 @@ def get_image_class(source: t.Union[str, int, t.Dict[str, str], PosixPath]): ): instatiator = ImageDirectory elif isinstance(source, str) and Path(source).is_file(): - instatiator = ImageLocal + instatiator = ImageLocalOME else: raise Exception(f"Invalid data source at {source}") return instatiator -class BaseLocalImage: +class BaseLocalImage(ABC): """ Base class to set path and provide context management method. """ @@ -61,6 +62,7 @@ class BaseLocalImage: return self def format_data(self, img): + # Format image using x and y size from metadata. self._formatted_img = da.rechunk( img, @@ -68,17 +70,30 @@ class BaseLocalImage: 1, 1, 1, - *[self._meta[f"size_{n}"] for n in self.dimorder[-2:]], + self._meta["size_x"], + self._meta["size_y"], ), ) return self._formatted_img + @abstractproperty + def name(self): + pass + + @abstractproperty + def dimorder(self): + pass + @property def data(self): return self.get_data_lazy() -class ImageLocal(BaseLocalImage): +class ImageLocalOME(BaseLocalImage): + """ + Fetch image from OMEXML data format, in which a multidimensional tiff image contains the metadata. + """ + def __init__(self, path: str, dimorder=None): super().__init__(path) self._id = str(path) @@ -125,10 +140,6 @@ class ImageLocal(BaseLocalImage): def name(self): return self._meta["name"] - @property - def data(self): - return self.get_data_lazy() - @property def date(self): date_str = [ @@ -186,8 +197,6 @@ class ImageLocal(BaseLocalImage): return self.format_data(img) - # TODO continue here. Ensure _dim_values are generated, or called from _meta - class ImageDir(BaseLocalImage): """ @@ -199,7 +208,7 @@ class ImageDir(BaseLocalImage): - One folders per position. - Images are flat. - Channel, Time, z-stack and the others are determined by filenames. 
- - Provides Dimorder as TCZYX + - Provides Dimorder as it is set in the filenames, or expects order during instantiation """ def __init__(self, path: t.Union[str, PosixPath]): @@ -215,16 +224,28 @@ class ImageDir(BaseLocalImage): # If extra channels, pick the first stack of the last dimensions - pixels = img while len(img.shape) > 3: img = img[..., 0] + if self._meta: self._meta["size_x"], self._meta["size_y"] = img.shape[-2:] - img = da.reshape(img, (*self._dim_values(), *img.shape[1:])) + # img = da.reshape(img, (*self._meta, *img.shape[1:])) + img = da.reshape(img, self._meta.values()) pixels = self.format_data(img) return pixels + @property + def name(self): + return self.path.stem + + @property + def dimorder(self): + # Assumes only dimensions start with "size" + return [ + k.split("_")[-1] for k in self._meta.keys() if k.startswith("size") + ] + class Image(BridgeOmero): """ diff --git a/src/aliby/pipeline.py b/src/aliby/pipeline.py index a3392433fce39b8d6df5d87701fe4af8c429c669..989a0e4afc7794691d65b473c8984e8b2e6524fe 100644 --- a/src/aliby/pipeline.py +++ b/src/aliby/pipeline.py @@ -29,7 +29,7 @@ from agora.io.writer import ( # BabyWriter, ) from aliby.baby_client import BabyParameters, BabyRunner from aliby.haystack import initialise_tf -from aliby.io.dataset import Dataset, DatasetLocal +from aliby.io.dataset import Dataset, DatasetLocalOME, DatasetLocalDir from aliby.io.image import get_image_class from aliby.tile.tiler import Tiler, TilerParameters from extraction.core.extractor import Extractor, ExtractorParameters diff --git a/src/aliby/tile/tiler.py b/src/aliby/tile/tiler.py index cddb2152262ddba9bb3cde5f94d49d69044b5d0f..f36492e010ec8b4349cb6bb29cd1820d5436b931 100644 --- a/src/aliby/tile/tiler.py +++ b/src/aliby/tile/tiler.py @@ -30,7 +30,7 @@ from skimage.registration import phase_cross_correlation from agora.abc import ParametersABC, ProcessABC from agora.io.writer import BridgeH5 -from aliby.io.image import Image, ImageLocal, ImageDir +from 
aliby.io.image import Image, ImageLocalOME, ImageDir from aliby.tile.traps import segment_traps @@ -262,7 +262,7 @@ class Tiler(ProcessABC): @classmethod def from_h5( cls, - image: t.Union[Image, ImageLocal, ImageDir], + image: t.Union[Image, ImageLocalOME, ImageDir], filepath: t.Union[str, PosixPath], parameters: TilerParameters = None, ): diff --git a/tests/aliby/network/test_tiler.py b/tests/aliby/network/test_tiler.py index e4e0473059149f4c788bb9c7509668898bfe27a2..52187b58ee4db91d11856bf7cdd6584adf4c1b6d 100644 --- a/tests/aliby/network/test_tiler.py +++ b/tests/aliby/network/test_tiler.py @@ -1,6 +1,6 @@ import argparse -from aliby.io.image import ImageLocal +from aliby.io.image import ImageLocalOME # from aliby.experiment import ExperimentLocal from aliby.tile.tiler import Tiler, TilerParameters @@ -20,7 +20,7 @@ def define_parser(): def initialise_objects(data_path, template=None): - image = ImageLocal(data_path) + image = ImageLocalOME(data_path) tiler = Tiler.from_image(image, TilerParameters.default()) return tiler