From 27fd9ffde4fecca45ceb61bc48f79dcd2f3c8159 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Al=C3=A1n=20Mu=C3=B1oz?= <alan.munoz@ed.ac.uk>
Date: Fri, 6 Jan 2023 14:50:45 +0000
Subject: [PATCH] [WIP] feat(dataset): add DatasetDir

---
 src/aliby/io/dataset.py | 58 +++++++++++++++++++++++++++++++----------
 src/aliby/pipeline.py   |  2 +-
 2 files changed, 45 insertions(+), 15 deletions(-)

diff --git a/src/aliby/io/dataset.py b/src/aliby/io/dataset.py
index e4042bcc..62366d0f 100644
--- a/src/aliby/io/dataset.py
+++ b/src/aliby/io/dataset.py
@@ -5,9 +5,11 @@ Dataset is a group of classes to manage multiple types of experiments:
 - Local experiments in a multidimensional OME-TIFF image containing the metadata
 - Local experiments in a directory containing multiple positions in independent images with or without metadata
 """
+import os
 import shutil
+import time
 import typing as t
-from abc import ABC, abstractproperty
+from abc import ABC, abstractproperty, abstractmethod
 from pathlib import Path, PosixPath
 from typing import Union
 
@@ -27,7 +29,7 @@ class DatasetLocalABC(ABC):
     _valid_meta_suffixes = ("txt", "log")
 
     def __init__(self, dpath: Union[str, PosixPath], *args, **kwargs):
-        self.fpath = Path(dpath)
+        self.path = Path(dpath)
 
     def __enter__(self):
         return self
@@ -37,34 +39,26 @@ class DatasetLocalABC(ABC):
 
     @property
     def dataset(self):
-        return self.fpath
+        return self.path
 
     @property
     def name(self):
-        return self.fpath.name
+        return self.path.name
 
     @property
     def unique_name(self):
-        return self.fpath.name
+        return self.path.name
 
     @abstractproperty
     def date(self):
         pass
 
-    def get_images(self):
-        # Fetches all valid formats and overwrites if duplicates with different suffix
-        return {
-            f.name: str(f)
-            for suffix in self._valid_suffixes
-            for f in self.fpath.glob(f"*.{suffix}")
-        }
-
     @property
     def files(self):
         if not hasattr(self, "_files"):
             self._files = {
                 f: f
-                for f in self.fpath.rglob("*")
+                for f in self.path.rglob("*")
                 if any(
                     str(f).endswith(suffix)
                     for suffix in self._valid_meta_suffixes
@@ -78,11 +72,39 @@ class DatasetLocalABC(ABC):
             shutil.copy(annotation, root_dir / name.name)
         return True
 
+    @abstractmethod
+    def get_images(self):
+        # Return location of images and their unique names
+        pass
+
 
 class DatasetLocalDir(DatasetLocalABC):
+    """
+    Organise an entire dataset, composed of multiple images, as a directory containing directories with individual files.
+    It relies on ImageDir to manage images.
+    """
+
     def __init__(self, dpath: Union[str, PosixPath], *args, **kwargs):
         super().__init__(dpath)
 
+    @property
+    def date(self):
+        # Use folder creation date, for cases where metadata is minimal
+        return time.strftime(
+            "%Y%m%d", time.strptime(time.ctime(os.path.getmtime(self.path)))
+        )
+
+    def get_images(self):
+        return [
+            folder
+            for folder in self.path.glob("*/")
+            if any(
+                path
+                for suffix in self._valid_meta_suffixes
+                for path in folder.glob(f"*.{suffix}")
+            )
+        ]
+
 
 class DatasetLocalOME(DatasetLocalABC):
     """Load a dataset from a folder
@@ -102,6 +124,14 @@ class DatasetLocalOME(DatasetLocalABC):
         # Access the date from the metadata of the first position
         return ImageLocalOME(list(self.get_images().values())[0]).date
 
+    def get_images(self):
+        # Fetches all valid formats and overwrites if duplicates with different suffix
+        return {
+            f.name: str(f)
+            for suffix in self._valid_suffixes
+            for f in self.path.glob(f"*.{suffix}")
+        }
+
 
 class Dataset(BridgeOmero):
     def __init__(self, expt_id, **server_info):
diff --git a/src/aliby/pipeline.py b/src/aliby/pipeline.py
index a3392433..989a0e4a 100644
--- a/src/aliby/pipeline.py
+++ b/src/aliby/pipeline.py
@@ -29,7 +29,7 @@ from agora.io.writer import (  # BabyWriter,
 )
 from aliby.baby_client import BabyParameters, BabyRunner
 from aliby.haystack import initialise_tf
-from aliby.io.dataset import Dataset, DatasetLocal
+from aliby.io.dataset import Dataset, DatasetLocalOME, DatasetLocalDir
 from aliby.io.image import get_image_class
 from aliby.tile.tiler import Tiler, TilerParameters
 from extraction.core.extractor import Extractor, ExtractorParameters
-- 
GitLab
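A minimal usage sketch of the two classes this patch touches, assuming both accept a local path as their first argument (as DatasetLocalABC.__init__ does). The open_local_dataset helper and its directory-layout heuristic are illustrative only and are not part of the diff.

    from pathlib import Path

    from aliby.io.dataset import DatasetLocalDir, DatasetLocalOME


    def open_local_dataset(dpath):
        """Choose a local dataset class from the folder layout (illustrative heuristic)."""
        dpath = Path(dpath)
        # Assumption: positions stored as sub-directories -> DatasetLocalDir;
        # a flat folder of OME-TIFF files -> DatasetLocalOME.
        has_subdirs = any(p.is_dir() for p in dpath.iterdir())
        dataset = DatasetLocalDir(dpath) if has_subdirs else DatasetLocalOME(dpath)
        print(f"{dataset.name}: acquired {dataset.date}")
        # Per the patch, DatasetLocalDir.get_images() returns a list of position
        # folders, while DatasetLocalOME.get_images() returns a {name: path} dict.
        return dataset.get_images()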