From 27fd9ffde4fecca45ceb61bc48f79dcd2f3c8159 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Al=C3=A1n=20Mu=C3=B1oz?= <alan.munoz@ed.ac.uk>
Date: Fri, 6 Jan 2023 14:50:45 +0000
Subject: [PATCH] [WIP] feat(dataset): add DatasetDir

---
 src/aliby/io/dataset.py | 58 +++++++++++++++++++++++++++++++----------
 src/aliby/pipeline.py   |  2 +-
 2 files changed, 45 insertions(+), 15 deletions(-)

diff --git a/src/aliby/io/dataset.py b/src/aliby/io/dataset.py
index e4042bcc..62366d0f 100644
--- a/src/aliby/io/dataset.py
+++ b/src/aliby/io/dataset.py
@@ -5,9 +5,11 @@ Dataset is a group of classes to manage multiple types of experiments:
  - Local experiments in a multidimensional OME-TIFF image containing the metadata
  - Local experiments in a directory containing multiple positions in independent images with or without metadata
 """
+import os
 import shutil
+import time
 import typing as t
-from abc import ABC, abstractproperty
+from abc import ABC, abstractproperty, abstractmethod
 from pathlib import Path, PosixPath
 from typing import Union
 
@@ -27,7 +29,7 @@ class DatasetLocalABC(ABC):
     _valid_meta_suffixes = ("txt", "log")
 
     def __init__(self, dpath: Union[str, PosixPath], *args, **kwargs):
-        self.fpath = Path(dpath)
+        self.path = Path(dpath)
 
     def __enter__(self):
         return self
@@ -37,34 +39,26 @@ class DatasetLocalABC(ABC):
 
     @property
     def dataset(self):
-        return self.fpath
+        return self.path
 
     @property
     def name(self):
-        return self.fpath.name
+        return self.path.name
 
     @property
     def unique_name(self):
-        return self.fpath.name
+        return self.path.name
 
     @abstractproperty
     def date(self):
         pass
 
-    def get_images(self):
-        # Fetches all valid formats and overwrites if duplicates with different suffix
-        return {
-            f.name: str(f)
-            for suffix in self._valid_suffixes
-            for f in self.fpath.glob(f"*.{suffix}")
-        }
-
     @property
     def files(self):
         if not hasattr(self, "_files"):
             self._files = {
                 f: f
-                for f in self.fpath.rglob("*")
+                for f in self.path.rglob("*")
                 if any(
                     str(f).endswith(suffix)
                     for suffix in self._valid_meta_suffixes
@@ -78,11 +72,39 @@ class DatasetLocalABC(ABC):
             shutil.copy(annotation, root_dir / name.name)
         return True
 
+    @abstractmethod
+    def get_images(self):
+        # Return location of images and their unique names
+        pass
+
 
 class DatasetLocalDir(DatasetLocalABC):
+    """
+    Organise an entire dataset, composed of multiple images, as a directory containing directories with individual files.
+    It relies on ImageDir to manage images.
+    """
+
     def __init__(self, dpath: Union[str, PosixPath], *args, **kwargs):
         super().__init__(dpath)
 
+    @property
+    def date(self):
+        # Use folder modification time (getmtime), for cases where metadata is minimal
+        return time.strftime(
+            "%Y%m%d", time.strptime(time.ctime(os.path.getmtime(self.path)))
+        )
+
+    def get_images(self):
+        return [
+            folder
+            for folder in self.path.glob("*/")
+            if any(
+                path
+                for suffix in self._valid_meta_suffixes
+                for path in folder.glob(f"*.{suffix}")
+            )
+        ]
+
 
 class DatasetLocalOME(DatasetLocalABC):
     """Load a dataset from a folder
@@ -102,6 +124,14 @@ class DatasetLocalOME(DatasetLocalABC):
         # Access the date from the metadata of the first position
         return ImageLocalOME(list(self.get_images().values())[0]).date
 
+    def get_images(self):
+        # Map file name (suffix included) to its path string, for every file with a valid suffix
+        return {
+            f.name: str(f)
+            for suffix in self._valid_suffixes
+            for f in self.path.glob(f"*.{suffix}")
+        }
+
 
 class Dataset(BridgeOmero):
     def __init__(self, expt_id, **server_info):
diff --git a/src/aliby/pipeline.py b/src/aliby/pipeline.py
index a3392433..989a0e4a 100644
--- a/src/aliby/pipeline.py
+++ b/src/aliby/pipeline.py
@@ -29,7 +29,7 @@ from agora.io.writer import (  # BabyWriter,
 )
 from aliby.baby_client import BabyParameters, BabyRunner
 from aliby.haystack import initialise_tf
-from aliby.io.dataset import Dataset, DatasetLocal
+from aliby.io.dataset import Dataset, DatasetLocalOME, DatasetLocalDir
 from aliby.io.image import get_image_class
 from aliby.tile.tiler import Tiler, TilerParameters
 from extraction.core.extractor import Extractor, ExtractorParameters
-- 
GitLab