Commit 4f42962e authored by Alán Muñoz

Add extraction to pipeline script

parent f7c43c6e
@@ -31,94 +31,153 @@ class Cells:
        pass

    @staticmethod
    def from_source(source: Union[PosixPath, str], kind: str = None):
        if isinstance(source, str):
            source = Path(source)
        if kind is None:
            # Infer kind from filename
            kind = "matlab" if source.suffix == ".mat" else "hdf5"
        return cell_factory(source, kind)

    @staticmethod
    def _asdense(array):
        if not isdense(array):
            array = array.todense()
        return array

    @staticmethod
    def _astype(array, kind):
        # Convert sparse arrays if needed and if kind is 'mask' it fills the outline
        array = Cells._asdense(array)
        if kind == "mask":
            array = ndimage.binary_fill_holes(array).astype(int)
        return array

    # def is_or_in(item, arr):  # TODO CLEAN if not being used
    #     if isinstance(arr, (list, np.ndarray)):
    #         return item in arr
    #     else:
    #         return item == arr
from core.io.hdf5 import hdf_dict
from functools import wraps
class CellsHDF(Cells):
    # DONE implement cells information from HDF5 file format
    # TODO combine all the cells of one strain into a cellResults?
    # TODO filtering
    def __init__(self, file):
        self._file = file
        self._info = hdf_dict(self._file.get("/cell_info"))

    def __getitem__(self, item):
        _item = "_" + item
        if not hasattr(self, _item):
            setattr(self, _item, self._info[item][()])
        return getattr(self, _item)

    def _get_idx(self, cell_id, trap_id):
        return (self["cell_label"] == cell_id) & (self["trap"] == trap_id)

    def where(self, cell_id, trap_id):
        indices = self._get_idx(cell_id, trap_id)
        return self["timepoints"][indices], indices

    def outline(self, cell_id, trap_id):
        times, indices = self.where(cell_id, trap_id)
        return times, self["edgemasks"][indices]

    def mask(self, cell_id, trap_id):
        times, outlines = self.outline(cell_id, trap_id)
        return times, np.array(
            [ndimage.morphology.binary_fill_holes(o) for o in outlines]
        )

    def at_time(self, timepoint, kind="mask"):
        self.traps = self["trap"]
        self.edgemasks = self["edgemasks"]
        tp_indices = self["timepoint"] == timepoint
        trap_indices = [
            self.edgemasks[(self.traps == trap_id) & tp_indices]
            for trap_id in set(self.traps)
        ]
        return [
            [self._astype(cells, kind) for cells in trap_id] for trap_id in trap_indices
        ]

    def split_by_trap_timepoint(self):
        # Convert from hdf5 flat format to nested lists
        tp_set = set(self["timepoint"])
        trap_set = set(self["trap"])
        cell_labels = [
            [[] for _ in range(max(trap_set) + 1)] for _ in range(max(tp_set) + 1)
        ]
        for lbl, tp, trap in zip(self["cell_label"], self["timepoint"], self["trap"]):
            cell_labels[tp][trap].append(lbl)  # TODO watch out for different timepoints
        return cell_labels

    def close(self):
        self._file.close()
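# Usage sketch (assumptions: an open h5py.File containing the /cell_info group
# written by the pipeline script in this commit; cell/trap ids are illustrative):
#   cells = CellsHDF(h5py.File("position1store.h5", "r"))
#   times, indices = cells.where(cell_id=1, trap_id=0)  # time points where the cell exists
#   times, masks = cells.mask(cell_id=1, trap_id=0)     # filled edgemasks per time point
#   per_trap = cells.at_time(0, kind="mask")            # all traps at time point 0
#   cells.close()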
class CellsMat(Cells):
    def __init__(self, mat_object):
        super(CellsMat, self).__init__()
        # TODO add __contains__ to the matObject
        timelapse_traps = mat_object.get(
            "timelapseTrapsOmero", mat_object.get("timelapseTraps", None)
        )
        if timelapse_traps is None:
            raise NotImplementedError(
                "Could not find a timelapseTraps or "
                "timelapseTrapsOmero object. Cells "
                "from cellResults not implemented"
            )
        else:
            self.trap_info = timelapse_traps["cTimepoint"]["trapInfo"]

            if isinstance(self.trap_info, list):
                self.trap_info = {
                    k: list([res.get(k, []) for res in self.trap_info])
                    for k in self.trap_info[0].keys()
                }

    def where(self, cell_id, trap_id):
        times, indices = zip(
            *[
                (tp, np.where(cell_id == x)[0][0])
                for tp, x in enumerate(self.trap_info["cellLabel"][:, trap_id].tolist())
                if np.any(cell_id == x)
            ]
        )
        return times, indices

    def outline(self, cell_id, trap_id):
        times, indices = self.where(cell_id, trap_id)
        info = self.trap_info["cell"][times, trap_id]

        def get_segmented(cell, index):
            if cell["segmented"].ndim == 0:
                return cell["segmented"][()].todense()
            else:
                return cell["segmented"][index].todense()

        segmentation_outline = [
            get_segmented(cell, idx) for idx, cell in zip(indices, info)
        ]
        return times, np.array(segmentation_outline)

    def mask(self, cell_id, trap_id):
        times, outlines = self.outline(cell_id, trap_id)
        return times, np.array(
            [ndimage.morphology.binary_fill_holes(o) for o in outlines]
        )

    def at_time(self, timepoint, kind="outline"):
        """Returns the segmentations for all the cells at a given timepoint.

        FIXME: this is extremely hacky and accounts for differently saved
        ...
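In the Matlab backend, trapInfo["cellLabel"] is indexed as [time point, trap], so the same cell/trap queries work through the factory defined above. A minimal sketch, with a purely illustrative file name and ids, and assuming cell_factory builds a CellsMat for "matlab" sources:

    cells = Cells.from_source("experiment_cTimelapse.mat")  # ".mat" suffix selects the matlab backend
    times, masks = cells.mask(cell_id=1, trap_id=0)          # time points and filled outlines for that cell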
@@ -26,69 +26,83 @@ import time
# %%
t = time.perf_counter()

expt = ExperimentOMERO(
    18020,  # Experiment ID on OMERO
    "islay.bio.ed.ac.uk",  # OMERO host
    port=4064,  # This is default
    save_dir="./data",
    username="upload",
    password="***REMOVED***",
)

tiler = Tiler(expt)

# TODO pull config out of the metadata
config = {
    "camera": "prime95b",
    "channel": "Brightfield",
    "zoom": "60x",
    "n_stacks": "5z",
    "default_image_size": 96,
}

runner = BabyRunner(tiler, **config)
# extractor = ExtractionRunner(tiler)

# Pipeline

store_name = "store.h5"  # The base name
keys = create_keys(expt)  # Run for full experiment

# For each position in the experiment, create store in expt.run
print(f"Running expt for {keys}")
keys = expt.run(keys, store_name)

# For each position/time-point run the trap location algorithm and then save
# to store
print(f"Running tiler for {keys}")
keys = tiler.run(keys, store_name)  # Raises an error if the store does not
# exist
# stores under /trap_info/

# For each position and timepoint, run the BABY algorithm
run_config = {"with_edgemasks": True, "assign_mothers": True}
runner.run(keys, store_name, verbose=True, **run_config)  # Raises an error if the
# store does not exist
# stores under /cell_info/
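# Illustrative check (assumption: CellsHDF, from the cells module changed in this
# commit, is importable here and can read the /cell_info group written above):
#   with h5py.File(expt.root_dir / f"{expt.positions[0]}{store_name}", "r") as f:
#       cells = CellsHDF(f)
#       masks_tp0 = cells.at_time(0, kind="mask")  # per-trap filled masks at time point 0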
# For each position and time-point, run the extractor
# extractor.run()  # Raises an error if the store does not exist
# store under /extraction/

# OPTIONAL
# Run post-processing.

total_time = time.perf_counter() - t
print(f"Total time {total_time:.2f}")

# %%
print(
    f"{total_time / 60:.2f} minutes for {len(expt.positions)} positions at {expt.shape[1]} timepoints:"
)
per_tp_per_pos = total_time / (len(expt.positions) * expt.shape[1])
print(f"{per_tp_per_pos:.2f}s per time point per position")
print(f"{(per_tp_per_pos * 20 * 200) / 3600:.2f}h for an average experiment.")
# %%
# TEST RESULTS
# check results
position_test = expt.positions[0]
with h5py.File(expt.root_dir / f"{position_test}{store_name}", "r") as hfile:
    print(hfile.keys())
    for group in hfile:
        print(group)
        print(hfile[group].keys())

# %%
from extraction.core.extractor import Extractor
from extraction.core.parameters import Parameters
from extraction.core.functions.defaults import get_params

params = Parameters(**get_params("batgirl_fast"))
ext = Extractor.from_object(params, object=tiler)
tp0 = ext.extract_exp()

expt.close()
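Since the comments above say the extractor stores results under /extraction/, one way to verify the new step is to reopen the per-position store, mirroring the test block earlier in the script. This is a sketch only; it assumes the extractor writes an "extraction" group into the same per-position store:

    with h5py.File(expt.root_dir / f"{position_test}{store_name}", "r") as hfile:
        if "extraction" in hfile:
            print(hfile["extraction"].keys())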