From 5c9484033954cd9f5d4b5bddffd5f3ce6e17cb22 Mon Sep 17 00:00:00 2001 From: pswain <peter.swain@ed.ac.uk> Date: Thu, 30 May 2024 14:58:59 +0100 Subject: [PATCH] change: alibylite.org; dataset to get_position_ids --- alibylite.org | 75 +++++++++++++++++++++++++++ src/agora/io/cells.py | 2 +- src/aliby/io/dataset.py | 32 ++++++------ src/extraction/core/extractor.py | 21 ++++---- src/extraction/core/functions/cell.py | 5 +- 5 files changed, 107 insertions(+), 28 deletions(-) create mode 100644 alibylite.org diff --git a/alibylite.org b/alibylite.org new file mode 100644 index 0000000..1e686ec --- /dev/null +++ b/alibylite.org @@ -0,0 +1,75 @@ +#+title: aliby + +The microscope visits multiple positions during an experiment. Each position may have a different setup or strain. We denote this strain as a *group*. For every position, we take an image for every time point. + +We divide all images into *tiles*, one per trap. Baby determines masks, mother-bud pairs, and tracking for each tile. We obtain data on individual cells first for each tile and then for each position for all time points: *cells* and *signal* provide this information for a position; *grouper* concatenates over all positions. + +All global parameters, such as possible fluorescence channels and pixel sizes, are stored in *global_parameters*. + +* aliby/pipeline +Runs the *tiler*, *baby*, and *extraction* steps of the pipeline, and then *postprocessing*. +The *run* function loops through positions, calling *run_one_position*, which loops through time points. +For each time point, each step of the pipeline has a *_run_tp* function, which StepABC renames to *run_tp*, to process one time point for a position. +Extractor does not have an independent writer, but writes to the h5 file in *_run_tp*. + +* aliby/tile/tiler +Tiles image into smaller regions of interest or tiles, one per trap, for faster processing. We ignore tiles without cells. 
+ +* aliby/baby_sitter +Interfaces with Baby through the *BabyRunner* class, which returns a dict of Baby's results. + +* extraction/core/extractor +Extracts areas and volumes and the fluorescence data from the images for the cells in one position, via the image tiles, using the cell masks found by Baby. + +We save the cell properties we wish to extract as a nested dictionary, such as + {'general': {'None': ['area', 'volume', 'eccentricity']}}. +*extract_tp* extracts data for one time point. + +** extraction/core/functions/cell +Defines the standard functions, such as area and median, that we apply to pixels from individual cells. +** extraction/core/functions/trap +Determines properties of a tile's background. +** extraction/core/functions/distributors +Collapses multiple z-sections to a 2D image. +** extraction/core/functions/defaults +Defines the standard fluorescence signals and metrics, like median, we extract in *exparams_from_meta*. +** extraction/core/functions/custom/localisation +Defines more complex functions to apply to cells, such as *nuc_est_conv*, which estimates nuclear localisation of a fluorescent protein. + +* agora/bridge +Interfaces with h5 files. +* agora/cells +Accesses information on cells and masks in tiles from an h5 file. +* agora/signal +Gets extracted properties, such as median fluorescence, for all cells and all time points from an h5 file - data for one position. + +Signal applies picking and merging of cells using the choices made by *picker* and *merger*. *get_raw* gets the data from the h5 file without any picking and merging. + +* postprocessor/core/processor +For one position, the *run* function picks appropriate cells and merges tracklets via *run_prepost*, and then runs processes, such as the *buddings* and *bud_metric* functions, on signals, such as *volume*, to get new signals, such as *buddings* and *bud_volume*. + +*run_process* writes the results to an h5 file. 
+ +The class *PostProcessorParameters* lists the standard processes we perform, such as running *buddings* and *bud_metric* on *area*. + +* postprocessor/core/reshapers/picker +Selects cells from a Signal for which there is lineage information and by how long they remain in the experiment, writing the choices to the h5 file. +* postprocessor/core/reshapers/merger +Combines tracks that should be a single track of the same cell, writing the choices to the h5 file. +* agora/utils/indexing +Core code needed when *picker* uses Baby's lineage information to select mother-bud pairs in a Signal. + +* postprocessor/grouper +*concat_signal*: Concatenates signals from different h5 files - we have one per position - to generate dataframes for the entire experiment. + It uses either *concat_signal_ind* for independent signals or *concat_standard*. + +* aliby/utils/argo +Gets information on the data available in an OMERO database. + +* aliby/io/omero +Contains functions to interact with OMERO and extract information on an *Image* corresponding to an OMERO image ID or a *Dataset* corresponding to an OMERO experiment ID. + +* Language +We use *tile* and *trap* interchangeably, but *tile* is preferred. +We use *bud* and *daughter* interchangeably, but *bud* is preferred. +We use *record* and *kymograph* interchangeably, but *record* is preferred. diff --git a/src/agora/io/cells.py b/src/agora/io/cells.py index a8ea422..29851f0 100644 --- a/src/agora/io/cells.py +++ b/src/agora/io/cells.py @@ -16,7 +16,7 @@ class Cells: """ Extract information from an h5 file. - Use output from BABY to find cells detected, get, and fill, edge masks + Use output from BABY to find cells detected, get and fill edge masks, and retrieve mother-bud relationships. 
This class accesses in the h5 file: diff --git a/src/aliby/io/dataset.py b/src/aliby/io/dataset.py index 0ec3185..dabd00e 100644 --- a/src/aliby/io/dataset.py +++ b/src/aliby/io/dataset.py @@ -3,29 +3,31 @@ Dataset is a group of classes to manage multiple types of experiments: - Remote experiments on an OMERO server (located in src/aliby/io/omero.py) - Local experiments in a multidimensional OME-TIFF image containing the metadata - - Local experiments in a directory containing multiple positions in independent images with or without metadata + - Local experiments in a directory containing multiple positions in independent +images with or without metadata """ import os import shutil import time import typing as t -from abc import ABC, abstractproperty, abstractmethod +from abc import ABC, abstractmethod, abstractproperty from pathlib import Path -from agora.io.bridge import BridgeH5 from aliby.io.image import ImageLocalOME +from aliby.io.omero import Dataset def dispatch_dataset(expt_id: int or str, **kwargs): """ Find paths to the data. - Connects to OMERO if data is remotely available. + Connect to OMERO if data is remotely available. Parameters ---------- expt_id: int or str - To identify the data, either an OMERO ID or an OME-TIFF file or a local directory. + To identify the data, either an OMERO ID or an OME-TIFF file + or a local directory. 
Returns ------- @@ -33,20 +35,18 @@ def dispatch_dataset(expt_id: int or str, **kwargs): """ if isinstance(expt_id, int): # data available online - from aliby.io.omero import Dataset - return Dataset(expt_id, **kwargs) elif isinstance(expt_id, str): # data available locally expt_path = Path(expt_id) if expt_path.is_dir(): - # data in multiple folders + # data in multiple folders, such as zarr return DatasetLocalDir(expt_path) else: # data in one folder as OME-TIFF files return DatasetLocalOME(expt_path) else: - raise Warning(f"{expt_id} is an invalid expt_id") + raise Warning(f"{expt_id} is an invalid expt_id.") class DatasetLocalABC(ABC): @@ -103,7 +103,7 @@ class DatasetLocalABC(ABC): pass @abstractmethod - def get_images(self): + def get_position_ids(self): pass @@ -120,12 +120,13 @@ class DatasetLocalDir(DatasetLocalABC): "%Y%m%d", time.strptime(time.ctime(os.path.getmtime(self.path))) ) - def get_images(self): - """Return a dictionary of folder or file names and their paths. + def get_position_ids(self): + """ + Return a dict of file paths for each position. 
FUTURE 3.12 use pathlib is_junction to pick Dir or File """ - images = { + position_ids_dict = { item.name: item for item in self.path.glob("*/") if item.is_dir() @@ -136,8 +137,7 @@ class DatasetLocalDir(DatasetLocalABC): ) or item.suffix[1:] in self._valid_suffixes } - - return images + return position_ids_dict class DatasetLocalOME(DatasetLocalABC): @@ -154,7 +154,7 @@ class DatasetLocalOME(DatasetLocalABC): """Get the date from the metadata of the first position.""" return ImageLocalOME(list(self.get_position_ids().values())[0]).date - def get_images(self): + def get_position_ids(self): """Return a dictionary with the names of the image files.""" return { f.name: str(f) diff --git a/src/extraction/core/extractor.py b/src/extraction/core/extractor.py index fa379e4..b34bc49 100644 --- a/src/extraction/core/extractor.py +++ b/src/extraction/core/extractor.py @@ -122,6 +122,7 @@ class Extractor(StepABC): or leaf level. """ + # get pixel_size; z_size; spacing default_meta = global_parameters.imaging_specifications def __init__( @@ -758,7 +759,7 @@ class Extractor(StepABC): elif isinstance(tps, int): tps = [tps] # store results in dict - d = {} + extract_dict = {} for tp in tps: # extract for each time point and convert to dict of pd.Series new = flatten_nesteddict( @@ -767,21 +768,23 @@ class Extractor(StepABC): tp=tp, ) # concatenate with data extracted from earlier time points - for k in new.keys(): - d[k] = pd.concat((d.get(k, None), new[k]), axis=1) + for key in new.keys(): + extract_dict[key] = pd.concat( + (extract_dict.get(key, None), new[key]), axis=1 + ) # add indices to pd.Series containing the extracted data - for k in d.keys(): + for k in extract_dict.keys(): indices = ["experiment", "position", "trap", "cell_label"] idx = ( - indices[-d[k].index.nlevels :] - if d[k].index.nlevels > 1 + indices[-extract_dict[k].index.nlevels :] + if extract_dict[k].index.nlevels > 1 else [indices[-2]] ) - d[k].index.names = idx + extract_dict[k].index.names = idx # save 
if save: - self.save_to_h5(d) - return d + self.save_to_h5(extract_dict) + return extract_dict def save_to_h5(self, dict_series, path=None): """ diff --git a/src/extraction/core/functions/cell.py b/src/extraction/core/functions/cell.py index c83b17e..7570749 100644 --- a/src/extraction/core/functions/cell.py +++ b/src/extraction/core/functions/cell.py @@ -8,9 +8,10 @@ must return only one value. They assume that there are no NaNs in the image. We use the module bottleneck when it performs faster than numpy: -- Median -- values containing NaNs (but we make sure this does not happen) +- median +- values containing NaNs (but we make sure this never happens). """ + import math import typing as t -- GitLab