Compare revisions
Showing with 1385 additions and 2898 deletions
import itertools
import re
import typing as t
from pathlib import Path
import numpy as np
from baby import BabyCrawler, modelsets
from agora.abc import ParametersABC, StepABC
class BabyParameters(ParametersABC):
"""Parameters used for running BABY."""
def __init__(
self,
modelset_name,
clogging_thresh,
min_bud_tps,
isbud_thresh,
):
"""Initialise parameters for BABY."""
# pixel_size is specified in BABY's model sets
self.modelset_name = modelset_name
self.clogging_thresh = clogging_thresh
self.min_bud_tps = min_bud_tps
self.isbud_thresh = isbud_thresh
@classmethod
def default(cls, **kwargs):
"""Define default parameters; kwargs choose BABY model set."""
return cls(
modelset_name=get_modelset_name_from_params(**kwargs),
clogging_thresh=1,
min_bud_tps=3,
isbud_thresh=0.5,
)
def update_baby_modelset(self, path: t.Union[str, Path, t.Dict[str, str]]):
"""
Replace default BABY model and flattener.
Both are saved in a folder by our retraining script.
"""
if isinstance(path, dict):
weights_flattener = {k: Path(v) for k, v in path.items()}
else:
weights_dir = Path(path)
weights_flattener = {
"flattener_file": weights_dir.parent / "flattener.json",
"morph_model_file": weights_dir / "weights.h5",
}
self.update("modelset_name", weights_flattener)
class BabyRunner(StepABC):
"""
A BabyRunner object for cell segmentation.
Segments one time point at a time.
"""
def __init__(self, tiler, parameters=None, **kwargs):
"""Instantiate from a Tiler object."""
self.tiler = tiler
modelset_name = (
get_modelset_name_from_params(**kwargs)
if parameters is None
else parameters.modelset_name
)
tiler_z = self.tiler.shape[-3]
if f"{tiler_z}z" not in modelset_name:
raise KeyError(
f"Tiler z-stack ({tiler_z}) and model"
f" ({modelset_name}) do not match."
)
if parameters is None:
brain = modelsets.get(modelset_name)
else:
brain = modelsets.get(
modelset_name,
clogging_thresh=parameters.clogging_thresh,
min_bud_tps=parameters.min_bud_tps,
isbud_thresh=parameters.isbud_thresh,
)
self.crawler = BabyCrawler(brain)
self.brightfield_channel = self.tiler.ref_channel_index
@classmethod
def from_tiler(cls, parameters: BabyParameters, tiler):
"""Explicitly instantiate from a Tiler object."""
return cls(tiler, parameters)
def get_data(self, tp):
"""Get image and re-arrange axes."""
img_from_tiler = self.tiler.get_tp_data(tp, self.brightfield_channel)
# move z axis to the last axis; Baby expects (n, x, y, z)
img = np.moveaxis(img_from_tiler, 1, destination=-1)
return img
def _run_tp(
self,
tp,
refine_outlines=True,
assign_mothers=True,
with_edgemasks=True,
**kwargs,
):
"""Segment data from one time point."""
img = self.get_data(tp)
segmentation = self.crawler.step(
img,
refine_outlines=refine_outlines,
assign_mothers=assign_mothers,
with_edgemasks=with_edgemasks,
**kwargs,
)
res = format_segmentation(segmentation, tp)
return res
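# A hedged sketch (not in the original): segmenting one time point. Assumes
# `tiler` is an aliby Tiler over a 5z brightfield stack and that StepABC
# exposes a public run_tp wrapper around _run_tp.
def _example_run_baby(tiler):
    runner = BabyRunner.from_tiler(BabyParameters.default(), tiler)
    # get_data rearranges tiles to (n, x, y, z), the order BABY expects
    return runner.run_tp(0)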
def get_modelset_name_from_params(
imaging_device="alcatras",
channel="brightfield",
camera="sCMOS",
zoom="60x",
n_stacks="5z",
):
"""Get the appropriate model set from BABY's trained models."""
# list of models - microscopy setups - for which BABY has been trained
# cameras prime95 and evolve have become sCMOS and EMCCD
possible_models = list(modelsets.remote_modelsets()["models"].keys())
# filter possible_models
params = [
str(x) if x is not None else ".+"
for x in [imaging_device, channel.lower(), camera, zoom, n_stacks]
]
params_regex = re.compile("-".join(params) + "$")
valid_models = list(filter(params_regex.search, possible_models))
# check that there are valid models
if len(valid_models) == 1:
return valid_models[0]
else:
raise KeyError(
"Error in finding BABY model sets matching {}".format(
", ".join(params)
)
)
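# A worked example of the filter above (model-set names are hypothetical):
# None parameters become ".+" wildcards, and the joined regex must match the
# end of the model-set name.
def _example_modelset_regex():
    possible = [
        "yeast-alcatras-brightfield-sCMOS-60x-5z",
        "yeast-alcatras-brightfield-EMCCD-60x-5z",
    ]
    regex = re.compile(
        "-".join(["alcatras", "brightfield", "sCMOS", "60x", "5z"]) + "$"
    )
    matches = list(filter(regex.search, possible))
    assert matches == ["yeast-alcatras-brightfield-sCMOS-60x-5z"]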
def format_segmentation(segmentation, tp):
"""
Format BABY's results for a single time point into a dict.
The dict has BABY's outputs as keys and lists of the results
for each segmented cell as values.
Parameters
----------
segmentation: list
A list of BABY's results as dicts for each tile.
tp: int
The time point.
"""
# segmentation is a list of dictionaries for each tile
for i, tile_dict in enumerate(segmentation):
# assign the trap ID to each cell identified
tile_dict["trap"] = [i] * len(tile_dict["cell_label"])
# record mothers for each labelled cell
tile_dict["mother_assign_dynamic"] = np.array(
tile_dict["mother_assign"]
)[np.array(tile_dict["cell_label"], dtype=int) - 1]
# merge into a dict with BABY's outputs as keys and
# lists of results for all cells as values
merged = {
output: list(
itertools.chain.from_iterable(
tile_dict[output] for tile_dict in segmentation
)
)
for output in segmentation[0].keys()
}
# remove mother_assign
merged.pop("mother_assign", None)
# ensure that each value is a list of the same length
no_cells = min([len(v) for v in merged.values()])
merged = {k: v[:no_cells] for k, v in merged.items()}
# define time point key
merged["timepoint"] = [tp] * no_cells
return merged
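# A worked example of format_segmentation (values hypothetical): two tiles
# holding three cells in total.
def _example_format_segmentation():
    seg = [
        {"cell_label": [1, 2], "mother_assign": [0, 1], "centres": ["a", "b"]},
        {"cell_label": [1], "mother_assign": [0], "centres": ["c"]},
    ]
    out = format_segmentation(seg, tp=3)
    assert out["trap"] == [0, 0, 1]
    assert out["timepoint"] == [3, 3, 3]
    # per cell, the label of its assigned mother (0 means no mother found)
    assert out["mother_assign_dynamic"] == [0, 1, 0]
    assert "mother_assign" not in out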
"""
Command Line Interface utilities.
"""
"""
Asynchronous annotation (in one thread). Used as a base to build threading-based annotation.
Currently only works on UNIX-like systems due to using "/" to split addresses.
Usage example
From Python:
$ python annotator.py --image_path path/to/folder/with/zarr/images --results_path path/to/folder/with/h5files --pos position_name --ncells max_n_to_annotate
As an executable (installed via poetry):
$ annotator.py --image_path path/to/folder/with/zarr/images --results_path path/to/folder/with/h5files --pos position_name --ncells max_n_to_annotate
During annotation:
- Assign a (binary) label by typing '1' or '2'.
- Type 'u' to undo.
- Type 's' to skip.
- Type 'q' to quit.
File will be saved in: ./YYYY-MM-DD_annotation/annotation.csv, where YYYY-MM-DD is the current date.
"""
import argparse
import logging
import typing as t
from copy import copy
from datetime import datetime
from pathlib import Path
import matplotlib.pyplot as plt
import numpy as np
import readchar
import trio
from agora.utils.cast import _str_to_int
from aliby.utils.vis_tools import _sample_n_tiles_masks
from aliby.utils.plot import stretch
# silence aliby logging below ERROR (level 40)
logging.getLogger("aliby").setLevel(40)
# Defaults
essential = {"image_path": "zarr", "results_path": "h5"}
param_values = dict(
out_dir=f"./{datetime.today().strftime('%Y_%m_%d')}_annotation/",
pos=None,
ncells=100,
min_tp=100,
max_tp=150,
seed=0,
)
annot_filename = "annotation.csv"
# Parsing
parser = argparse.ArgumentParser(
prog="aliby-annot-binary",
description="Annotate cells in a binary manner",
)
for i, arg in enumerate((*essential, *param_values)):
parser.add_argument(
f"--{arg}",
action="store",
default=param_values.get(arg),
required=i < len(essential),
)
args = parser.parse_args()
for i, k in enumerate((*essential, *param_values.keys())):
# Assign essential values as-is
if i < len(essential):
param_values[k] = getattr(args, k)
# Fill additional values
if passed_value := getattr(args, k):
param_values[k] = passed_value
try:
param_values[k] = _str_to_int(passed_value)
except Exception as exc:
pass
for k, suffix in essential.items(): # Autocomplete if fullpath not provided
if not str(param_values[k]).endswith(suffix):
param_values[k] = (
Path(param_values[k]) / f"{ param_values['pos'] }.{suffix}"
)
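# A sketch of the autocompletion above (paths hypothetical): a bare folder
# plus --pos is completed to <folder>/<pos>.<suffix>.
def _example_autocomplete():
    values = {"image_path": "/data/experiment", "pos": "pos001"}
    if not str(values["image_path"]).endswith("zarr"):
        values["image_path"] = Path(values["image_path"]) / f"{values['pos']}.zarr"
    assert str(values["image_path"]).endswith("pos001.zarr")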
# Functions
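# NB: the generate_image stub immediately below is shadowed by the
# generator-based definition of the same name further down; the annotator
# uses the latter.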
async def generate_image(stack, skip: bool = False):
await trio.sleep(1)
result = np.random.randint(100, size=(10, 10))
stack.append(result)
async def draw(data, drawing):
if len(drawing) > 1:
for ax, img in zip(drawing, data):
if np.isnan(img).sum(): # Stretch masked channel
img = stretch(img)
ax.set_data(img)
else:
drawing.set_data(data)
plt.draw()
plt.pause(0.1)
def annotate_image(current_key=None, valid_values: t.Tuple[int] = (1, 2)):
# Show image to annotate
while current_key is None or current_key not in valid_values:
if current_key is not None:
print(
f"Invalid value. Please try with valid values {valid_values}"
)
if (current_key := readchar.readkey()) in "qsu":
# if (current_key := input()) in "qsu":
break
current_key = _parse_input(current_key, valid_values)
return current_key
async def generate_image(
generator,
location_stack: t.List[t.Tuple[np.ndarray, t.Tuple[int, int, int]]],
):
new_location_image = next(generator)
location_stack.append((new_location_image[0], new_location_image[1]))
def _parse_input(value: str, valid_values: t.Tuple[int]):
try:
return int(value)
except ValueError:
print(
f"Non-parsable value. Please try again with valid values {valid_values}"
)
return None
def write_annotation(
experiment_position: str,
out_dir: Path,
annotation: str,
location_stack: t.Tuple[t.Tuple[int, int, int], np.ndarray],
):
location, stack = location_stack
unique_location = list(map(str, (*experiment_position, *location)))
write_into_file(
out_dir / annot_filename,
",".join((*unique_location, str(annotation))) + "\n",
)
bg_zero = copy(stack[1])
bg_zero[np.isnan(bg_zero)] = 0
tosave = np.stack((stack[0], bg_zero.astype(int)))
# np.savez(out_dir / f"{'_'.join( unique_location )}.npz", tosave)
np.save(out_dir / f"{'.'.join( unique_location )}.npy", tosave)
def write_into_file(file_path: str, line: str):
with open(file_path, "a") as f:
f.write(str(line))
async def annotate_images(
image_path, results_path, out_dir, ncells, seed, interval
):
preemptive_cache = 3
location_stack = []
out_dir = Path(out_dir)
out_annot_file = str(out_dir / annot_filename)
generator = _sample_n_tiles_masks(
image_path, results_path, ncells, seed=seed, interval=interval
)
# Fetch a few positions preemptively
async with trio.open_nursery() as nursery:
for _ in range(preemptive_cache):
nursery.start_soon(generate_image, generator, location_stack)
print("parent: waiting for first annotations.")
_, ax = plt.subplots(figsize=(10, 8))
while not location_stack: # Wait until first image is loaded
await trio.sleep(0.1)
from aliby.utils.plot import plot_overlay
# drawing = ax.imshow(location_stack[0][1])
axes = plot_overlay(*location_stack[0][1], ax=ax.axes)
plt.show(block=False)
plt.draw()
plt.pause(0.5) # May be adjusted based on display speed
out_dir.mkdir(parents=True, exist_ok=True)
if not Path(out_annot_file).exists():
write_into_file(
out_annot_file,
",".join(
(
"experiment",
"position",
"tile",
"cell_label",
"tp",
"annotation",
)
)
+ "\n",
)
# Loop until all cells are annotated or the user quits. A while loop is
# used so that 'u' (undo) can step the index back; decrementing the loop
# variable of a for loop has no effect.
i = 1
while i < ncells - preemptive_cache + 1:
# Wait for input
print("Enter a key")
annotation = str(annotate_image())
if annotation == "q":
break
elif annotation == "s":
print("Skipping...")
# continue
elif annotation == "u":
i -= 1
elif isinstance(_str_to_int(annotation), int):
write_annotation(
str(results_path).split(".")[0].split("/")[-2:],
out_dir,
annotation,
location_stack[i],
)
print(location_stack[i][0])
# Append into annotations file
async with trio.open_nursery() as nursery:
nursery.start_soon(generate_image, generator, location_stack)
nursery.start_soon(draw, location_stack[i][1], axes)
i += 1
print("Annotation done!")
# if __name__ == "__main__":
def annotate():
if any(param_values.get(k) is None for k in ("min_tp", "max_tp")):
interval = None
else:
interval = (param_values["min_tp"], param_values["max_tp"])
print(param_values)
trio.run(
annotate_images,
param_values["image_path"],
param_values["results_path"],
param_values["out_dir"],
param_values["ncells"],
param_values["seed"],
interval,
)
#!/usr/bin/env jupyter
import argparse
from agora.utils.cast import _str_to_int
from aliby.pipeline import Pipeline, PipelineParameters
def run():
"""
Run a default microscopy analysis pipeline.
Parse command-line arguments and set default parameter values for running a pipeline, then
construct and execute the pipeline with the parameters obtained. Command-line arguments can
override default parameter values. If a command-line argument is a string representation of
an integer, convert it to an integer.
Returns
-------
None
Examples
--------
FIXME: Add docs.
"""
parser = argparse.ArgumentParser(
prog="aliby-run",
description="Run a default microscopy analysis pipeline",
)
param_values = {
"expt_id": None,
"distributed": 2,
"tps": 2,
"directory": "./data",
"filter": 0,
"host": None,
"username": None,
"password": None,
}
for k in param_values:
parser.add_argument(f"--{k}", action="store")
args = parser.parse_args()
for k in param_values:
if passed_value := _str_to_int(getattr(args, k)):
param_values[k] = passed_value
params = PipelineParameters.default(general=param_values)
p = Pipeline(params)
p.run()
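# A hedged sketch (not in the original): the same pipeline driven from Python
# rather than the CLI; the experiment id and directory are hypothetical.
def _example_run_pipeline():
    params = PipelineParameters.default(
        general={"expt_id": 12345, "distributed": 2, "tps": 2, "directory": "./data"}
    )
    Pipeline(params).run()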
# parameters to stop the pipeline when exceeded
earlystop = dict(
min_tp=100,
thresh_pos_clogged=0.4,
thresh_trap_ncells=8,
thresh_trap_area=0.9,
ntps_to_eval=5,
)
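# A hedged sketch of how these thresholds might be combined; the real check
# lives in the pipeline, and `frac_clogged_traps` here is hypothetical.
def _example_earlystop(frac_clogged_traps: float, tp: int) -> bool:
    """Stop a position once min_tp has passed and too many traps are clogged."""
    return (
        tp >= earlystop["min_tp"]
        and frac_clogged_traps > earlystop["thresh_pos_clogged"]
    )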
# imaging properties of the microscope
imaging_specifications = {
"pixel_size": 0.236,
"z_size": 0.6,
"spacing": 0.6,
}
# possible imaging channels
possible_imaging_channels = [
"Citrine",
"GFP",
"GFPFast",
"mCherry",
"Flavin",
"mKO2",
"Cy5",
"pHluorin405",
"pHluorin488",
]
# functions to apply to the fluorescence of each cell
fluorescence_functions = [
"mean",
"median",
"std",
"imBackground",
"max5px_median",
]
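# A hedged sketch of the per-cell reductions named above; "max5px_median" is
# assumed to mean the median of the five brightest pixels (not confirmed
# here), and "imBackground" is omitted since it needs the whole image.
import numpy as np

def _example_fluorescence_stats(cell_pixels: np.ndarray) -> dict:
    return {
        "mean": cell_pixels.mean(),
        "median": np.median(cell_pixels),
        "std": cell_pixels.std(),
        "max5px_median": np.median(np.sort(cell_pixels, axis=None)[-5:]),
    }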
"""
Neural network initialisation.
"""
from pathlib import Path
from time import perf_counter
import numpy as np
import tensorflow as tf
from agora.io.writer import DynamicWriter
def initialise_tf(version):
# Initialise tensorflow
if version == 1:
core_config = tf.ConfigProto()
core_config.gpu_options.allow_growth = True
session = tf.Session(config=core_config)
return session
# TODO this only works for TF2
if version == 2:
gpus = tf.config.experimental.list_physical_devices("GPU")
if gpus:
for gpu in gpus:
tf.config.experimental.set_memory_growth(gpu, True)
logical_gpus = tf.config.experimental.list_logical_devices("GPU")
print(
len(gpus), "Physical GPUs,", len(logical_gpus), "Logical GPUs"
)
return None
def timer(func, *args, **kwargs):
start = perf_counter()
result = func(*args, **kwargs)
print(f"Function {func.__name__}: {perf_counter() - start}s")
return result
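# Example use of the timer helper above: time a cheap NumPy reduction.
def _example_timer():
    return timer(np.sum, np.arange(1_000_000))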
################## CUSTOM OBJECTS ##################################
class ModelPredictor:
"""Generic object that takes a NN and returns the prediction.
Use for predicting fluorescence/other from bright field.
This does not do instance segmentations of anything.
"""
def __init__(self, tiler, model, name):
self.tiler = tiler
self.model = model
self.name = name
# assume the tiler's reference channel is the bright-field channel,
# as get_data below expects self.bf_channel
self.bf_channel = tiler.ref_channel_index
def get_data(self, tp):
# Change axes to X,Y,Z rather than Z,Y,X
return (
self.tiler.get_tp_data(tp, self.bf_channel)
.swapaxes(1, 3)
.swapaxes(1, 2)
)
def format_result(self, result, tp):
return {self.name: result, "timepoints": [tp] * len(result)}
def run_tp(self, tp):
"""Apply the model to the image at one time point."""
prediction = self.model.predict(self.get_data(tp))
return self.format_result(prediction, tp)
class ModelPredictorWriter(DynamicWriter):
def __init__(self, file, name, shape, dtype):
super().__init__(file)
self.name = name
self.datatypes = {
name: (shape, dtype),
"timepoint": ((None,), np.uint16),
}
self.group = f"{self.name}_info"
......@@ -54,7 +54,7 @@ class DatasetLocalABC(ABC):
Abstract Base class to find local files, either OME-XML or raw images.
"""
_valid_suffixes = ("tiff", "png", "zarr")
_valid_suffixes = ("tiff", "png", "zarr", "tif")
_valid_meta_suffixes = ("txt", "log")
def __init__(self, dpath: t.Union[str, Path], *args, **kwargs):
......
......@@ -23,21 +23,21 @@ import numpy as np
import xmltodict
import zarr
from dask.array.image import imread
from tifffile import TiffFile
try:
from importlib_resources import files
except ModuleNotFoundError:
from importlib.resources import files
from agora.io.metadata import parse_metadata
def get_examples_dir():
"""Return the examples directory, which stores the dummy image for tiler."""
return files("aliby").parent.parent / "examples" / "tiler"
def instantiate_image(
source: t.Union[str, int, t.Dict[str, str], Path], **kwargs
):
"""Wrapper to instatiate the appropiate image
"""
Instantiate the image.
Parameters
----------
......@@ -46,43 +46,76 @@ def instantiate_image(
Examples
--------
image_path = "path/to/image"]
image_path = "path/to/image"
with instantiate_image(image_path) as img:
print(img.data, img.metadata)
"""
return dispatch_image(source)(source, **kwargs)
def dispatch_image(source: t.Union[str, int, t.Dict[str, str], Path]):
"""
Wrapper to pick the appropiate Image class depending on the source of data.
"""
"""Pick the appropriate Image class for the source of data."""
if isinstance(source, (int, np.int64)):
# requires omero module
from aliby.io.omero import Image
instantiator = Image
elif isinstance(source, dict) or (
isinstance(source, (str, Path)) and Path(source).is_dir()
):
# zarr files are considered directories
if Path(source).suffix == ".zarr":
instantiator = ImageZarr
else:
instantiator = ImageDir
elif isinstance(source, (str, Path)) and Path(source).is_file():
instantiator = ImageLocalOME
else:
raise Exception(f"Invalid data source at {source}")
return instantiator
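# A hedged sketch of the dispatch above; the paths are hypothetical and must
# exist, since dispatch_image probes the filesystem.
def _example_dispatch():
    img_cls = dispatch_image("/data/experiment/pos001.zarr")  # -> ImageZarr
    img_cls = dispatch_image("/data/experiment/pos001")  # directory -> ImageDir
    img_cls = dispatch_image("/data/experiment/pos001.ome.tiff")  # file -> ImageLocalOME
    img_cls = dispatch_image(10421)  # OMERO image id -> omero Image
    return img_cls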
def files_to_image_sizes(path: Path, suffix="tiff"):
"""Deduce image sizes from the naming convention of tiff files."""
filenames = list(path.glob(f"*.{suffix}"))
try:
# deduce order from filenames
dimorder = "".join(
map(lambda x: x[0], filenames[0].stem.split("_")[1:])
)
dim_value = list(
map(
lambda f: filename_to_dict_indices(f.stem),
filenames,
)
)
maxes = [max(map(lambda x: x[dim], dim_value)) for dim in dimorder]
mins = [min(map(lambda x: x[dim], dim_value)) for dim in dimorder]
dim_shapes = [
max_val - min_val + 1 for max_val, min_val in zip(maxes, mins)
]
meta = {
"size_" + dim: shape for dim, shape in zip(dimorder, dim_shapes)
}
except Exception as e:
print("Warning: files_to_image_sizes failed." f"\nError: {e}")
meta = {}
return meta
def filename_to_dict_indices(stem: str):
"""Split string into a dict."""
return {
dim_number[0]: int(dim_number[1:])
for dim_number in stem.split("_")[1:]
}
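# A worked example of the naming convention assumed above (file name
# hypothetical): "<position>_t<idx>_c<idx>_z<idx>".
def _example_filename_indices():
    assert filename_to_dict_indices("pos001_t0001_c02_z05") == {"t": 1, "c": 2, "z": 5}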
class BaseLocalImage(ABC):
"""
Base Image class to set path and provide context management method.
"""
"""Set path and provide method for context management."""
_default_dimorder = "tczyx"
# default image order
default_dimorder = "tczyx"
def __init__(self, path: t.Union[str, Path]):
# If directory, assume contents are naturally sorted
......@@ -98,44 +131,52 @@ class BaseLocalImage(ABC):
return False
def rechunk_data(self, img):
"""Format image using x and y size from metadata."""
self._rechunked_img = da.rechunk(
img,
chunks=(
1,
1,
1,
self._meta["size_y"],
self._meta["size_x"],
self.meta["size_y"],
self.meta["size_x"],
),
)
return self._rechunked_img
@property
def data(self):
"""Get data."""
return self.get_data_lazy()
@property
def metadata(self):
"""Get metadata."""
return self.meta
def set_meta(self):
"""Load metadata using parser dispatch."""
parsed_meta = parse_metadata(self.path)
if parsed_meta is None:
# try to deduce metadata
parsed_meta = files_to_image_sizes(self.path)
self.meta = parsed_meta
@abstractmethod
def get_data_lazy(self) -> da.Array:
"""Define in child class."""
pass
@abstractproperty
def name(self):
"""Define in child class."""
pass
@abstractproperty
def dimorder(self):
"""Define in child class."""
pass
class ImageLocalOME(BaseLocalImage):
"""
......@@ -145,78 +186,75 @@ class ImageLocalOME(BaseLocalImage):
in which a multidimensional tiff image contains the metadata.
"""
def __init__(self, path: str, dimorder=None, **kwargs):
"""Initialise using file name."""
super().__init__(path)
self._id = str(path)
self.set_meta(str(path))
def set_meta(self, path):
"""Get metadata from the associated tiff file."""
meta = dict()
try:
with TiffFile(path) as f:
self.meta = xmltodict.parse(f.ome_metadata)["OME"]
for dim in self.dimorder:
meta["size_" + dim.lower()] = int(
self._meta["Image"]["Pixels"]["@Size" + dim]
self.meta["Image"]["Pixels"]["@Size" + dim]
)
meta["channels"] = [
x["@Name"]
for x in self._meta["Image"]["Pixels"]["Channel"]
x["@Name"] for x in self.meta["Image"]["Pixels"]["Channel"]
]
meta["name"] = self._meta["Image"]["@Name"]
meta["type"] = self._meta["Image"]["Pixels"]["@Type"]
except Exception as e: # Images not in OMEXML
meta["name"] = self.meta["Image"]["@Name"]
meta["type"] = self.meta["Image"]["Pixels"]["@Type"]
except Exception as e:
# images not in OMEXML
print("Warning:Metadata not found: {}".format(e))
print(
f"Warning: No dimensional info provided. Assuming {self._default_dimorder}"
"Warning: No dimensional info provided. "
f"Assuming {self.default_dimorder}"
)
# mark non-existent dimensions for padding
self.base = self.default_dimorder
self.dimorder = self.base
self.meta = meta
@property
def name(self):
return self._meta["name"]
return self.meta["name"]
@property
def date(self):
date_str = [
x
for x in self._meta["StructuredAnnotations"]["TagAnnotation"]
for x in self.meta["StructuredAnnotations"]["TagAnnotation"]
if x["Description"] == "Date"
][0]["Value"]
return datetime.strptime(date_str, "%d-%b-%Y")
@property
def dimorder(self):
"""Order of dimensions in image"""
if not hasattr(self, "_dimorder"):
self._dimorder = self._meta["Image"]["Pixels"]["@DimensionOrder"]
return self._dimorder
"""Return order of dimensions in the image."""
if not hasattr(self, "dimorder"):
self.dimorder = self.meta["Image"]["Pixels"]["@DimensionOrder"]
return self.dimorder
@dimorder.setter
def dimorder(self, order: str):
self._dimorder = order
return self._dimorder
self.dimorder = order
return self.dimorder
def get_data_lazy(self) -> da.Array:
"""Return 5D dask array. For lazy-loading multidimensional tiff files"""
"""Return 5D dask array via lazy-loading of tiff files."""
if not hasattr(self, "formatted_img"):
if not hasattr(self, "ids"): # Standard dimension order
if not hasattr(self, "ids"):
# standard order of image dimensions
img = (imread(str(self.path))[0],)
else:
# bespoke order, so rearrange axes for compatibility
img = imread(str(self.path))[0]
for i, d in enumerate(self.dimorder):
self.meta["size_" + d.lower()] = img.shape[i]
target_order = (
*self.ids,
*[
......@@ -235,51 +273,46 @@ class ImageLocalOME(BaseLocalImage):
img = da.moveaxis(
reshaped, range(len(reshaped.shape)), target_order
)
return self.rechunk_data(img)
class ImageDir(BaseLocalImage):
"""
Image class for the case in which all images are split into one or
multiple folders, with time points and channels as independent files.
Assumptions:
- One folder per position.
- Images are flat.
- Channel, time point, and z-stack are determined by the file names.
- Provides dimorder as set in the file names, or expects the order at instantiation.
"""
def __init__(self, path: t.Union[str, Path], **kwargs):
"""Initialise using file name."""
super().__init__(path)
self.image_id = str(self.path.stem)
self.meta = files_to_image_sizes(self.path)
def get_data_lazy(self) -> da.Array:
"""Return 5D dask array. For lazy-loading local multidimensional tiff files"""
"""Return 5D dask array."""
img = imread(str(self.path / "*.tiff"))
# If extra channels, pick the first stack of the last dimensions
while len(img.shape) > 3:
img = img[..., 0]
if self.meta:
self.meta["size_x"], self.meta["size_y"] = img.shape[-2:]
# Reshape using metadata
img = da.reshape(img, tuple(self.meta.values()))
original_order = [
i[-1] for i in self.meta.keys() if i.startswith("size")
]
# Swap axis to conform with normal order
target_order = [
self.default_dimorder.index(x) for x in original_order
]
img = da.moveaxis(
img,
......@@ -291,38 +324,42 @@ class ImageDir(BaseLocalImage):
@property
def name(self):
"""Return name of image directory."""
return self.path.stem
@property
def dimorder(self):
# Assumes only dimensions start with "size"
return [
k.split("_")[-1] for k in self._meta.keys() if k.startswith("size")
k.split("_")[-1] for k in self.meta.keys() if k.startswith("size")
]
class ImageZarr(BaseLocalImage):
"""
Read zarr compressed files.
These files are generated by the script
skeletons/scripts/howto_omero/convert_clone_zarr_to_tiff.py
"""
def __init__(self, path: t.Union[str, Path], **kwargs):
"""Initialise using file name."""
super().__init__(path)
self.set_meta()
try:
self._img = zarr.open(self.path)
self.add_size_to_meta()
except Exception as e:
print(f"Could not add size info to metadata: {e}")
print(f"ImageZarr: Could not add size info to metadata: {e}.")
def get_data_lazy(self) -> da.Array:
"""Return 5D dask array. For lazy-loading local multidimensional zarr files"""
"""Return 5D dask array for lazy-loading local multidimensional zarr files."""
return self._img
def add_size_to_meta(self):
"""Add shape of image array to metadata."""
self.meta.update(
{
f"size_{dim}": shape
for dim, shape in zip(self.dimorder, self._img.shape)
......@@ -331,126 +368,10 @@ class ImageZarr(BaseLocalImage):
@property
def name(self):
"""Return name of zarr directory."""
return self.path.stem
@property
def dimorder(self):
"""Impose a hard-coded order of dimensions based on the zarr compression script."""
return "TCZYX"
class ImageDummy(BaseLocalImage):
"""
Dummy Image class.
ImageDummy mimics the other Image classes in such a way that it is accepted
by Tiler. The purpose of this class is for testing, in particular,
identifying silent failures. If something goes wrong, we should be able to
know whether it is because of bad parameters or bad input data.
For the purposes of testing parameters, ImageDummy assumes that we already
know the tiler parameters before Image instances are instantiated. This is
true for a typical pipeline run.
"""
def __init__(self, tiler_parameters: dict):
"""Builds image instance
Parameters
----------
tiler_parameters : dict
Tiler parameters, in dict form. Following
aliby.tile.tiler.TilerParameters, the keys are: "tile_size" (size of
tile), "ref_channel" (reference channel for tiling), and "ref_z"
(reference z-stack, 0 to choose a default).
"""
self.ref_channel = tiler_parameters["ref_channel"]
self.ref_z = tiler_parameters["ref_z"]
# Goal: make Tiler happy.
@staticmethod
def pad_array(
image_array: da.Array,
dim: int,
n_empty_slices: int,
image_position: int = 0,
):
"""Extends a dimension in a dask array and pads with zeros
Extends a dimension in a dask array that has existing content, then pads
with zeros.
Parameters
----------
image_array : da.Array
Input dask array
dim : int
Dimension in which to extend the dask array.
n_empty_slices : int
Number of empty slices to extend the dask array by, in the specified
dimension/axis.
image_position : int
Position within the new dimension to place the input array, default 0
(the beginning).
Examples
--------
```
extended_array = pad_array(
my_da_array, dim = 2, n_empty_slices = 4, image_position = 1)
```
Extends a dask array called `my_da_array` in the 3rd dimension
(dimensions start from 0) by 4 slices, filled with zeros. And puts the
original content in slice 1 of the 3rd dimension
"""
# Concats zero arrays with same dimensions as image_array, and puts
# image_array as first element in list of arrays to be concatenated
zeros_array = da.zeros_like(image_array)
return da.concatenate(
[
*([zeros_array] * image_position),
image_array,
*([zeros_array] * (n_empty_slices - image_position)),
],
axis=dim,
)
# Logic: we want to return an image instance
def get_data_lazy(self) -> da.Array:
"""Return 5D dask array. For lazy-loading multidimensional tiff files. Dummy image."""
examples_dir = get_examples_dir()
# TODO: Make this robust to having multiple TIFF images, one for each z-section,
# all falling under the same "pypipeline_unit_test_00_000001_Brightfield_*.tif"
# naming scheme. The aim is to create a multidimensional dask array that stores
# the z-stacks.
img_filename = "pypipeline_unit_test_00_000001_Brightfield_003.tif"
img_path = examples_dir / img_filename
# img is a dask array with three dimensions: z, x, y
# TODO: Write a test to confirm this: If everything worked well,
# z = 1, x = 1200, y = 1200
img = imread(str(img_path))
# Adds t & c dimensions
img = da.reshape(
img, (1, 1, img.shape[-3], img.shape[-2], img.shape[-1])
)
# Pads t, c, and z dimensions
img = self.pad_array(
img, dim=0, n_empty_slices=199
) # 200 timepoints total
img = self.pad_array(img, dim=1, n_empty_slices=2) # 3 channels
img = self.pad_array(
img, dim=2, n_empty_slices=4, image_position=self.ref_z
) # 5 z-stacks
return img
@property
def name(self):
pass
@property
def dimorder(self):
pass
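# A shape check for ImageDummy.pad_array above (sizes hypothetical): padding
# a single time point out to 200 along the first axis.
def _example_pad_array():
    base = da.zeros((1, 1, 1, 4, 4))
    padded = ImageDummy.pad_array(base, dim=0, n_empty_slices=199, image_position=0)
    assert padded.shape == (200, 1, 1, 4, 4)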
......@@ -4,7 +4,6 @@ Tools to manage I/O using a remote OMERO server.
import re
import typing as t
from abc import abstractmethod
from pathlib import Path
import dask.array as da
......@@ -131,7 +130,6 @@ class BridgeOmero:
FIXME: Add docs.
"""
bridge = BridgeH5(filepath)
meta = safe_load(bridge.meta_h5["parameters"])["general"]
server_info = {k: meta[k] for k in ("host", "username", "password")}
......@@ -208,6 +206,13 @@ class Dataset(BridgeOmero):
im.getName(): im.getId() for im in self.ome_class.listChildren()
}
def get_channels(self):
"""Get channels from OMERO."""
for im in self.ome_class.listChildren():
channels = [ch.getLabel() for ch in im.getChannels()]
break
return channels
@property
def files(self):
if not hasattr(self, "_files"):
......@@ -254,7 +259,8 @@ class Dataset(BridgeOmero):
cls,
filepath: t.Union[str, Path],
):
"""Instatiate Dataset from a hdf5 file.
"""
Instantiate data set from a h5 file.
Parameters
----------
......@@ -268,7 +274,6 @@ class Dataset(BridgeOmero):
FIXME: Add docs.
"""
bridge = BridgeH5(filepath)
dataset_keys = ("omero_id", "omero_id,", "dataset_id")
for k in dataset_keys:
......@@ -301,21 +306,21 @@ class Image(BridgeOmero):
cls,
filepath: t.Union[str, Path],
):
"""Instatiate Image from a hdf5 file.
"""
Instantiate Image from a h5 file.
Parameters
----------
cls : Image
Image class
filepath : t.Union[str, Path]
Location of h5 file.
Examples
--------
FIXME: Add docs.
"""
bridge = BridgeH5(filepath)
image_id = bridge.meta_h5["image_id"]
return cls(image_id, **cls.server_info_from_h5(filepath))
......
"""Additional tools to fetch and handle datasets programatically.
"""
__version__ = "0.1.64 lite"