Skip to content
Snippets Groups Projects

Compare revisions

Changes are shown as if the source revision was being merged into the target revision. Learn more about comparing revisions.

Source

Select target project
No results found

Target

Select target project
  • swain-lab/aliby/aliby-mirror
  • swain-lab/aliby/alibylite
2 results
Show changes
Showing
with 1764 additions and 699 deletions
#!/usr/bin/env jupyter
"""
Utilities based on association are used to efficiently acquire indices of tracklets with some kind of relationship.
This can be:
- Cells that are to be merged
- Cells that have a linear relationship
"""
import numpy as np
import typing as t
def validate_association(
association: np.ndarray,
indices: np.ndarray,
match_column: t.Optional[int] = None,
) -> t.Tuple[np.ndarray, np.ndarray]:
"""Select rows from the first array that are present in both.
We use casting for fast multiindexing, generalising for lineage dynamics
Parameters
----------
association : np.ndarray
2-D array where columns are (trap, mother, daughter) or 3-D array where
dimensions are (X,trap,2), containing tuples ((trap,mother), (trap,daughter))
across the 3rd dimension.
indices : np.ndarray
2-D array where each column is a different level. This should not include mother_label.
match_column: int
int indicating a specific column is required to match (i.e.
0-1 for target-source when trying to merge tracklets or mother-bud for lineage)
must be present in indices. If it is false one match suffices for the resultant indices
vector to be True.
Returns
-------
np.ndarray
1-D boolean array indicating valid merge events.
np.ndarray
1-D boolean array indicating indices with an association relationship.
Examples
--------
>>> import numpy as np
>>> from agora.utils.indexing import validate_association
>>> merges = np.array(range(12)).reshape(3,2,2)
>>> indices = np.array(range(6)).reshape(3,2)
>>> print(merges, indices)
>>> print(merges); print(indices)
[[[ 0 1]
[ 2 3]]
[[ 4 5]
[ 6 7]]
[[ 8 9]
[10 11]]]
[[0 1]
[2 3]
[4 5]]
>>> valid_associations, valid_indices = validate_association(merges, indices)
>>> print(valid_associations, valid_indices)
[ True False False] [ True True False]
"""
if association.ndim == 2:
# Reshape into 3-D array for broadcasting if neded
# association = np.stack(
# (association[:, [0, 1]], association[:, [0, 2]]), axis=1
# )
association = _assoc_indices_to_3d(association)
# Compare existing association with available indices
# Swap trap and label axes for the association array to correctly cast
valid_ndassociation = association[..., None] == indices.T[None, ...]
# Broadcasting is confusing (but efficient):
# First we check the dimension across trap and cell id, to ensure both match
valid_cell_ids = valid_ndassociation.all(axis=2)
if match_column is None:
# Then we check the merge tuples to check which cases have both target and source
valid_association = valid_cell_ids.any(axis=2).all(axis=1)
# Finally we check the dimension that crosses all indices, to ensure the pair
# is present in a valid merge event.
valid_indices = (
valid_ndassociation[valid_association].all(axis=2).any(axis=(0, 1))
)
else: # We fetch specific indices if we aim for the ones with one present
valid_indices = valid_cell_ids[:, match_column].any(axis=0)
# Valid association then becomes a boolean array, true means that there is a
# match (match_column) between that cell and the index
valid_association = (
valid_cell_ids[:, match_column] & valid_indices
).any(axis=1)
return valid_association, valid_indices
def _assoc_indices_to_3d(ndarray: np.ndarray):
"""
Convert the last column to a new row while repeating all previous indices.
This is useful when converting a signal multiindex before comparing association.
Assumes the input array has shape (N,3)
"""
result = ndarray
if len(ndarray) and ndarray.ndim > 1:
if ndarray.shape[1] == 3: # Faster indexing for single positions
result = np.transpose(
np.hstack((ndarray[:, [0]], ndarray)).reshape(-1, 2, 2),
axes=[0, 2, 1],
)
else: # 20% slower but more general indexing
columns = np.arange(ndarray.shape[1])
result = np.stack(
(
ndarray[:, np.delete(columns, -1)],
ndarray[:, np.delete(columns, -2)],
),
axis=1,
)
return result
def _3d_index_to_2d(array: np.ndarray):
"""
Opposite to _assoc_indices_to_3d.
"""
result = array
if len(array):
result = np.concatenate(
(array[:, 0, :], array[:, 1, 1, np.newaxis]), axis=1
)
return result
def compare_indices(x: np.ndarray, y: np.ndarray) -> np.ndarray:
"""
Fetch two 2-D indices and return a binary 2-D matrix
where a True value links two cells where all cells are the same
"""
return (x[..., None] == y.T[None, ...]).all(axis=1)
......@@ -6,6 +6,8 @@ import numpy as np
import pandas as pd
from sklearn.cluster import KMeans
from agora.utils.indexing import validate_association
index_row = t.Tuple[str, str, int, int]
......@@ -120,7 +122,9 @@ def bidirectional_retainment_filter(
def melt_reset(df: pd.DataFrame, additional_ids: t.Dict[str, pd.Series] = {}):
new_df = add_index_levels(df, additional_ids)
return new_df.melt(ignore_index=False).reset_index()
return new_df.melt(
ignore_index=False, var_name="time (minutes)", value_name="signal"
).reset_index()
# Drop cells that if used would reduce info the most
......@@ -163,3 +167,79 @@ def slices_from_spans(spans: t.Tuple[int], df: pd.DataFrame) -> t.List[slice]:
for start, end in zip(cumsum[:-1], cumsum[1:])
]
return slices
def drop_mother_label(index: pd.MultiIndex) -> np.ndarray:
no_mother_label = index
if "mother_label" in index.names:
no_mother_label = index.droplevel("mother_label")
return np.array(no_mother_label.tolist())
def get_index_as_np(signal: pd.DataFrame):
# Get mother labels from multiindex dataframe
return np.array(signal.index.to_list())
def standard_filtering(
raw: pd.DataFrame,
lin: np.ndarray,
presence_high: float = 0.8,
presence_low: int = 7,
):
# Get all mothers
_, valid_indices = validate_association(
lin, np.array(raw.index.to_list()), match_column=0
)
in_lineage = raw.loc[valid_indices]
# Filter mothers by presence
present = in_lineage.loc[
in_lineage.notna().sum(axis=1) > (in_lineage.shape[1] * presence_high)
]
# Get indices
indices = np.array(present.index.to_list())
to_cast = np.stack((lin[:, :2], lin[:, [0, 2]]), axis=1)
ndin = to_cast[..., None] == indices.T[None, ...]
# use indices to fetch all daughters
valid_association = ndin.all(axis=2)[:, 0].any(axis=-1)
# Remove repeats
mothers, daughters = np.split(to_cast[valid_association], 2, axis=1)
mothers = mothers[:, 0]
daughters = daughters[:, 0]
d_m_dict = {tuple(d): m[-1] for m, d in zip(mothers, daughters)}
# assuming unique sorts
raw_mothers = raw.loc[_as_tuples(mothers)]
raw_mothers["mother_label"] = 0
raw_daughters = raw.loc[_as_tuples(daughters)]
raw_daughters["mother_label"] = d_m_dict.values()
concat = pd.concat((raw_mothers, raw_daughters)).sort_index()
concat.set_index("mother_label", append=True, inplace=True)
# Last filter to remove tracklets that are too short
removed_buds = concat.notna().sum(axis=1) <= presence_low
filt = concat.loc[~removed_buds]
# We check that no mothers are left child-less
m_d_dict = {tuple(m): [] for m in mothers}
for (trap, d), m in d_m_dict.items():
m_d_dict[(trap, m)].append(d)
for trap, daughter, mother in concat.index[removed_buds]:
idx_to_delete = m_d_dict[(trap, mother)].index(daughter)
del m_d_dict[(trap, mother)][idx_to_delete]
bud_free = []
for m, d in m_d_dict.items():
if not d:
bud_free.append(m)
final_result = filt.drop(bud_free)
# In the end, we get the mothers present for more than {presence_lineage1}% of the experiment
# and their tracklets present for more than {presence_lineage2} time-points
return final_result
......@@ -9,7 +9,7 @@ import numpy as np
import pandas as pd
from utils_find_1st import cmp_larger, find_1st
from agora.utils.association import validate_association
from agora.utils.indexing import compare_indices, validate_association
def apply_merges(data: pd.DataFrame, merges: np.ndarray):
......@@ -31,23 +31,29 @@ def apply_merges(data: pd.DataFrame, merges: np.ndarray):
"""
indices = data.index
if "mother_label" in indices.names:
indices = indices.droplevel("mother_label")
valid_merges, indices = validate_association(
merges, np.array(list(data.index))
merges, np.array(list(indices))
)
# Assign non-merged
merged = data.loc[~indices]
# Implement the merges and drop source rows.
# TODO Use matrices to perform merges in batch
# for ecficiency
if valid_merges.any():
to_merge = data.loc[indices]
for target, source in merges[valid_merges]:
target, source = tuple(target), tuple(source)
targets, sources = zip(*merges[valid_merges])
for source, target in zip(sources, targets):
target = tuple(target)
to_merge.loc[target] = join_tracks_pair(
to_merge.loc[target].values,
to_merge.loc[source].values,
to_merge.loc[tuple(source)].values,
)
to_merge.drop(source, inplace=True)
to_merge.drop(map(tuple, sources), inplace=True)
merged = pd.concat((merged, to_merge), names=data.index.names)
return merged
......@@ -56,9 +62,85 @@ def apply_merges(data: pd.DataFrame, merges: np.ndarray):
def join_tracks_pair(target: np.ndarray, source: np.ndarray) -> np.ndarray:
"""
Join two tracks and return the new value of the target.
TODO replace this with arrays only.
"""
target_copy = copy(target)
target_copy = target
end = find_1st(target_copy[::-1], 0, cmp_larger)
target_copy[-end:] = source[-end:]
return target_copy
def group_merges(merges: np.ndarray) -> t.List[t.Tuple]:
# Return a list where the cell is present as source and target
# (multimerges)
sources_targets = compare_indices(merges[:, 0, :], merges[:, 1, :])
is_multimerge = sources_targets.any(axis=0) | sources_targets.any(axis=1)
is_monomerge = ~is_multimerge
multimerge_subsets = union_find(zip(*np.where(sources_targets)))
merge_groups = [merges[np.array(tuple(x))] for x in multimerge_subsets]
sorted_merges = list(map(sort_association, merge_groups))
# Ensure that source and target are at the edges
return [
*sorted_merges,
*[[event] for event in merges[is_monomerge]],
]
def union_find(lsts):
sets = [set(lst) for lst in lsts if lst]
merged = True
while merged:
merged = False
results = []
while sets:
common, rest = sets[0], sets[1:]
sets = []
for x in rest:
if x.isdisjoint(common):
sets.append(x)
else:
merged = True
common |= x
results.append(common)
sets = results
return sets
def sort_association(array: np.ndarray):
# Sort the internal associations
order = np.where(
(array[:, 0, ..., None] == array[:, 1].T[None, ...]).all(axis=1)
)
res = []
[res.append(x) for x in np.flip(order).flatten() if x not in res]
sorted_array = array[np.array(res)]
return sorted_array
def merge_association(
association: np.ndarray, merges: np.ndarray
) -> np.ndarray:
grouped_merges = group_merges(merges)
flat_indices = association.reshape(-1, 2)
comparison_mat = compare_indices(merges[:, 0], flat_indices)
valid_indices = comparison_mat.any(axis=0)
if valid_indices.any(): # Where valid, perform transformation
replacement_d = {}
for dataset in grouped_merges:
for k in dataset:
replacement_d[tuple(k[0])] = dataset[-1][1]
flat_indices[valid_indices] = [
replacement_d[tuple(i)] for i in flat_indices[valid_indices]
]
merged_indices = flat_indices.reshape(-1, 2, 2)
return merged_indices
......@@ -6,7 +6,7 @@ import logging
import re
import time
import typing as t
from pathlib import Path, PosixPath
from pathlib import Path
from time import perf_counter
import baby.errors
......@@ -108,9 +108,7 @@ class BabyParameters(ParametersABC):
tf_version=2,
)
def update_baby_modelset(
self, path: t.Union[str, PosixPath, t.Dict[str, str]]
):
def update_baby_modelset(self, path: t.Union[str, Path, t.Dict[str, str]]):
"""
Replace default BABY model and flattener with another one from a folder outputted
by our standard retraining script.
......@@ -141,6 +139,14 @@ class BabyRunner(StepABC):
if parameters is None
else parameters.model_config
)
tiler_z = self.tiler.shape[-3]
model_name = self.model_config["flattener_file"]
if tiler_z != 5:
assert (
f"{tiler_z}z" in model_name
), f"Tiler z-stack ({tiler_z}) and Model shape ({model_name}) do not match "
self.brain = BabyBrain(**self.model_config)
self.crawler = BabyCrawler(self.brain)
self.bf_channel = self.tiler.ref_channel_index
......
#!/usr/bin/env jupyter
"""
Command Line Interface utilities.
"""
"""
Asynchronous annotation (in one thread). Used as a base to build threading-based annotation.
Currently only works on UNIX-like systems due to using "/" to split addresses.
Usage example
From python
$ python annotator.py --image_path path/to/folder/with/h5files --results_path path/to/folder/with/images/zarr --pos position_name --ncells max_n_to_annotate
As executable (installed via poetry)
$ annotator.py --image_path path/to/folder/with/h5files --results_path path/to/folder/with/images/zarr --pos position_name --ncells max_n_to_annotate
During annotation:
- Assign a (binary) label by typing '1' or '2'.
- Type 'u' to undo.
- Type 's' to skip.
- Type 'q' to quit.
File will be saved in: ./YYYY-MM-DD_annotation/annotation.csv, where YYYY-MM-DD is the current date.
"""
import argparse
import logging
import typing as t
from copy import copy
from datetime import datetime
from pathlib import Path
import matplotlib.pyplot as plt
import numpy as np
import readchar
import trio
from agora.utils.cast import _str_to_int
from aliby.utils.vis_tools import _sample_n_tiles_masks
from aliby.utils.plot import stretch
# Remove logging warnings
logging.getLogger("aliby").setLevel(40)
# Defaults
essential = {"image_path": "zarr", "results_path": "h5"}
param_values = dict(
out_dir=f"./{datetime.today().strftime('%Y_%m_%d')}_annotation/",
pos=None,
ncells=100,
min_tp=100,
max_tp=150,
seed=0,
)
annot_filename = "annotation.csv"
# Parsing
parser = argparse.ArgumentParser(
prog="aliby-annot-binary",
description="Annotate cells in a binary manner",
)
for i, arg in enumerate((*essential, *param_values)):
parser.add_argument(
f"--{arg}",
action="store",
default=param_values.get(arg),
required=i < len(essential),
)
args = parser.parse_args()
for i, k in enumerate((*essential, *param_values.keys())):
# Assign essential values as-is
if i < len(essential):
param_values[k] = getattr(args, k)
# Fill additional values
if passed_value := getattr(args, k):
param_values[k] = passed_value
try:
param_values[k] = _str_to_int(passed_value)
except Exception as exc:
pass
for k, suffix in essential.items(): # Autocomplete if fullpath not provided
if not str(param_values[k]).endswith(suffix):
param_values[k] = (
Path(param_values[k]) / f"{ param_values['pos'] }.{suffix}"
)
# Functions
async def generate_image(stack, skip: bool = False):
await trio.sleep(1)
result = np.random.randint(100, size=(10, 10))
stack.append(result)
async def draw(data, drawing):
if len(drawing) > 1:
for ax, img in zip(drawing, data):
if np.isnan(img).sum(): # Stretch masked channel
img = stretch(img)
ax.set_data(img)
else:
drawing.set_data(data)
plt.draw()
plt.pause(0.1)
def annotate_image(current_key=None, valid_values: t.Tuple[int] = (1, 2)):
# Show image to annotate
while current_key is None or current_key not in valid_values:
if current_key is not None:
print(
f"Invalid value. Please try with valid values {valid_values}"
)
if (current_key := readchar.readkey()) in "qsu":
# if (current_key := input()) in "qsu":
break
current_key = _parse_input(current_key, valid_values)
return current_key
async def generate_image(
generator,
location_stack: t.List[t.Tuple[np.ndarray, t.Tuple[int, int, int]]],
):
new_location_image = next(generator)
location_stack.append((new_location_image[0], new_location_image[1]))
def _parse_input(value: str, valid_values: t.Tuple[int]):
try:
return int(value)
except:
print(
f"Non-parsable value. Please try again with valid values {valid_values}"
)
return None
def write_annotation(
experiment_position: str,
out_dir: Path,
annotation: str,
location_stack: t.Tuple[t.Tuple[int, int, int], np.ndarray],
):
location, stack = location_stack
unique_location = list(map(str, (*experiment_position, *location)))
write_into_file(
out_dir / annot_filename,
",".join((*unique_location, str(annotation))) + "\n",
)
bg_zero = copy(stack[1])
bg_zero[np.isnan(bg_zero)] = 0
tosave = np.stack((stack[0], bg_zero.astype(int)))
# np.savez(out_dir / f"{'_'.join( unique_location )}.npz", tosave)
np.save(out_dir / f"{'.'.join( unique_location )}.npy", tosave)
def write_into_file(file_path: str, line: str):
with open(file_path, "a") as f:
f.write(str(line))
async def annotate_images(
image_path, results_path, out_dir, ncells, seed, interval
):
preemptive_cache = 3
location_stack = []
out_dir = Path(out_dir)
out_annot_file = str(out_dir / annot_filename)
generator = _sample_n_tiles_masks(
image_path, results_path, ncells, seed=seed, interval=interval
)
# Fetch a few positions preemtively
async with trio.open_nursery() as nursery:
for _ in range(preemptive_cache):
nursery.start_soon(generate_image, generator, location_stack)
print("parent: waiting for first annotations.")
_, ax = plt.subplots(figsize=(10, 8))
while not location_stack: # Wait until first image is loaded
await trio.sleep(0.1)
from aliby.utils.plot import plot_overlay
# drawing = ax.imshow(location_stack[0][1])
axes = plot_overlay(*location_stack[0][1], ax=ax.axes)
plt.show(block=False)
plt.draw()
plt.pause(0.5) # May be adjusted based on display speed
try:
out_dir.mkdir(parents=True)
except:
pass
if not Path(out_annot_file).exists():
write_into_file(
out_annot_file,
",".join(
(
"experiment",
"position",
"tile",
"cell_label",
"tp",
"annotation",
)
)
+ "\n",
)
# Loop until n_max or quit
for i in range(1, ncells - preemptive_cache + 1):
# Wait for input
print("Enter a key")
annotation = str(annotate_image())
if annotation == "q":
break
elif annotation == "s":
print("Skipping...")
# continue
elif annotation == "u":
i -= 1
elif isinstance(_str_to_int(annotation), int):
write_annotation(
str(results_path).split(".")[0].split("/")[-2:],
out_dir,
annotation,
location_stack[i],
)
print(location_stack[i][0])
# Append into annotations file
async with trio.open_nursery() as nursery:
nursery.start_soon(generate_image, generator, location_stack)
nursery.start_soon(draw, location_stack[i][1], axes)
print("Annotation done!")
# if __name__ == "__main__":
def annotate():
if any([param_values.get(k) is None for k in ("min_tp", "max_tp")]):
interval = None
else:
interval = (param_values["min_tp"], param_values["max_tp"])
print(param_values)
trio.run(
annotate_images,
param_values["image_path"],
param_values["results_path"],
param_values["out_dir"],
param_values["ncells"],
param_values["seed"],
interval,
)
#!/usr/bin/env jupyter
import argparse
from agora.utils.cast import _str_to_int
from aliby.pipeline import Pipeline, PipelineParameters
def run():
import argparse
from aliby.pipeline import Pipeline, PipelineParameters
def run():
"""
Run a default microscopy analysis pipeline.
Parse command-line arguments and set default parameter values for running a pipeline, then
construct and execute the pipeline with the parameters obtained. Command-line arguments can
override default parameter values. If a command-line argument is a string representation of
an integer, convert it to an integer.
Returns
-------
None
Examples
--------
FIXME: Add docs.
"""
parser = argparse.ArgumentParser(
prog="aliby-run",
description="Run a default microscopy analysis pipeline",
......@@ -22,23 +38,13 @@ def run():
"password": None,
}
def _cast_str(x: str or None):
"""
Cast string as int if possible. If Nonetype return None.
"""
if x:
try:
return int(x)
except:
return x
for k in param_values:
parser.add_argument(f"--{k}", action="store")
args = parser.parse_args()
for k in param_values:
if passed_value := _cast_str(getattr(args, k)):
if passed_value := _str_to_int(getattr(args, k)):
param_values[k] = passed_value
......
......@@ -10,49 +10,54 @@ import shutil
import time
import typing as t
from abc import ABC, abstractproperty, abstractmethod
from pathlib import Path, PosixPath
from pathlib import Path
from agora.io.bridge import BridgeH5
from aliby.io.image import ImageLocalOME
def dispatch_dataset(expt_id: int or str, **kwargs):
"""
Choose a subtype of dataset based on the identifier.
Find paths to the data.
Input:
--------
expt_id: int or string serving as dataset identifier.
Connects to OMERO if data is remotely available.
Returns:
--------
Callable Dataset instance, either network-dependent or local.
"""
if isinstance(expt_id, int): # Is an experiment online
Parameters
----------
expt_id: int or str
To identify the data, either an OMERO ID or an OME-TIFF file or a local directory.
Returns
-------
A callable Dataset instance, either network-dependent or local.
"""
if isinstance(expt_id, int):
# data available online
from aliby.io.omero import Dataset
return Dataset(expt_id, **kwargs)
elif isinstance(expt_id, str): # Files or Dir
elif isinstance(expt_id, str):
# data available locally
expt_path = Path(expt_id)
if expt_path.is_dir():
# data in multiple folders
return DatasetLocalDir(expt_path)
else:
# data in one folder as OME-TIFF files
return DatasetLocalOME(expt_path)
else:
raise Warning("Invalid expt_id")
raise Warning(f"{expt_id} is an invalid expt_id")
class DatasetLocalABC(ABC):
"""
Abstract Base class to fetch local files, either OME-XML or raw images.
Abstract Base class to find local files, either OME-XML or raw images.
"""
_valid_suffixes = ("tiff", "png")
_valid_suffixes = ("tiff", "png", "zarr")
_valid_meta_suffixes = ("txt", "log")
def __init__(self, dpath: t.Union[str, PosixPath], *args, **kwargs):
def __init__(self, dpath: t.Union[str, Path], *args, **kwargs):
self.path = Path(dpath)
def __enter__(self):
......@@ -73,12 +78,9 @@ class DatasetLocalABC(ABC):
def unique_name(self):
return self.path.name
@abstractproperty
def date(self):
pass
@property
def files(self):
"""Return a dictionary with any available metadata files."""
if not hasattr(self, "_files"):
self._files = {
f: f
......@@ -91,65 +93,69 @@ class DatasetLocalABC(ABC):
return self._files
def cache_logs(self, root_dir):
# Copy metadata files to results folder
"""Copy metadata files to results folder."""
for name, annotation in self.files.items():
shutil.copy(annotation, root_dir / name.name)
return True
@abstractproperty
def date(self):
pass
@abstractmethod
def get_images(self):
# Return a dictionary with the name of images and their unique identifiers
pass
class DatasetLocalDir(DatasetLocalABC):
"""
Organise an entire dataset, composed of multiple images, as a directory containing directories with individual files.
It relies on ImageDir to manage images.
"""
"""Find paths to a data set, comprising multiple images in different folders."""
def __init__(self, dpath: t.Union[str, PosixPath], *args, **kwargs):
def __init__(self, dpath: t.Union[str, Path], *args, **kwargs):
super().__init__(dpath)
@property
def date(self):
# Use folder creation date, for cases where metadata is minimal
"""Find date when a folder was created."""
return time.strftime(
"%Y%m%d", time.strptime(time.ctime(os.path.getmtime(self.path)))
)
def get_images(self):
return {
folder.name: folder
for folder in self.path.glob("*/")
if any(
"""Return a dictionary of folder or file names and their paths.
FUTURE 3.12 use pathlib is_junction to pick Dir or File
"""
images = {
item.name: item
for item in self.path.glob("*/")
if item.is_dir()
and any(
path
for suffix in self._valid_suffixes
for path in folder.glob(f"*.{suffix}")
for path in item.glob(f"*.{suffix}")
)
or item.suffix[1:] in self._valid_suffixes
}
return images
class DatasetLocalOME(DatasetLocalABC):
"""Load a dataset from a folder
We use a given image of a dataset to obtain the metadata,
as we cannot expect folders to contain this information.
It uses the standard OME-TIFF file format.
"""
class DatasetLocalOME(DatasetLocalABC):
"""Find names of images in a folder, assuming images in OME-TIFF format."""
def __init__(self, dpath: t.Union[str, PosixPath], *args, **kwargs):
def __init__(self, dpath: t.Union[str, Path], *args, **kwargs):
super().__init__(dpath)
assert len(self.get_images()), "No .tiff files found"
assert len(
self.get_images()
), f"No valid files found. Formats are {self._valid_suffixes}"
@property
def date(self):
# Access the date from the metadata of the first position
"""Get the date from the metadata of the first position."""
return ImageLocalOME(list(self.get_images().values())[0]).date
def get_images(self):
# Fetches all valid formats and overwrites if duplicates with different suffix
"""Return a dictionary with the names of the image files."""
return {
f.name: str(f)
for suffix in self._valid_suffixes
......
......@@ -16,15 +16,17 @@ ImageDummy is a dummy class for silent failure testing.
import typing as t
from abc import ABC, abstractmethod, abstractproperty
from datetime import datetime
from importlib_resources import files
from pathlib import Path, PosixPath
from pathlib import Path
import dask.array as da
import numpy as np
import xmltodict
import zarr
from dask.array.image import imread
from importlib_resources import files
from tifffile import TiffFile
from agora.io.metadata import dir_to_meta
from agora.io.metadata import dir_to_meta, dispatch_metadata_parser
def get_examples_dir():
......@@ -32,18 +34,41 @@ def get_examples_dir():
return files("aliby").parent.parent / "examples" / "tiler"
def get_image_class(source: t.Union[str, int, t.Dict[str, str], PosixPath]):
def instantiate_image(
source: t.Union[str, int, t.Dict[str, str], Path], **kwargs
):
"""Wrapper to instatiate the appropiate image
Parameters
----------
source : t.Union[str, int, t.Dict[str, str], Path]
Image identifier
Examples
--------
image_path = "path/to/image"]
with instantiate_image(image_path) as img:
print(imz.data, img.metadata)
"""
return dispatch_image(source)(source, **kwargs)
def dispatch_image(source: t.Union[str, int, t.Dict[str, str], Path]):
"""
Wrapper to pick the appropiate Image class depending on the source of data.
"""
if isinstance(source, int):
if isinstance(source, (int, np.int64)):
from aliby.io.omero import Image
instatiator = Image
elif isinstance(source, dict) or (
isinstance(source, (str, PosixPath)) and Path(source).is_dir()
isinstance(source, (str, Path)) and Path(source).is_dir()
):
instatiator = ImageDir
if Path(source).suffix == ".zarr":
instatiator = ImageZarr
else:
instatiator = ImageDir
elif isinstance(source, str) and Path(source).is_file():
instatiator = ImageLocalOME
else:
......@@ -59,7 +84,7 @@ class BaseLocalImage(ABC):
_default_dimorder = "tczyx"
def __init__(self, path: t.Union[str, PosixPath]):
def __init__(self, path: t.Union[str, Path]):
# If directory, assume contents are naturally sorted
self.path = Path(path)
......@@ -107,118 +132,9 @@ class BaseLocalImage(ABC):
def metadata(self):
return self._meta
class ImageDummy(BaseLocalImage):
"""
Dummy Image class.
ImageDummy mimics the other Image classes in such a way that it is accepted
by Tiler. The purpose of this class is for testing, in particular,
identifying silent failures. If something goes wrong, we should be able to
know whether it is because of bad parameters or bad input data.
For the purposes of testing parameters, ImageDummy assumes that we already
know the tiler parameters before Image instances are instantiated. This is
true for a typical pipeline run.
"""
def __init__(self, tiler_parameters: dict):
"""Builds image instance
Parameters
----------
tiler_parameters : dict
Tiler parameters, in dict form. Following
aliby.tile.tiler.TilerParameters, the keys are: "tile_size" (size of
tile), "ref_channel" (reference channel for tiling), and "ref_z"
(reference z-stack, 0 to choose a default).
"""
self.ref_channel = tiler_parameters["ref_channel"]
self.ref_z = tiler_parameters["ref_z"]
# Goal: make Tiler happy.
@staticmethod
def pad_array(
image_array: da.Array,
dim: int,
n_empty_slices: int,
image_position: int = 0,
):
"""Extends a dimension in a dask array and pads with zeros
Extends a dimension in a dask array that has existing content, then pads
with zeros.
Parameters
----------
image_array : da.Array
Input dask array
dim : int
Dimension in which to extend the dask array.
n_empty_slices : int
Number of empty slices to extend the dask array by, in the specified
dimension/axis.
image_position : int
Position within the new dimension to place the input arary, default 0
(the beginning).
Examples
--------
```
extended_array = pad_array(
my_da_array, dim = 2, n_empty_slices = 4, image_position = 1)
```
Extends a dask array called `my_da_array` in the 3rd dimension
(dimensions start from 0) by 4 slices, filled with zeros. And puts the
original content in slice 1 of the 3rd dimension
"""
# Concats zero arrays with same dimensions as image_array, and puts
# image_array as first element in list of arrays to be concatenated
zeros_array = da.zeros_like(image_array)
return da.concatenate(
[
*([zeros_array] * image_position),
image_array,
*([zeros_array] * (n_empty_slices - image_position)),
],
axis=dim,
)
# Logic: We want to return a image instance
def get_data_lazy(self) -> da.Array:
"""Return 5D dask array. For lazy-loading multidimensional tiff files. Dummy image."""
examples_dir = get_examples_dir()
# TODO: Make this robust to having multiple TIFF images, one for each z-section,
# all falling under the same "pypipeline_unit_test_00_000001_Brightfield_*.tif"
# naming scheme. The aim is to create a multidimensional dask array that stores
# the z-stacks.
img_filename = "pypipeline_unit_test_00_000001_Brightfield_003.tif"
img_path = examples_dir / img_filename
# img is a dask array has three dimensions: z, x, y
# TODO: Write a test to confirm this: If everything worked well,
# z = 1, x = 1200, y = 1200
img = imread(str(img_path))
# Adds t & c dimensions
img = da.reshape(
img, (1, 1, img.shape[-3], img.shape[-2], img.shape[-1])
)
# Pads t, c, and z dimensions
img = self.pad_array(
img, dim=0, n_empty_slices=199
) # 200 timepoints total
img = self.pad_array(img, dim=1, n_empty_slices=2) # 3 channels
img = self.pad_array(
img, dim=2, n_empty_slices=4, image_position=self.ref_z
) # 5 z-stacks
return img
@property
def name(self):
pass
@property
def dimorder(self):
pass
def set_meta(self):
"""Load metadata using parser dispatch"""
self._meta = dispatch_metadata_parser(self.path)
class ImageLocalOME(BaseLocalImage):
......@@ -233,6 +149,7 @@ class ImageLocalOME(BaseLocalImage):
super().__init__(path)
self._id = str(path)
def set_meta(self):
meta = dict()
try:
with TiffFile(path) as f:
......@@ -335,7 +252,7 @@ class ImageDir(BaseLocalImage):
- Provides Dimorder as it is set in the filenames, or expects order during instatiation
"""
def __init__(self, path: t.Union[str, PosixPath], **kwargs):
def __init__(self, path: t.Union[str, Path], **kwargs):
super().__init__(path)
self.image_id = str(self.path.stem)
......@@ -382,3 +299,158 @@ class ImageDir(BaseLocalImage):
return [
k.split("_")[-1] for k in self._meta.keys() if k.startswith("size")
]
class ImageZarr(BaseLocalImage):
"""
Read zarr compressed files.
These are outputed by the script
skeletons/scripts/howto_omero/convert_clone_zarr_to_tiff.py
"""
def __init__(self, path: t.Union[str, Path], **kwargs):
super().__init__(path)
self.set_meta()
try:
self._img = zarr.open(self.path)
self.add_size_to_meta()
except Exception as e:
print(f"Could not add size info to metadata: {e}")
def get_data_lazy(self) -> da.Array:
"""Return 5D dask array. For lazy-loading local multidimensional zarr files"""
return self._img
def add_size_to_meta(self):
self._meta.update(
{
f"size_{dim}": shape
for dim, shape in zip(self.dimorder, self._img.shape)
}
)
@property
def name(self):
return self.path.stem
@property
def dimorder(self):
# FIXME hardcoded order based on zarr compression/cloning script
return "TCZYX"
# Assumes only dimensions start with "size"
# return [
# k.split("_")[-1] for k in self._meta.keys() if k.startswith("size")
# ]
class ImageDummy(BaseLocalImage):
"""
Dummy Image class.
ImageDummy mimics the other Image classes in such a way that it is accepted
by Tiler. The purpose of this class is for testing, in particular,
identifying silent failures. If something goes wrong, we should be able to
know whether it is because of bad parameters or bad input data.
For the purposes of testing parameters, ImageDummy assumes that we already
know the tiler parameters before Image instances are instantiated. This is
true for a typical pipeline run.
"""
def __init__(self, tiler_parameters: dict):
"""Builds image instance
Parameters
----------
tiler_parameters : dict
Tiler parameters, in dict form. Following
aliby.tile.tiler.TilerParameters, the keys are: "tile_size" (size of
tile), "ref_channel" (reference channel for tiling), and "ref_z"
(reference z-stack, 0 to choose a default).
"""
self.ref_channel = tiler_parameters["ref_channel"]
self.ref_z = tiler_parameters["ref_z"]
# Goal: make Tiler happy.
@staticmethod
def pad_array(
image_array: da.Array,
dim: int,
n_empty_slices: int,
image_position: int = 0,
):
"""Extends a dimension in a dask array and pads with zeros
Extends a dimension in a dask array that has existing content, then pads
with zeros.
Parameters
----------
image_array : da.Array
Input dask array
dim : int
Dimension in which to extend the dask array.
n_empty_slices : int
Number of empty slices to extend the dask array by, in the specified
dimension/axis.
image_position : int
Position within the new dimension to place the input arary, default 0
(the beginning).
Examples
--------
```
extended_array = pad_array(
my_da_array, dim = 2, n_empty_slices = 4, image_position = 1)
```
Extends a dask array called `my_da_array` in the 3rd dimension
(dimensions start from 0) by 4 slices, filled with zeros. And puts the
original content in slice 1 of the 3rd dimension
"""
# Concats zero arrays with same dimensions as image_array, and puts
# image_array as first element in list of arrays to be concatenated
zeros_array = da.zeros_like(image_array)
return da.concatenate(
[
*([zeros_array] * image_position),
image_array,
*([zeros_array] * (n_empty_slices - image_position)),
],
axis=dim,
)
# Logic: We want to return a image instance
def get_data_lazy(self) -> da.Array:
"""Return 5D dask array. For lazy-loading multidimensional tiff files. Dummy image."""
examples_dir = get_examples_dir()
# TODO: Make this robust to having multiple TIFF images, one for each z-section,
# all falling under the same "pypipeline_unit_test_00_000001_Brightfield_*.tif"
# naming scheme. The aim is to create a multidimensional dask array that stores
# the z-stacks.
img_filename = "pypipeline_unit_test_00_000001_Brightfield_003.tif"
img_path = examples_dir / img_filename
# img is a dask array has three dimensions: z, x, y
# TODO: Write a test to confirm this: If everything worked well,
# z = 1, x = 1200, y = 1200
img = imread(str(img_path))
# Adds t & c dimensions
img = da.reshape(
img, (1, 1, img.shape[-3], img.shape[-2], img.shape[-1])
)
# Pads t, c, and z dimensions
img = self.pad_array(
img, dim=0, n_empty_slices=199
) # 200 timepoints total
img = self.pad_array(img, dim=1, n_empty_slices=2) # 3 channels
img = self.pad_array(
img, dim=2, n_empty_slices=4, image_position=self.ref_z
) # 5 z-stacks
return img
@property
def name(self):
pass
@property
def dimorder(self):
pass
......@@ -5,7 +5,7 @@ Tools to manage I/O using a remote OMERO server.
import re
import typing as t
from abc import abstractmethod
from pathlib import PosixPath
from pathlib import Path
import dask.array as da
import numpy as np
......@@ -115,7 +115,7 @@ class BridgeOmero:
@classmethod
def server_info_from_h5(
cls,
filepath: t.Union[str, PosixPath],
filepath: t.Union[str, Path],
):
"""Return server info from hdf5 file.
......@@ -123,7 +123,7 @@ class BridgeOmero:
----------
cls : BridgeOmero
BridgeOmero class
filepath : t.Union[str, PosixPath]
filepath : t.Union[str, Path]
Location of hdf5 file.
Examples
......@@ -133,9 +133,8 @@ class BridgeOmero:
"""
# metadata = load_attributes(filepath)
bridge = BridgeH5(filepath)
server_info = safe_load(bridge.meta_h5["parameters"])["general"][
"server_info"
]
meta = safe_load(bridge.meta_h5["parameters"])["general"]
server_info = {k: meta[k] for k in ("host", "username", "password")}
return server_info
def set_id(self, ome_id: int):
......@@ -151,7 +150,7 @@ class BridgeOmero:
return valid_annotations
def add_file_as_annotation(
self, file_to_upload: t.Union[str, PosixPath], **kwargs
self, file_to_upload: t.Union[str, Path], **kwargs
):
"""Upload annotation to object on OMERO server. Only valid in subclasses.
......@@ -171,7 +170,19 @@ class BridgeOmero:
class Dataset(BridgeOmero):
def __init__(self, expt_id: str or int, **server_info):
"""
Tool to interact with Omero Datasets remotely, access their
metadata and associated files and images.
Parameters
----------
expt_id: int Dataset id on server
server_info: dict host, username and password
"""
def __init__(self, expt_id: int, **server_info):
super().__init__(ome_id=expt_id, **server_info)
@property
......@@ -241,7 +252,7 @@ class Dataset(BridgeOmero):
@classmethod
def from_h5(
cls,
filepath: t.Union[str, PosixPath],
filepath: t.Union[str, Path],
):
"""Instatiate Dataset from a hdf5 file.
......@@ -249,7 +260,7 @@ class Dataset(BridgeOmero):
----------
cls : Image
Image class
filepath : t.Union[str, PosixPath]
filepath : t.Union[str, Path]
Location of hdf5 file.
Examples
......@@ -288,7 +299,7 @@ class Image(BridgeOmero):
@classmethod
def from_h5(
cls,
filepath: t.Union[str, PosixPath],
filepath: t.Union[str, Path],
):
"""Instatiate Image from a hdf5 file.
......@@ -296,7 +307,7 @@ class Image(BridgeOmero):
----------
cls : Image
Image class
filepath : t.Union[str, PosixPath]
filepath : t.Union[str, Path]
Location of hdf5 file.
Examples
......
#!/usr/bin/env jupyter
"""
Models that link regions of interest, such as mothers and buds.
"""
#!/usr/bin/env jupyter
"""
Extracted from the baby repository. Bud Tracker algorithm to link
cell outlines as mothers and buds.
"""
# /usr/bin/env jupyter
import pickle
from os.path import join
......
This diff is collapsed.
This diff is collapsed.
"""
A set of utilities for dealing with ALCATRAS traps
"""
"""Functions for identifying and dealing with ALCATRAS traps."""
import numpy as np
from skimage import feature, transform
......@@ -31,10 +28,10 @@ def segment_traps(
**identify_traps_kwargs,
):
"""
Uses an entropy filter and Otsu thresholding to find a trap template,
Use an entropy filter and Otsu thresholding to find a trap template,
which is then passed to identify_trap_locations.
To obtain candidate traps it the major axis length of a tile must be smaller than tilesize.
To obtain candidate traps the major axis length of a tile must be smaller than tilesize.
The hyperparameters have not been optimised.
......@@ -60,7 +57,7 @@ def segment_traps(
Returns
-------
traps: an array of pairs of integers
The coordinates of the centroids of the traps
The coordinates of the centroids of the traps.
"""
# keep a memory of image in case need to re-run
img = image
......@@ -120,7 +117,7 @@ def segment_traps(
for x, y in centroids
]
# make a mean template from all the found traps
mean_template = np.dstack(candidate_templates).astype(int).mean(axis=-1)
mean_template = np.stack(candidate_templates).astype(int).mean(axis=0)
# find traps using the mean trap template
traps = identify_trap_locations(
......@@ -144,17 +141,18 @@ def identify_trap_locations(
image, trap_template, optimize_scale=True, downscale=0.35, trap_size=None
):
"""
Identify the traps in a single image based on a trap template,
which requires the trap template to be similar to the image
(same camera, same magification - ideally the same experiment).
Identify the traps in a single image based on a trap template.
Requires the trap template to be similar to the image
(same camera, same magnification - ideally the same experiment).
Uses normalised correlation in scikit-image's match_template.
Use normalised correlation in scikit-image's to match_template.
The search is speeded up by downscaling both the image and
The search is sped up by down-scaling both the image and
the trap template before running the template matching.
The trap template is rotated and re-scaled to improve matching.
The parameters of the rotation and rescaling are optimised, although
The parameters of the rotation and re-scaling are optimised, although
over restricted ranges.
Parameters
......@@ -229,18 +227,3 @@ def identify_trap_locations(
exclude_border=(trap_size // 3),
)
return coordinates
###############################################################
# functions below here do not appear to be used any more
###############################################################
def stretch_image(image):
# FIXME Used in aliby.utils.imageViewer
image = ((image - image.min()) / (image.max() - image.min())) * 255
minval = np.percentile(image, 2)
maxval = np.percentile(image, 98)
image = np.clip(image, minval, maxval)
image = (image - minval) / (maxval - minval)
return image
\ No newline at end of file
......@@ -6,7 +6,7 @@ import pickle
import typing as t
from collections import Counter
from os.path import dirname, join
from pathlib import Path, PosixPath
from pathlib import Path
import numpy as np
from scipy.optimize import linear_sum_assignment
......@@ -70,11 +70,11 @@ class CellTracker(FeatureCalculator):
if extrafeats is None:
extrafeats = ()
if type(model) is str or type(model) is PosixPath:
if type(model) is str or type(model) is Path:
with open(Path(model), "rb") as f:
model = pickle.load(f)
if type(bak_model) is str or type(bak_model) is PosixPath:
if type(bak_model) is str or type(bak_model) is Path:
with open(Path(bak_model), "rb") as f:
bak_model = pickle.load(f)
......
......@@ -5,7 +5,7 @@ import re
import typing as t
from collections import Counter
from datetime import datetime
from pathlib import Path, PosixPath
from pathlib import Path
import numpy as np
from logfile_parser import Parser
......@@ -56,7 +56,7 @@ class OmeroExplorer:
def acq(self):
return {k: parse_annot(v, "acq") for k, v in self.raw_acq.items()}
def load(self, min_id=18000, min_date=None):
def load(self, min_id=0, min_date=None):
"""
:min_id: int
:min_date: tuple
......@@ -77,7 +77,10 @@ class OmeroExplorer:
# sort by dates
dates = [d.getDate() for d in self._dsets_bak]
self._dsets_bak[:] = [
a for _, a in sorted(zip(dates, self._dsets_bak))
a
for _, a in sorted(
zip(dates, self._dsets_bak), key=lambda x: x[0]
)
]
self._dsets_bak = [
......@@ -389,7 +392,7 @@ def get_chs(exptype):
def load_annot_from_cache(exp_id, cache_dir="cache/"):
# TODO Documentation
if type(cache_dir) is not PosixPath:
if type(cache_dir) is not Path:
cache_dir = Path(cache_dir)
annot_sets = {}
......
This diff is collapsed.
#!/usr/bin/env jupyter
"""
Basic plotting functions for cell visualisation
"""
import typing as t
import numpy as np
from grid_strategy import strategies
from matplotlib import pyplot as plt
def plot_overlay(
bg: np.ndarray, fg: np.ndarray, alpha: float = 1.0, ax=plt
) -> None:
"""
Plot two images, one on top of the other.
"""
ax1 = ax.imshow(
bg, cmap=plt.cm.gray, interpolation="none", interpolation_stage="rgba"
)
ax2 = ax.imshow(
stretch(fg),
alpha=alpha,
interpolation="none",
interpolation_stage="rgba",
)
plt.axis("off")
return ax1, ax2
def plot_overlay_in_square(data: t.Tuple[np.ndarray, np.ndarray]):
"""
Plot images in an automatically-arranged grid.
"""
specs = strategies.SquareStrategy("center").get_grid(len(data))
for i, (gs, (tile, mask)) in enumerate(zip(specs, data)):
ax = plt.subplot(gs)
plot_overlay(tile, mask, ax=ax)
def plot_in_square(data: t.Iterable):
"""
Plot images in an automatically-arranged grid. Only takes one mask
"""
specs = strategies.SquareStrategy("center").get_grid(len(data))
for i, (gs, datum) in enumerate(zip(specs, data)):
ax = plt.subplot(gs)
ax.imshow(datum)
def stretch_clip(image, clip=True):
"""
Performs contrast stretching on an input image.
This function takes an array-like input image and enhances its contrast by adjusting
the dynamic range of pixel values. It first scales the pixel values between 0 and 255,
then clips the values that are below the 2nd percentile or above the 98th percentile.
Finally, the pixel values are scaled to the range between 0 and 1.
Parameters
----------
image : array-like
Input image.
Returns
-------
stretched : ndarray
Contrast-stretched version of the input image.
Examples
--------
FIXME: Add docs.
"""
from copy import copy
image = image[~np.isnan(image)]
image = ((image - image.min()) / (image.max() - image.min())) * 255
if clip:
minval = np.percentile(image, 2)
maxval = np.percentile(image, 98)
image = np.clip(image, minval, maxval)
image = (image - minval) / (maxval - minval)
return image
def stretch(image):
nona = image[~np.isnan(image)]
return (image - nona.min()) / (nona.max() - nona.min())
This diff is collapsed.