Skip to content
Snippets Groups Projects
Commit 529b3208 authored by pswain's avatar pswain
Browse files

more docs

parent d4360872
No related branches found
No related tags found
No related merge requests found
"""
The Extractor applies a metric, such as area or median, to image tiles using the cell masks.
Its methods therefore require both tile images and masks.
Usually one metric is applied per mask, but there are tile-specific backgrounds, which apply one metric per tile.
Extraction follows a three-level tree structure. Channels, such as GFP, are the root level; the second level is the reduction algorithm, such as maximum projection; the last level is the metric -- the specific operation to apply to the cells in the image identified by the mask, such as median -- the median value of the pixels in each cell.
"""
import logging
import typing as t
from time import perf_counter
......@@ -42,9 +33,6 @@ MERGE_FUNS = load_mergefuns()
class ExtractorParameters(ParametersABC):
"""
Base class to define parameters for extraction
:tree: dict of depth n. If not of depth three tree will be filled with Nones
str channel -> U(function,None) reduction -> str metric
"""
def __init__(
......@@ -53,31 +41,36 @@ class ExtractorParameters(ParametersABC):
sub_bg: set = set(),
multichannel_ops: Dict = {},
):
"""
Parameters
----------
tree: dict
Nested dictionary indicating channels, reduction functions and
metrics to be used.
str channel -> U(function,None) reduction -> str metric
If not of depth three, tree will be filled with Nones.
sub_bg: set
multichannel_ops: dict
"""
self.tree = fill_tree(tree)
self.sub_bg = sub_bg
self.multichannel_ops = multichannel_ops
@staticmethod
def guess_from_meta(store_name: str, suffix="fast"):
"""
Make a guess on the parameters using the hdf5 metadata
Find the microscope used from the h5 metadata
Add anything as a suffix, default "fast"
Parameters:
store_name : str or Path indicating the results' storage.
suffix : str to add at the end of the predicted parameter set
Parameters
----------
store_name : str or Path
For a h5 file
suffix : str
Added at the end of the predicted parameter set
"""
with h5py.open(store_name, "r") as f:
microscope = f["/"].attrs.get(
"microscope"
) # TODO Check this with Arin
with h5py.File(store_name, "r") as f:
microscope = f["/"].attrs.get("microscope")
assert microscope, "No metadata found"
return "_".join((microscope, suffix))
@classmethod
......@@ -91,20 +84,30 @@ class ExtractorParameters(ParametersABC):
class Extractor(ProcessABC):
"""
Base class to perform feature extraction.
The Extractor applies a metric, such as area or median, to cells identified in the image tiles using the cell masks.
Its methods therefore require both tile images and masks.
Usually one metric is applied per mask, but there are tile-specific backgrounds (Alan), which apply one metric per tile.
Extraction follows a three-level tree structure. Channels, such as GFP, are the root level; the second level is the reduction algorithm, such as maximum projection; the last level is the metric - the specific operation to apply to the cells in the image identified by the mask, such as median, which is the median value of the pixels in each cell.
Parameters
----------
parameters: core.extractor Parameters
Parameters that include with channels, reduction and
extraction functions to use.
store: str
Path to hdf5 storage file. Must contain cell outlines.
tiler: pipeline-core.core.segmentation tiler
Class that contains or fetches the image to be used for segmentation.
parameters: core.extractor Parameters
Parameters that include with channels, reduction and
extraction functions to use.
store: str
Path to hdf5 storage file. Must contain cell outlines.
tiler: pipeline-core.core.segmentation tiler
Class that contains or fetches the image to be used for segmentation.
"""
default_meta = {"pixel_size": 0.236, "z_size": 0.6, "spacing": 0.6}
default_meta = {
"pixel_size": 0.236,
"z_size": 0.6,
"spacing": 0.6,
}
def __init__(
self,
......@@ -112,11 +115,22 @@ class Extractor(ProcessABC):
store: str = None,
tiler: Tiler = None,
):
"""
Initialise Extractor.
Parameters
----------
parameters: ExtractorParameters object
store: str
Name of h5 file
tiler: Tiler object
"""
self.params = parameters
if store:
self.local = store
self.load_meta()
else: # In case no h5 file is used, just use the parameters straight ahead
else:
# if no h5 file, use the parameters directly
self.meta = {"channel": parameters.to_dict()["tree"].keys()}
if tiler:
self.tiler = tiler
......@@ -124,22 +138,30 @@ class Extractor(ProcessABC):
@classmethod
def from_tiler(
cls, parameters: ExtractorParameters, store: str, tiler: Tiler
cls,
parameters: ExtractorParameters,
store: str,
tiler: Tiler,
):
# initate from tiler
return cls(parameters, store=store, tiler=tiler)
@classmethod
def from_img(
cls, parameters: ExtractorParameters, store: str, img_meta: tuple
cls,
parameters: ExtractorParameters,
store: str,
img_meta: tuple,
):
# initiate from image
return cls(parameters, store=store, tiler=Tiler(*img_meta))
@property
def channels(self):
# returns a tuple of strings of the available channels
if not hasattr(self, "_channels"):
if type(self.params.tree) is dict:
self._channels = tuple(self.params.tree.keys())
return self._channels
@property
......@@ -147,16 +169,18 @@ class Extractor(ProcessABC):
return self.local.split("/")[-1][:-3]
@property
def group(self): # Path within hdf5
def group(self):
# returns path within h5 file
if not hasattr(self, "_out_path"):
self._group = "/extraction/"
return self._group
def load_custom_funs(self):
"""
Load parameters of functions that require them from expt.
These must be loaded within the Extractor instance because their parameters
depend on their experiment's metadata.
Load any necessary parameters for functions.
These parameters must be loaded within an Extractor instance because they depend on the experiment's metadata.
Alan: this is challenging to follow!
"""
funs = set(
[
......@@ -188,21 +212,40 @@ class Extractor(ProcessABC):
self._all_funs = {**self._custom_funs, **FUNS}
def load_meta(self):
# load metadata from h5 file whose name is given by self.local
self.meta = load_attributes(self.local)
def get_traps(
self, tp: int, channels: list = None, z: list = None, **kwargs
) -> tuple:
"""
Finds traps for a given time point and given channels and z-stacks.
Returns None if no traps are found.
Any additional keyword arguments are passed to tiler.get_traps_timepoint
Parameters
----------
tp: int
Time point of interest
channels: list of strings (optional)
Channels of interest
z: list of integers (optional)
Indices for the z-stacks of interest
"""
if channels is None:
# find channels from tiler
channel_ids = list(range(len(self.tiler.channels)))
elif len(channels):
# a subset of channels was specified
channel_ids = [self.tiler.get_channel_index(ch) for ch in channels]
else:
# oh oh
channel_ids = None
# a list of the indices of the z stacks
if z is None:
z = list(range(self.tiler.shape[-1]))
# find the appropiate traps from tiler
traps = (
self.tiler.get_traps_timepoint(
tp, channels=channel_ids, z=z, **kwargs
......@@ -210,7 +253,6 @@ class Extractor(ProcessABC):
if channel_ids
else None
)
return traps
def extract_traps(
......@@ -272,7 +314,6 @@ class Extractor(ProcessABC):
)
for metric in metrics
}
return d
def reduce_extract(
......@@ -322,8 +363,8 @@ class Extractor(ProcessABC):
"""
if method is None:
return img
return reduce_z(img, method)
else:
return reduce_z(img, method)
def extract_tp(
self,
......@@ -370,10 +411,10 @@ class Extractor(ProcessABC):
ch_tree = {ch: v for ch, v in tree.items() if ch != "general"}
# tuple of the channels
tree_chs = (*ch_tree,)
# create a Cells object to extract information from the h5 file
cells = Cells(self.local)
# labels
# find the cell labels and store as dict with trap_ids as keys
if labels is None:
raw_labels = cells.labels_at_time(tp)
labels = {
......@@ -381,22 +422,22 @@ class Extractor(ProcessABC):
for trap_id in range(cells.ntraps)
}
# masks
# find the cell masks as a dict with trap_ids as keys
if masks is None:
raw_masks = cells.at_time(tp, kind="mask")
masks = {trap_id: [] for trap_id in range(cells.ntraps)}
for trap_id, cells in raw_masks.items():
if len(cells):
masks[trap_id] = np.dstack(np.array(cells)).astype(bool)
# convert to a list of masks
masks = [np.array(v) for v in masks.values()]
# traps
# find traps at the time point
traps = self.get_traps(tp, tile_shape=tile_size, channels=tree_chs)
self.img_bgsub = {}
if self.params.sub_bg:
# Generate boolean masks for background
# generate boolean masks for background
bg = [
~np.sum(m, axis=2).astype(bool)
if np.any(m)
......@@ -405,7 +446,6 @@ class Extractor(ProcessABC):
]
d = {}
for ch, red_metrics in tree.items():
img = None
# ch != is necessary for threading
......
......@@ -4,24 +4,18 @@ Base functions to extract information from a single cell
These functions are automatically read, so only add new functions with
the same arguments as the existing ones.
Input:
:cell_mask: (x,y) 2-D cell mask
:trap_image: (x,y) 2-D or (x,y,z) 3-D cell mask
np.where is used to cover for cases where z>1
TODO: Implement membrane functions when needed
"""
import math
# Alan: why aren't we using skimage's regionprops?
import numpy as np
from scipy import ndimage
from sklearn.cluster import KMeans
def area(cell_mask):
# find the area of a cell mask
return np.sum(cell_mask, dtype=int)
......@@ -32,86 +26,176 @@ def eccentricity(cell_mask):
def mean(cell_mask, trap_image):
"""
Finds the mean of the pixels in the cell.
Parameters
----------
cell_mask: 2d array
Segmentation mask for the cell
trap_image: 2d array
"""
return np.mean(trap_image[np.where(cell_mask)], dtype=float)
def median(cell_mask, trap_image):
"""
Finds the median of the pixels in the cell.
Parameters
----------
cell_mask: 2d array
Segmentation mask for the cell
trap_image: 2d array
"""
return np.median(trap_image[np.where(cell_mask)])
def max2p5pc(cell_mask, trap_image):
"""
Finds the mean of the brightest 2.5% of pixels in the cell.
Parameters
----------
cell_mask: 2d array
Segmentation mask for the cell
trap_image: 2d array
"""
# number of pixels in mask
npixels = cell_mask.sum()
top_pixels = int(np.ceil(npixels * 0.025))
# sort pixels in cell
sorted_vals = np.sort(trap_image[np.where(cell_mask)], axis=None)
# find highest 2.5%
top_vals = sorted_vals[-top_pixels:]
# find mean of these highest pixels
max2p5pc = np.mean(top_vals, dtype=float)
return max2p5pc
def max5px(cell_mask, trap_image):
"""
Finds the mean of the five brightest pixels in the cell.
Parameters
----------
cell_mask: 2d array
Segmentation mask for the cell
trap_image: 2d array
"""
# sort pixels in cell
sorted_vals = np.sort(trap_image[np.where(cell_mask)], axis=None)
top_vals = sorted_vals[-5:]
# find mean of five brightest pixels
max5px = np.mean(top_vals, dtype=float)
return max5px
def max5px_med(cell_mask, trap_image):
"""
Finds the mean of the five brightest pixels in the cell divided by the median pixel value.
Parameters
----------
cell_mask: 2d array
Segmentation mask for the cell
trap_image: 2d array
"""
# sort pixels in cell
sorted_vals = np.sort(trap_image[np.where(cell_mask)], axis=None)
top_vals = sorted_vals[-5:]
# find mean of five brightest pixels
max5px = np.mean(top_vals, dtype=float)
med = sorted_vals[len(sorted_vals) // 2] if len(sorted_vals) else 1
return max5px / med if med else max5px
# find the median
med = np.median(sorted_vals)
return max5px / med
def max2p5pc_med(cell_mask, trap_image):
"""
Finds the mean of the brightest 2.5% of pixels in the cell
divided by the median pixel value.
Parameters
----------
cell_mask: 2d array
Segmentation mask for the cell
trap_image: 2d array
"""
# number of pixels in mask
npixels = cell_mask.sum()
top_pixels = int(np.ceil(npixels * 0.025))
# sort pixels in cell
sorted_vals = np.sort(trap_image[np.where(cell_mask)], axis=None)
# find highest 2.5%
top_vals = sorted_vals[-top_pixels:]
# find mean of these highest pixels
max2p5pc = np.mean(top_vals, dtype=float)
med = sorted_vals[len(sorted_vals) // 2] if len(sorted_vals) else 1
return max2p5pc / med if med else max2p5pc
med = np.median(sorted_vals)
return max2p5pc / med
def std(cell_mask, trap_image):
"""
Finds the standard deviation of the values of the pixels in the cell.
Parameters
----------
cell_mask: 2d array
Segmentation mask for the cell
trap_image: 2d array
"""
return np.std(trap_image[np.where(cell_mask)], dtype=float)
def k2_top_median(cell_mask, trap_image):
# Use kmeans to cluster the contents of a cell in two, return the high median
# Useful when a big non-tagged organelle (e.g. vacuole) occupies a big fraction
# of the cell
if not np.any(cell_mask):
return np.nan
"""
Finds the medians of the major and minor clusters after clustering the pixels in the cell into two clusters.
X = trap_image[np.where(cell_mask)].reshape(-1, 1)
kmeans = KMeans(n_clusters=2, random_state=0).fit(X)
high_clust_id = kmeans.cluster_centers_.argmax()
major_cluster = X[kmeans.predict(X) == high_clust_id]
These medians might be useful if the cell has a large vacuole.
k2_top_median = np.median(major_cluster, axis=None)
return k2_top_median
Parameters
----------
cell_mask: 2d array
Segmentation mask for the cell
trap_image: 2d array
Returns
-------
medians: tuple of floats
The median of the major cluster and the median of the minor cluster.
"""
if np.any(cell_mask):
X = trap_image[np.where(cell_mask)].reshape(-1, 1)
# cluster pixels in cell into two clusters
kmeans = KMeans(n_clusters=2, random_state=0).fit(X)
high_clust_id = kmeans.cluster_centers_.argmax()
low_clust_id = kmeans.cluster_centers_.argmin()
# find the median of pixels in the largest cluster
major_cluster = X[kmeans.predict(X) == high_clust_id]
major_median = np.median(major_cluster, axis=None)
# find the median of pixels in the smallest cluster
minor_cluster = X[kmeans.predict(X) == low_clust_id]
minor_median = np.median(minor_cluster, axis=None)
return major_median, minor_median
else:
return np.nan, np.nan
def volume(cell_mask):
"""Volume from a cell mask, assuming an ellipse.
Assumes the mask is the median plane of the ellipsoid.
Assumes rotational symmetry around the major axis.
def volume(cell_mask):
"""
Estimates the volume of the cell assuming it is an ellipsoid with the mask providing a cross-section through the median plane of the ellipsoid.
Parameters
----------
cell_mask: 2d array
Segmentation mask for the cell
"""
min_ax, maj_ax = min_maj_approximation(cell_mask)
return (4 * math.pi * min_ax**2 * maj_ax) / 3
return (4 * np.pi * min_ax**2 * maj_ax) / 3
def conical_volume(cell_mask):
padded = np.pad(cell_mask, 1, mode="constant", constant_values=0)
nearest_neighbor = (
ndimage.morphology.distance_transform_edt(padded == 1) * padded
......@@ -120,6 +204,14 @@ def conical_volume(cell_mask):
def spherical_volume(cell_mask):
'''
Estimates the volume of the cell assuming it is a sphere with the mask providing a cross-section through the median plane of the sphere.
Parameters
----------
cell_mask: 2d array
Segmentation mask for the cell
'''
area = cell_mask.sum()
r = np.sqrt(area / np.pi)
......@@ -127,14 +219,14 @@ def spherical_volume(cell_mask):
def min_maj_approximation(cell_mask):
"""Length approximation of minor and major axes of an ellipse from mask.
:param cell_mask:
:param trap_image:
:return:
"""
Finds the lengths of the minor and major axes of an ellipse from a cell mask.
Parameters
----------
cell_mask: 3d array
Segmentation masks for cells
"""
padded = np.pad(cell_mask, 1, mode="constant", constant_values=0)
nn = ndimage.morphology.distance_transform_edt(padded == 1) * padded
dn = ndimage.morphology.distance_transform_edt(nn - nn.max()) * padded
......
......@@ -69,8 +69,7 @@ def load_cellfuns():
def load_trapfuns():
"""
Load functions that are applied to an entire trap (or tile, or subsection of a given image),
instead of being applied to single cells.
Load functions that are applied to an entire trap (or tile or subsection of a given image) rather than to single cells.
"""
TRAPFUNS = {
f[0]: f[1]
......@@ -83,11 +82,10 @@ def load_trapfuns():
def load_funs():
"""
Combine all automatically-loaded functions
Combine all automatically loaded functions
"""
CELLFUNS = load_cellfuns()
TRAPFUNS = load_trapfuns()
return CELLFUNS, TRAPFUNS, {**TRAPFUNS, **CELLFUNS}
......
......@@ -5,25 +5,39 @@ import numpy as np
def imBackground(cell_masks, trap_image):
"""
:param cell_masks: (numpy 3d array) cells' segmentation mask
:param trap_image: the image for the trap in which the cell is (all
channels)
Finds the median background (pixels not comprising cells) from trap_image
Parameters
----------
cell_masks: 3d array
Segmentation masks for cells
trap_image:
The image (all channels) for the tile containing the cell
"""
if not len(cell_masks):
# create cell_masks if none are given
cell_masks = np.zeros_like(trap_image)
# find background pixels
# sum over all cells identified at a trap - one mask for each cell
background = ~cell_masks.sum(axis=2).astype(bool)
return np.median(trap_image[np.where(background)])
def background_max5(cell_masks, trap_image):
"""
:param cell_masks: (numpy 3d array) cells' segmentation mask
:param trap_image: the image for the trap in which the cell is (all
channels)
Finds the mean of the maximum five pixels of the background (pixels not comprising cells) from trap_image
Parameters
----------
cell_masks: 3d array
Segmentation masks for cells
trap_image:
The image (all channels) for the tile containing the cell
"""
if not len(cell_masks):
# create cell_masks if none are given
cell_masks = np.zeros_like(trap_image)
# find background pixels
# sum over all cells identified at a trap - one mask for each cell
background = ~cell_masks.sum(axis=2).astype(bool)
return np.mean(np.sort(trap_image[np.where(background)])[-5:])
0% Loading or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment