diff --git a/poetry.lock b/poetry.lock index c35b798c4d5a02e237ffcefa8eebdadb1790c661..aae36e42d9826f372fe644922c32a61eb2f0bb3f 100644 --- a/poetry.lock +++ b/poetry.lock @@ -217,6 +217,20 @@ webencodings = "*" css = ["tinycss2 (>=1.1.0,<1.2)"] dev = ["Sphinx (==4.3.2)", "black (==22.3.0)", "build (==0.8.0)", "flake8 (==4.0.1)", "hashin (==0.17.0)", "mypy (==0.961)", "pip-tools (==6.6.2)", "pytest (==7.1.2)", "tox (==3.25.0)", "twine (==4.0.1)", "wheel (==0.37.1)"] +[[package]] +name = "Bottleneck" +version = "1.3.5" +description = "Fast NumPy array functions written in C" +category = "main" +optional = false +python-versions = "*" + +[package.dependencies] +numpy = "*" + +[package.extras] +doc = ["gitpython", "numpydoc", "sphinx"] + [[package]] name = "cachetools" version = "5.2.0" @@ -407,6 +421,14 @@ category = "dev" optional = false python-versions = "*" +[[package]] +name = "faiss-gpu" +version = "1.7.2" +description = "A library for efficient similarity search and clustering of dense vectors." +category = "main" +optional = false +python-versions = "*" + [[package]] name = "fastjsonschema" version = "2.16.2" @@ -2793,7 +2815,7 @@ omero = ["omero-py"] [metadata] lock-version = "1.1" python-versions = ">=3.8, <3.11" -content-hash = "3a558cdc2c34c8a9864c4983c7e445167ab8040927f00a3dae3d4745421da2a0" +content-hash = "1a3d00dd7aa638b2c4da93e14df10aa17610dd4016a458a0cd2218b0d68814cc" [metadata.files] absl-py = [ @@ -2903,6 +2925,44 @@ bleach = [ {file = "bleach-5.0.1-py3-none-any.whl", hash = "sha256:085f7f33c15bd408dd9b17a4ad77c577db66d76203e5984b1bd59baeee948b2a"}, {file = "bleach-5.0.1.tar.gz", hash = "sha256:0d03255c47eb9bd2f26aa9bb7f2107732e7e8fe195ca2f64709fcf3b0a4a085c"}, ] +Bottleneck = [ + {file = "Bottleneck-1.3.5-cp310-cp310-macosx_10_9_x86_64.whl", hash = "sha256:75ed4ffbe26672da14d9e9e29b7bca79f82c7ecf1e8a32f1d3f7b4dda53b486e"}, + {file = "Bottleneck-1.3.5-cp310-cp310-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:c8f5562e5f27ab39775758b3d57c6144e29ca33edc76ee60cb220b5a99d5a1f5"}, + {file = "Bottleneck-1.3.5-cp310-cp310-manylinux_2_5_x86_64.manylinux1_x86_64.manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:5d26675caa9ce86a299877b14e909bc456c0d1fec30ae63bbb52a20b01beedb9"}, + {file = "Bottleneck-1.3.5-cp310-cp310-musllinux_1_1_i686.whl", hash = "sha256:cc1395aabff7db940f2cd952b8e42f014461feecd563c6dddb9077022a361a80"}, + {file = "Bottleneck-1.3.5-cp310-cp310-musllinux_1_1_x86_64.whl", hash = "sha256:d17af36baa48cd68c83eeefd830236410981556987c5ea9d8212f99c64c696ae"}, + {file = "Bottleneck-1.3.5-cp310-cp310-win32.whl", hash = "sha256:fe75dff7ad1ec63cd832aa7529fc60ef78ce13ee29318a1437b0634c01ad59f4"}, + {file = "Bottleneck-1.3.5-cp310-cp310-win_amd64.whl", hash = "sha256:9a53cfddc71bdfa14e444deab90173b658fbe98a784cdfee55725641b21be175"}, + {file = "Bottleneck-1.3.5-cp36-cp36m-macosx_10_9_x86_64.whl", hash = "sha256:826ecaa26132179ce43b7796473eff62b232db47b39f3b6fcc4365488f3b0b00"}, + {file = "Bottleneck-1.3.5-cp36-cp36m-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:3283f98c01702404fde4c674b19ae444eaa2668da77124fcc6de40f82c6cda93"}, + {file = "Bottleneck-1.3.5-cp36-cp36m-manylinux_2_5_x86_64.manylinux1_x86_64.manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:f5f7cffb0de10d83901a942a50f1a8421776fc0489603ce938b31bdf867ab0e9"}, + {file = "Bottleneck-1.3.5-cp36-cp36m-musllinux_1_1_i686.whl", hash = "sha256:005578d5ffbef148d1bc8cdd74d71734a5add7379142e1add339ad68c20ad13b"}, + {file = "Bottleneck-1.3.5-cp36-cp36m-musllinux_1_1_x86_64.whl", hash = "sha256:538dc29cf97410cb767ea7f369806be487ba6fe125329e0cf410cb6dd225c891"}, + {file = "Bottleneck-1.3.5-cp36-cp36m-win32.whl", hash = "sha256:7b1c0f1a4ab2240dcee5df4968d0b70892d958435750c1dfb1c835b091436453"}, + {file = "Bottleneck-1.3.5-cp36-cp36m-win_amd64.whl", hash = "sha256:698c8b92da3c0638910a81a5c404e199080f0dd6d64c56de7d21dd90870c58cb"}, + {file = "Bottleneck-1.3.5-cp37-cp37m-macosx_10_9_x86_64.whl", hash = "sha256:97d628e66f59255d65d6bcd43f58d64b976388d3cc4e330832a260793cdbdb2a"}, + {file = "Bottleneck-1.3.5-cp37-cp37m-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:103fe3f2166667448271698394a15eae9d43a252dbb41a9d4397e08e4852934e"}, + {file = "Bottleneck-1.3.5-cp37-cp37m-manylinux_2_5_x86_64.manylinux1_x86_64.manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:a56e945a323896f9c76e99c1401e86bbca68ec0828b38d5832b5881fbacd5da2"}, + {file = "Bottleneck-1.3.5-cp37-cp37m-musllinux_1_1_i686.whl", hash = "sha256:8a7dbf8e79b9f6821df964b964cfe10ba58ad92334a3cca6e6c8cc5c72ec92d1"}, + {file = "Bottleneck-1.3.5-cp37-cp37m-musllinux_1_1_x86_64.whl", hash = "sha256:da08bd0978be264daff090d3cddeff809810bb58ef4dbad418fcb1de0770dafb"}, + {file = "Bottleneck-1.3.5-cp37-cp37m-win32.whl", hash = "sha256:a4280a609b37b12827b3c86fe72869bb580462ae06c970f904412a0bc1b9ba12"}, + {file = "Bottleneck-1.3.5-cp37-cp37m-win_amd64.whl", hash = "sha256:1dc6bc824ff24e55edb5e0738e7e9de838091471406df63a20a12cca006eab2d"}, + {file = "Bottleneck-1.3.5-cp38-cp38-macosx_10_9_x86_64.whl", hash = "sha256:930a2e86ef95eab1a5ef00467de45d316a8dfb0de9604aa23cdbf7fefbdab765"}, + {file = "Bottleneck-1.3.5-cp38-cp38-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:e069baf7a3208aaa55e3d2c2fa315661cd61a1e0c5cc33be42111e9520fa5802"}, + {file = "Bottleneck-1.3.5-cp38-cp38-manylinux_2_5_x86_64.manylinux1_x86_64.manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:f3bb670f70c86978b99566e39959b151e211c814b336cacda6931109f19b280a"}, + {file = "Bottleneck-1.3.5-cp38-cp38-musllinux_1_1_i686.whl", hash = "sha256:9a4cb657afba02c01471e78879bc01652268a8d23dec3c8ea432df174c10f61d"}, + {file = "Bottleneck-1.3.5-cp38-cp38-musllinux_1_1_x86_64.whl", hash = "sha256:0b624ebe4660545534f6408dbe069157600d89392caabfc0c337f2a01aa2c990"}, + {file = "Bottleneck-1.3.5-cp38-cp38-win32.whl", hash = "sha256:c907f1be11ab6e2b9c980a61feabee21dbe16d62db1e605fbffe6a85d2a8a23b"}, + {file = "Bottleneck-1.3.5-cp38-cp38-win_amd64.whl", hash = "sha256:73084b263d5ec3091e92664ae1276258db2865e6801b7f99864af42baebd6d2d"}, + {file = "Bottleneck-1.3.5-cp39-cp39-macosx_10_9_x86_64.whl", hash = "sha256:f0eaf6e9dc1a8efe56b90b790c0bd47453639c4a6427d6206c8bd26f7c06c3a6"}, + {file = "Bottleneck-1.3.5-cp39-cp39-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:b5cffd0b4a7b929f94412586e78ca3506b3c2b50d213f7a4aaff67f913736e38"}, + {file = "Bottleneck-1.3.5-cp39-cp39-manylinux_2_5_x86_64.manylinux1_x86_64.manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:5a87a21081fb27995ef2123d85a5cc392a3aab707189c6ba98b4a4808d0905d0"}, + {file = "Bottleneck-1.3.5-cp39-cp39-musllinux_1_1_i686.whl", hash = "sha256:04c938dea6ca735bed69e0e01b3d83c31da4861c10d3332363a4f697db02a341"}, + {file = "Bottleneck-1.3.5-cp39-cp39-musllinux_1_1_x86_64.whl", hash = "sha256:5712542be5b067f10c064139227757008ecd6e494967e26ecc03e24d5953e6c4"}, + {file = "Bottleneck-1.3.5-cp39-cp39-win32.whl", hash = "sha256:f24fc607466edf97d46aeafe0ac4797e0f7b654aef4a547b1699bb8ae15db3ff"}, + {file = "Bottleneck-1.3.5-cp39-cp39-win_amd64.whl", hash = "sha256:dce5ba9eaab354fdc13c7969f026ef2e9f4757415f565a188322e6cc9a296ce5"}, + {file = "Bottleneck-1.3.5.tar.gz", hash = "sha256:2c0d27afe45351f6f421893362621804fa7dea14fe29a78eaa52d4323f646de7"}, +] cachetools = [ {file = "cachetools-5.2.0-py3-none-any.whl", hash = "sha256:f9f17d2aec496a9aa6b76f53e3b614c965223c061982d434d160f930c698a9db"}, {file = "cachetools-5.2.0.tar.gz", hash = "sha256:6a94c6402995a99c3970cc7e4884bb60b4a8639938157eeed436098bf9831757"}, @@ -3124,6 +3184,13 @@ executing = [ {file = "executing-1.0.0-py2.py3-none-any.whl", hash = "sha256:550d581b497228b572235e633599133eeee67073c65914ca346100ad56775349"}, {file = "executing-1.0.0.tar.gz", hash = "sha256:98daefa9d1916a4f0d944880d5aeaf079e05585689bebd9ff9b32e31dd5e1017"}, ] +faiss-gpu = [ + {file = "faiss_gpu-1.7.2-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:c98abc1aac06cb4cb94de223b3186bd4a60d15fd3cae42271604168abc081ca5"}, + {file = "faiss_gpu-1.7.2-cp36-cp36m-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:38796433b2fe43f3a602be18668969af615a3a898e897366e6997b409b0deeab"}, + {file = "faiss_gpu-1.7.2-cp37-cp37m-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:040a413be699077931b781e470468c6b5084342c5d5773ce8d916f04b25d8c9c"}, + {file = "faiss_gpu-1.7.2-cp38-cp38-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:e85a6bc975b2f233eb056584f33bbce8613c453c9024c099052a423eebabee23"}, + {file = "faiss_gpu-1.7.2-cp39-cp39-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:3ca9bfa2fda868f438a2f05c1d5aed53021bfaa55b25fd3ae666d2da201f3caf"}, +] fastjsonschema = [ {file = "fastjsonschema-2.16.2-py3-none-any.whl", hash = "sha256:21f918e8d9a1a4ba9c22e09574ba72267a6762d47822db9add95f6454e51cc1c"}, {file = "fastjsonschema-2.16.2.tar.gz", hash = "sha256:01e366f25d9047816fe3d288cbfc3e10541daf0af2044763f3d0ade42476da18"}, diff --git a/pyproject.toml b/pyproject.toml index bb7a558d552a150fbd32328906513ee2f6117da1..f4a0759b1839fd63ef030ef46a1a972491280b42 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -45,6 +45,8 @@ pycatch22 = "^0.4.2" omero-py = { version = ">=5.6.2", optional = true } # contact omero server zeroc-ice = { version="3.6.5", optional = true } # networking interface, slow to build GitPython = "^3.1.27" +Bottleneck = "^1.3.5" +faiss-gpu = "^1.7.2" [tool.poetry.extras] omero = [ "omero-py" ] diff --git a/src/aliby/pipeline.py b/src/aliby/pipeline.py index 9343304b1338e99a8ec8b241c0f9b7a744c20946..2c4f792c9e1e1df9af661873d0be06d50c0e3cf9 100644 --- a/src/aliby/pipeline.py +++ b/src/aliby/pipeline.py @@ -708,7 +708,7 @@ class Pipeline(ProcessABC): meta.run() meta.add_fields( # Add non-logfile metadata { - "aliby_version": version('aliby') + "aliby_version": version("aliby"), "omero_id": config["general"]["id"], "image_id": image_id, "parameters": PipelineParameters.from_dict( diff --git a/src/extraction/core/extractor.py b/src/extraction/core/extractor.py index 1b3aa6451b34152dc13779dbf172647ad54cfc63..ecc34f3f232f7e3b87c18225d56360fb6eb4a38c 100644 --- a/src/extraction/core/extractor.py +++ b/src/extraction/core/extractor.py @@ -16,7 +16,6 @@ from extraction.core.functions.distributors import reduce_z, trap_apply from extraction.core.functions.loaders import ( load_custom_args, load_funs, - load_mergefuns, load_redfuns, ) @@ -33,7 +32,6 @@ extraction_result = t.Dict[ CELL_FUNS, TRAPFUNS, FUNS = load_funs() CUSTOM_FUNS, CUSTOM_ARGS = load_custom_args() RED_FUNS = load_redfuns() -MERGE_FUNS = load_mergefuns() # Assign datatype depending on the metric used # m2type = {"mean": np.float32, "median": np.ubyte, "imBackground": np.ubyte} @@ -406,10 +404,10 @@ class Extractor(ProcessABC): method: function The reduction function """ - if method is None: - return img - else: - return reduce_z(img, method) + reduced = img + if method is not None: + reduced = reduce_z(img, method) + return reduced def extract_tp( self, @@ -547,8 +545,10 @@ class Extractor(ProcessABC): set(self.img_bgsub.keys()).union(tree_chs) ) ) == len(chs): - imgs = [self.get_imgs(ch, tiles, tree_chs) for ch in chs] - merged = MERGE_FUNS[merge_fun](*imgs) + channels_stack = np.stack( + [self.get_imgs(ch, tiles, tree_chs) for ch in chs] + ) + merged = RED_FUNS[merge_fun](channels_stack, axis=-1) d[name] = self.reduce_extract( red_metrics=red_metrics, traps=merged, diff --git a/src/extraction/core/functions/cell.py b/src/extraction/core/functions/cell.py index 8c4ea97ef4343e40dc985dcca91439208ab420ad..d0b605c573d5862c7a4de890a4853b2769171923 100644 --- a/src/extraction/core/functions/cell.py +++ b/src/extraction/core/functions/cell.py @@ -3,12 +3,16 @@ Base functions to extract information from a single cell These functions are automatically read by extractor.py, and so can only have the cell_mask and trap_image as inputs and must return only one value. """ +import math +import typing as t + +import bottleneck as bn +import faiss import numpy as np from scipy import ndimage -from sklearn.cluster import KMeans -def area(cell_mask): +def area(cell_mask) -> int: """ Find the area of a cell mask @@ -17,10 +21,10 @@ def area(cell_mask): cell_mask: 2d array Segmentation mask for the cell """ - return np.sum(cell_mask, dtype=int) + return bn.nansum(cell_mask) -def eccentricity(cell_mask): +def eccentricity(cell_mask) -> float: """ Find the eccentricity using the approximate major and minor axes @@ -33,7 +37,7 @@ def eccentricity(cell_mask): return np.sqrt(maj_ax**2 - min_ax**2) / maj_ax -def mean(cell_mask, trap_image): +def mean(cell_mask, trap_image) -> float: """ Finds the mean of the pixels in the cell. @@ -43,10 +47,10 @@ def mean(cell_mask, trap_image): Segmentation mask for the cell trap_image: 2d array """ - return np.mean(trap_image[np.where(cell_mask)], dtype=float) + return bn.nanmean(trap_image[cell_mask]) -def median(cell_mask, trap_image): +def median(cell_mask, trap_image) -> int: """ Finds the median of the pixels in the cell. @@ -56,10 +60,10 @@ def median(cell_mask, trap_image): Segmentation mask for the cell trap_image: 2d array """ - return np.median(trap_image[np.where(cell_mask)]) + return bn.nanmedian(trap_image[cell_mask]) -def max2p5pc(cell_mask, trap_image): +def max2p5pc(cell_mask, trap_image) -> float: """ Finds the mean of the brightest 2.5% of pixels in the cell. @@ -70,18 +74,17 @@ def max2p5pc(cell_mask, trap_image): trap_image: 2d array """ # number of pixels in mask - npixels = cell_mask.sum() + npixels = bn.nansum(cell_mask) top_pixels = int(np.ceil(npixels * 0.025)) - # sort pixels in cell - sorted_vals = np.sort(trap_image[np.where(cell_mask)], axis=None) - # find highest 2.5% - top_vals = sorted_vals[-top_pixels:] + # sort pixels in cell and find highest 2.5% + pixels = trap_image[cell_mask] + top_values = pixels[bn.rankdata(pixels)[:top_pixels].astype(int) - 1] + # find mean of these highest pixels - max2p5pc = np.mean(top_vals, dtype=float) - return max2p5pc + return bn.nanmean(top_values) -def max5px(cell_mask, trap_image): +def max5px(cell_mask, trap_image) -> float: """ Finds the mean of the five brightest pixels in the cell. @@ -92,63 +95,13 @@ def max5px(cell_mask, trap_image): trap_image: 2d array """ # sort pixels in cell - sorted_vals = np.sort(trap_image[np.where(cell_mask)], axis=None) - top_vals = sorted_vals[-5:] + pixels = trap_image[cell_mask] + top_values = pixels[bn.rankdata(pixels)[:5].astype(int) - 1] # find mean of five brightest pixels - max5px = np.mean(top_vals, dtype=float) + max5px = bn.nanmean(top_values) return max5px -def max5px_med(cell_mask, trap_image): - """ - Finds the mean of the five brightest pixels in the cell divided by the median pixel value. - - Parameters - ---------- - cell_mask: 2d array - Segmentation mask for the cell - trap_image: 2d array - """ - # sort pixels in cell - sorted_vals = np.sort(trap_image[np.where(cell_mask)], axis=None) - top_vals = sorted_vals[-5:] - # find mean of five brightest pixels - max5px = np.mean(top_vals, dtype=float) - # find the median - med = np.median(sorted_vals) - if med == 0: - return np.nan - else: - return max5px / med - - -def max2p5pc_med(cell_mask, trap_image): - """ - Finds the mean of the brightest 2.5% of pixels in the cell - divided by the median pixel value. - - Parameters - ---------- - cell_mask: 2d array - Segmentation mask for the cell - trap_image: 2d array - """ - # number of pixels in mask - npixels = cell_mask.sum() - top_pixels = int(np.ceil(npixels * 0.025)) - # sort pixels in cell - sorted_vals = np.sort(trap_image[np.where(cell_mask)], axis=None) - # find highest 2.5% - top_vals = sorted_vals[-top_pixels:] - # find mean of these highest pixels - max2p5pc = np.mean(top_vals, dtype=float) - med = np.median(sorted_vals) - if med == 0: - return np.nan - else: - return max2p5pc / med - - def std(cell_mask, trap_image): """ Finds the standard deviation of the values of the pixels in the cell. @@ -159,7 +112,7 @@ def std(cell_mask, trap_image): Segmentation mask for the cell trap_image: 2d array """ - return np.std(trap_image[np.where(cell_mask)], dtype=float) + return bn.nanstd(trap_image[cell_mask]) def k2_major_median(cell_mask, trap_image): @@ -177,47 +130,23 @@ def k2_major_median(cell_mask, trap_image): median: float The median of the major cluster of two clusters """ - if np.any(cell_mask): - X = trap_image[np.where(cell_mask)].reshape(-1, 1) - # cluster pixels in cell into two clusters - kmeans = KMeans(n_clusters=2, random_state=0).fit(X) - high_clust_id = kmeans.cluster_centers_.argmax() - # find the median of pixels in the largest cluster - major_cluster = X[kmeans.predict(X) == high_clust_id] - major_median = np.median(major_cluster, axis=None) - return major_median - else: - return np.nan - -def k2_minor_median(cell_mask, trap_image): - """ - Finds the median of the minor cluster after clustering the pixels in the cell into two clusters. + if bn.anynan(trap_image): + cell_mask[np.isnan(trap_image)] = False + X = trap_image[cell_mask].reshape(-1, 1).astype(np.float32) + # cluster pixels in cell into two clusters + indices = faiss.IndexFlatL2(X.shape[1]) + # (n_clusters=2, random_state=0).fit(X) + _, indices = indices.search(X, k=2) + high_indices = bn.nanargmax(indices, axis=1).astype(bool) + # find the median of pixels in the largest cluster + # high_masks = np.logical_xor( # Use casting to obtain masks + # high_indices.reshape(-1, 1), np.tile((0, 1), X.shape[0]).reshape(-1, 2) + # ) + major_median = bn.nanmedian(X[high_indices]) + return major_median - Parameters - ---------- - cell_mask: 2d array - Segmentation mask for the cell - trap_image: 2d array - Returns - ------- - median: float - The median of the minor cluster. - """ - if np.any(cell_mask): - X = trap_image[np.where(cell_mask)].reshape(-1, 1) - # cluster pixels in cell into two clusters - kmeans = KMeans(n_clusters=2, random_state=0).fit(X) - low_clust_id = kmeans.cluster_centers_.argmin() - # find the median of pixels in the smallest cluster - minor_cluster = X[kmeans.predict(X) == low_clust_id] - minor_median = np.median(minor_cluster, axis=None) - return minor_median - else: - return np.nan - - -def volume(cell_mask): +def volume(cell_mask) -> float: """ Estimates the volume of the cell assuming it is an ellipsoid with the mask providing a cross-section through the median plane of the ellipsoid. @@ -247,20 +176,20 @@ def conical_volume(cell_mask): def spherical_volume(cell_mask): - ''' + """ Estimates the volume of the cell assuming it is a sphere with the mask providing a cross-section through the median plane of the sphere. Parameters ---------- cell_mask: 2d array Segmentation mask for the cell - ''' - area = cell_mask.sum() - r = np.sqrt(area / np.pi) + """ + total_area = area(cell_mask) + r = math.sqrt(total_area / np.pi) return (4 * np.pi * r**3) / 3 -def min_maj_approximation(cell_mask): +def min_maj_approximation(cell_mask) -> t.Tuple[int]: """ Finds the lengths of the minor and major axes of an ellipse from a cell mask. @@ -278,8 +207,8 @@ def min_maj_approximation(cell_mask): # get the size of the top of the cone (points that are equally maximal) cone_top = ndimage.morphology.distance_transform_edt(dn == 0) * padded # minor axis = largest distance from the edge of the ellipse - min_ax = np.round(nn.max()) + min_ax = np.round(bn.nanmax(nn)) # major axis = largest distance from the cone top # + distance from the center of cone top to edge of cone top - maj_ax = np.round(dn.max() + cone_top.sum() / 2) + maj_ax = np.round(bn.nanmax(dn) + bn.nansum(cone_top) / 2) return min_ax, maj_ax diff --git a/src/extraction/core/functions/defaults.py b/src/extraction/core/functions/defaults.py index 4d60c701c95062784c4853e55ed11b27a398d1f4..ad95d5ff9917f68dd9bd4b810657c9556744f4bb 100644 --- a/src/extraction/core/functions/defaults.py +++ b/src/extraction/core/functions/defaults.py @@ -28,7 +28,7 @@ def exparams_from_meta( "mKO2", } - default_reductions = {"np_max"} + default_reductions = {"max"} default_metrics = { "mean", "median", @@ -62,7 +62,7 @@ def exparams_from_meta( ["GFPFast_bgsub", "pHluorin405_bgsub"], ), ) - for b, y in zip(["em_ratio", "gsum"], ["div0", "np_add"]) + for b, y in zip(["em_ratio", "gsum"], ["div0", "add"]) } for i, v in sets.items(): base["multichannel_ops"][i] = [ diff --git a/src/extraction/core/functions/distributors.py b/src/extraction/core/functions/distributors.py index 23d1ff09bc21a167a3b644a397122f3eaa42efec..40e8fff96b4da6fe417a7ead89d9eb6b54993137 100644 --- a/src/extraction/core/functions/distributors.py +++ b/src/extraction/core/functions/distributors.py @@ -1,3 +1,6 @@ +import typing as t + +import bottleneck as bn import numpy as np @@ -22,7 +25,7 @@ def trap_apply(cell_fun, cell_masks, *args, **kwargs): return [cell_fun(cell_masks[..., i], *args, **kwargs) for i in cells_iter] -def reduce_z(trap_image, fun): +def reduce_z(trap_image: np.ndarray, fun: t.Callable): """ Reduce the trap_image to 2d. @@ -32,9 +35,15 @@ def reduce_z(trap_image, fun): Images for all the channels associated with a trap fun: function Function to execute the reduction + """ - if isinstance(fun, np.ufunc): + # FUTURE replace with py3.10's match-case. + if ( + hasattr(fun, "__module__") and fun.__module__[:10] == "bottleneck" + ): # Bottleneck type + return getattr(bn.reduce, fun.__name__)(trap_image, axis=2) + elif isinstance(fun, np.ufunc): # optimise the reduction function if possible return fun.reduce(trap_image, axis=2) - else: + else: # WARNING: Very slow, only use when no alternatives exist return np.apply_along_axis(fun, 2, trap_image) diff --git a/src/extraction/core/functions/loaders.py b/src/extraction/core/functions/loaders.py index cb26d47ea832a6365bd655d0b3e29e3cc73114cf..9d2e9c479428db9db1189726a7ba8478b9923646 100644 --- a/src/extraction/core/functions/loaders.py +++ b/src/extraction/core/functions/loaders.py @@ -1,7 +1,8 @@ import typing as t -from inspect import getfullargspec, getmembers, isfunction +from types import FunctionType +from inspect import getfullargspec, getmembers, isfunction, isbuiltin -import numpy as np +import bottleneck as bn from extraction.core.functions import cell, trap from extraction.core.functions.custom import localisation @@ -102,22 +103,30 @@ def load_funs(): return CELLFUNS, TRAPFUNS, {**TRAPFUNS, **CELLFUNS} -def load_redfuns(): # TODO make defining reduction functions more flexible +def load_redfuns( + additional_reducers: t.Optional[ + t.Union[t.Dict[str, t.Callable], t.Callable] + ] = None, +) -> t.Dict[str, t.Callable]: """ - Load functions to reduce the z-stack to two dimensions. + Load functions to reduce a multidimensional image by one dimension. + + It can take custom functions as arguments. """ RED_FUNS = { - "np_max": np.maximum, - "np_mean": np.mean, - "np_median": np.median, + "max": bn.nanmax, + "mean": bn.nanmean, + "median": bn.nanmedian, + "div0": div0, + "add": bn.nansum, "None": None, } - return RED_FUNS + if additional_reducers is not None: + if isinstance(additional_reducers, FunctionType): + additional_reducers = [ + (additional_reducers.__name__, additional_reducers) + ] + RED_FUNS.update(name, fun) -def load_mergefuns(): - """ - Load functions to merge multiple channels - """ - MERGE_FUNS = {"div0": div0, "np_add": np.add} - return MERGE_FUNS + return RED_FUNS diff --git a/src/postprocessor/compiler.py b/src/postprocessor/compiler.py index 98f05ca48c1855b10982bdbebe553de7ad04932c..1b0f3bc3e787f3373dc04f9fdac89b1dd6dc6020 100644 --- a/src/postprocessor/compiler.py +++ b/src/postprocessor/compiler.py @@ -288,15 +288,15 @@ class ExperimentCompiler(Compiler): "buddings": ("buddings", "sum"), "cycle_length_mean": ( "buddings", - lambda x: np.diff(np.where(x)[0]).mean(), + lambda x: bn.nanmean(np.diff(np.where(x)[0])), ), "cycle_length_min": ( "buddings", - lambda x: np.diff(np.where(x)[0]).min(), + lambda x: bn.nanmin(np.diff(np.where(x)[0])), ), "cycle_length_median": ( "buddings", - lambda x: np.median(np.diff(np.where(x)[0])), + lambda x: np.nanmedian(np.diff(np.where(x)[0])), ), } @@ -376,8 +376,8 @@ class ExperimentCompiler(Compiler): if metrics is None: metrics = { - "GFP": ("median", "max5px_med", "max5"), - "mCherry": ("median", "max5px_med", "max5"), + "GFP": ("median", "max5"), + "mCherry": ("median", "max5"), # "general": ("eccentricity",), "Flavin": ("median",), "postprocessing/savgol": ("volume",), diff --git a/src/postprocessor/core/multisignal/align.py b/src/postprocessor/core/multisignal/align.py index 855ceccbfb1b786d925905671c52cbc99e1faff1..241736f36598711fc5e0b15d48e306841d7ffba6 100644 --- a/src/postprocessor/core/multisignal/align.py +++ b/src/postprocessor/core/multisignal/align.py @@ -1,9 +1,10 @@ #!/usr/bin/env python3 +import bottleneck as bn import numpy as np import pandas as pd -from agora.abc import ParametersABC +from agora.abc import ParametersABC from postprocessor.core.abc import PostProcessABC @@ -97,7 +98,7 @@ class align(PostProcessABC): # i.e. if events_at_least = 1, then cells that have no birth events are # deleted. event_mask = mask_df.apply( - lambda x: np.sum(x) >= self.events_at_least, axis=1 + lambda x: bn.nansum(x) >= self.events_at_least, axis=1 ) mask_df = mask_df.iloc[event_mask.to_list()] @@ -132,7 +133,7 @@ class align(PostProcessABC): # Do not remove bits of traces before first event else: # Add columns to left, filled with NaNs - max_shift = np.max(shift_list) + max_shift = bn.nanmax(shift_list) mask_aligned = df_extend_nan(mask_aligned, max_shift) trace_aligned = df_extend_nan(trace_aligned, max_shift) # shift each