diff --git a/.gitlab-ci.yml b/.gitlab-ci.yml index 9487c5ea6663c6aab78c46aa49dc77c4fe275264..d3ebdbae6699dfcdef779daed474158ad5020e25 100644 --- a/.gitlab-ci.yml +++ b/.gitlab-ci.yml @@ -38,7 +38,14 @@ Local Tests: stage: tests script: # - poetry install -vv - - poetry run pytest ./tests --ignore ./tests/aliby/network --ignore ./tests/aliby/pipeline + - poetry run coverage run -m --branch pytest ./tests --ignore ./tests/aliby/network --ignore ./tests/aliby/pipeline + - poetry run coverage report -m + - poetry run coverage xml + coverage: '/(?i)total.*? (100(?:\.0+)?\%|[1-9]?\d(?:\.\d+)?\%)$/' +# Legacy because we use GitLab 14.2.6 + artifacts: + reports: + cobertura: coverage.xml Network Tools Tests: stage: tests diff --git a/README.md b/README.md index 5b5557465d4372bfaacb245645a6aff2543deb96..b6a9f36e16449b05c2beeeb4a1dc373a0b241ec3 100644 --- a/README.md +++ b/README.md @@ -4,6 +4,7 @@ [](https://badge.fury.io/py/aliby) [](https://git.ecdf.ed.ac.uk/swain-lab/aliby/aliby/-/pipelines) [](https://git.ecdf.ed.ac.uk/swain-lab/aliby/aliby/-/commits/dev) +[](https://git.ecdf.ed.ac.uk/swain-lab/aliby/aliby/-/commits/dev) End-to-end processing of cell microscopy time-lapses. ALIBY automates segmentation, tracking, lineage predictions, post-processing and report production. It leverages the existing Python ecosystem and open-source scientific software available to produce seamless and standardised pipelines. diff --git a/bin/run.py b/bin/run.py new file mode 100644 index 0000000000000000000000000000000000000000..230b934bebbc201bc8acccd05c740f42853fef92 --- /dev/null +++ b/bin/run.py @@ -0,0 +1,47 @@ +#!/usr/bin/env jupyter + +import argparse + +from aliby.pipeline import Pipeline, PipelineParameters + +parser = argparse.ArgumentParser( + prog="aliby-run", description="Run a default microscopy analysis pipeline" +) + +param_values = { + "expt_id": None, + "distributed": 2, + "tps": 2, + "directory": "./data", + "filter": 0, + "host": None, + "username": None, + "password": None, +} + + +def _cast_str(x: str or None): + """ + Cast string as int if possible. If Nonetype return None. + """ + if x: + try: + return int(x) + except: + return x + + +for k in param_values: + parser.add_argument(f"--{k}", action="store") + +args = parser.parse_args() + +for k in param_values: + if passed_value := _cast_str(getattr(args, k)): + + param_values[k] = passed_value + +params = PipelineParameters.default(general=param_values) +p = Pipeline(params) + +p.run() diff --git a/docs/source/specifications/metadata.org b/docs/source/specifications/metadata.org index 6618730c2dfb78e133a64cb62a07a9fd8b90c93a..52d514a3465236a9079a8bba3ee721b013808a28 100644 --- a/docs/source/specifications/metadata.org +++ b/docs/source/specifications/metadata.org @@ -4,7 +4,7 @@ Draft for recommended metadata for images to provide a standard interface for al * Essential data - DimensionOrder: str - Order of dimensions (e.g., CTZYX for Channel, Time, Z,Y,X) + Order of dimensions (e.g., TCZYX for Time, Channel, Z, Y, X) - PixelSize: float Size of pixel, useful for segmentation. 
- Channels: List[str] diff --git a/examples/extraction/pos_example.py b/examples/extraction/pos_example.py index 9372fa1dd1147d7c6584fba695f20238e3dd1167..da44f464b54d81dbba3f5902125d1f7b1fe4abc9 100644 --- a/examples/extraction/pos_example.py +++ b/examples/extraction/pos_example.py @@ -14,5 +14,4 @@ params = Parameters( ext = Extractor(params, omero_id=19310) -# ext.extract_exp(tile_size=117) d = ext.extract_tp(tp=1, tile_size=117) diff --git a/examples/tiler/pypipeline_unit_test_00_000001_Brightfield_003.tif b/examples/tiler/pypipeline_unit_test_00_000001_Brightfield_003.tif new file mode 100755 index 0000000000000000000000000000000000000000..569f33a72ab18f65c79e28e38869a4f17cd30cca Binary files /dev/null and b/examples/tiler/pypipeline_unit_test_00_000001_Brightfield_003.tif differ diff --git a/examples/tiler/pypipeline_unit_test_00_000001_Brightfield_003_square.tif b/examples/tiler/pypipeline_unit_test_00_000001_Brightfield_003_square.tif new file mode 100755 index 0000000000000000000000000000000000000000..f813c15a9c8aef8a6e629ff17e460aef4acdb630 Binary files /dev/null and b/examples/tiler/pypipeline_unit_test_00_000001_Brightfield_003_square.tif differ diff --git a/src/agora/abc.py b/src/agora/abc.py index 6c1fd343e04bf42fb798642484b0b55c2de0a97a..c396b4b1503c7558dc51eaf9032b7cb14485bc38 100644 --- a/src/agora/abc.py +++ b/src/agora/abc.py @@ -10,6 +10,8 @@ from typing import Union from flatten_dict import flatten from yaml import dump, safe_load +from agora.logging import timer + atomic = t.Union[int, float, str, bool] @@ -239,21 +241,12 @@ class StepABC(ProcessABC): def _run_tp(self): pass - def run_tp(self, tp: int, log: bool = True, **kwargs): + @timer + def run_tp(self, tp: int, **kwargs): """ Time and log the timing of a step. """ - if log: - t = perf_counter() - result = self._run_tp(tp, **kwargs) - self._log( - f"Timing:{self.__class__.__name__}:{perf_counter()-t}s", - "debug", - ) - else: - result = self._run_tp(tp, **kwargs) - - return result + return self._run_tp(tp, **kwargs) def run(self): # Replace run withn run_tp diff --git a/src/agora/io/bridge.py b/src/agora/io/bridge.py index 8c7bee9ee171a931915296a2369c0aa61c3da249..788db5e9d7cc24b1a19c261d80fdf9cc74589427 100644 --- a/src/agora/io/bridge.py +++ b/src/agora/io/bridge.py @@ -157,12 +157,6 @@ def attrs_from_h5(fpath: str): return dict(f.attrs) -def parameters_from_h5(fpath: str): - """Return parameters from an h5 file.""" - attrs = attrs_from_h5(fpath) - return yaml.safe_load(attrs["parameters"]) - - def image_creds_from_h5(fpath: str): """Return image id and server credentials from an h5.""" attrs = attrs_from_h5(fpath) diff --git a/src/agora/io/metadata.py b/src/agora/io/metadata.py index 61caa3aa6c868f33a0f40ce220a02d3a3d028285..c058e9f430672a9fc4cb6a7e25d4e328368a2044 100644 --- a/src/agora/io/metadata.py +++ b/src/agora/io/metadata.py @@ -168,8 +168,9 @@ def get_meta_swainlab(parsed_metadata: dict): def get_meta_from_legacy(parsed_metadata: dict): - channels = parsed_metadata["channels/channel"] - return {"channels": channels} + result = parsed_metadata + result["channels"] = result["channels/channel"] + return result def parse_swainlab_metadata(filedir: t.Union[str, PosixPath]): diff --git a/src/agora/io/signal.py b/src/agora/io/signal.py index 2578b57cfee9668c45e5b834ed3d8577b3cca4fc..0210ef3427552c8d7fa62a9f366976459d112e51 100644 --- a/src/agora/io/signal.py +++ b/src/agora/io/signal.py @@ -84,7 +84,7 @@ class Signal(BridgeH5): try: df.columns = (df.columns * self.tinterval // 60).astype(int) except 
Exception as e: - self._log(f"Unable to convert columns to minutes: {e}") + self._log(f"Unable to convert columns to minutes: {e}", "debug") return df @cached_property @@ -351,24 +351,6 @@ class Signal(BridgeH5): def stem(self): return self.filename.stem - # def dataset_to_df(self, f: h5py.File, path: str): - - # all_indices = self.index_names - - # valid_indices = { - # k: f[path][k][()] for k in all_indices if k in f[path].keys() - # } - - # new_index = pd.MultiIndex.from_arrays( - # list(valid_indices.values()), names=valid_indices.keys() - # ) - - # return pd.DataFrame( - # f[path + "/values"][()], - # index=new_index, - # columns=f[path + "/timepoint"][()], - # ) - def store_signal_url( self, fullname: str, node: t.Union[h5py.Dataset, h5py.Group] ): diff --git a/src/agora/io/utils.py b/src/agora/io/utils.py index 0acca82cb57fbab596d990a9e9d554f0c8b80344..b32b9314c3291c66f8b4c01336c25db236308e00 100644 --- a/src/agora/io/utils.py +++ b/src/agora/io/utils.py @@ -35,29 +35,6 @@ def imread(path): return cv2.imread(str(path), -1) -class ImageCache: - """HDF5-based image cache for faster loading of the images once they've - been read. - """ - - def __init__(self, file, name, shape, remote_fn): - self.store = h5py.File(file, "a") - # Create a dataset - self.dataset = self.store.create_dataset( - name, shape, dtype=np.float, fill_value=np.nan - ) - self.remote_fn = remote_fn - - def __getitem__(self, item): - cached = self.dataset[item] - if np.any(np.isnan(cached)): - full = self.remote_fn(item) - self.dataset[item] = full - return full - else: - return cached - - class Cache: """ Fixed-length mapping to use as a cache. diff --git a/src/agora/io/writer.py b/src/agora/io/writer.py index f11827fa9adb6818e354c0574f91a419770f8f9a..264478482875c6992241ed9ad00613f0ef96fbf0 100644 --- a/src/agora/io/writer.py +++ b/src/agora/io/writer.py @@ -1,4 +1,3 @@ -import itertools import logging from collections.abc import Iterable from pathlib import Path @@ -55,6 +54,11 @@ class DynamicWriter: if Path(file).exists(): self.metadata = load_attributes(file) + def _log(self, message: str, level: str = "warn"): + # Log messages in the corresponding level + logger = logging.getLogger("aliby") + getattr(logger, level)(f"{self.__class__.__name__}: {message}") + def _append(self, data, key, hgroup): """ Append data to existing dataset in the h5 file otherwise create a new one. @@ -135,10 +139,6 @@ class DynamicWriter: # write all data, signified by the empty tuple hgroup[key][()] = data - # def _check_key(self, key): - # if key not in self.datatypes: - # raise KeyError(f"No defined data type for key {key}") - def write(self, data: dict, overwrite: list, meta: dict = {}): """ Write data and metadata to h5 file. @@ -174,7 +174,9 @@ class DynamicWriter: self._append(value, key, hgroup) except Exception as e: print(key, value) - raise (e) + self._log( + f"{key}:{value} could not be written: {e}", "error" + ) # write metadata for key, value in meta.items(): hgroup.attrs[key] = value @@ -222,220 +224,6 @@ class TilerWriter(DynamicWriter): super().write(data=data, overwrite=overwrite, meta=meta) -# Alan: we use complex numbers because... 
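# The removed comment above trails off; the reason, visible further down in the
# (also removed) write_edgemasks and __append_edgemasks, is that a
# (trap, cell_label) pair was packed into a single complex number, trap as the
# real part and cell label as the imaginary part, so that pairs could be compared
# with plain 1-D set operations such as np.in1d. A minimal standalone sketch of
# that encoding (illustration only, not project API):
import numpy as np

traps = np.array([0, 0, 1])
cell_labels = np.array([1, 2, 1])
current = traps + 1j * cell_labels            # [0+1j, 0+2j, 1+1j]

existing = np.array([0 + 1j])                 # pairs already stored on disk
missing = current[~np.in1d(current, existing)]
assert missing.tolist() == [0 + 2j, 1 + 1j]

# decoding back into integer (trap, cell_label) pairs
decoded = np.stack([missing.real, missing.imag], axis=1).astype(int)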
-@timed() -def save_complex(array, dataset): - # append array, an 1D array of complex numbers, onto dataset, a 2D array of real numbers - n = len(array) - if n > 0: - dataset.resize(dataset.shape[0] + n, axis=0) - dataset[-n:, 0] = array.real - dataset[-n:, 1] = array.imag - - -@timed() -def load_complex(dataset): - # convert 2D dataset into a 1D array of complex numbers - array = dataset[:, 0] + 1j * dataset[:, 1] - return array - - -class BabyWriter(DynamicWriter): - """ - Write data stored in a Baby instance to h5 files. - - Assumes the edgemasks are of form ((max_ncells, max_tps, tile_size, tile_size), bool). - """ - - compression = "gzip" - max_ncells = 2e5 # Alan: Could just make this None - max_tps = 1e3 # Could just make this None - # the number of cells in a chunk for edge masks - chunk_cells = 25 - default_tile_size = 117 - datatypes = { - "centres": ((None, 2), np.uint16), - "position": ((None,), np.uint16), - "angles": ((None,), h5py.vlen_dtype(np.float32)), - "radii": ((None,), h5py.vlen_dtype(np.float32)), - "edgemasks": ( - (max_ncells, max_tps, default_tile_size, default_tile_size), - bool, - ), - "ellipse_dims": ((None, 2), np.float32), - "cell_label": ((None,), np.uint16), - "trap": ((None,), np.uint16), - "timepoint": ((None,), np.uint16), - # "mother_assign": ((None,), h5py.vlen_dtype(np.uint16)), - "mother_assign_dynamic": ((None,), np.uint16), - "volumes": ((None,), np.float32), - } - group = "cell_info" - - def __init__(self, *args, **kwargs): - super().__init__(*args, **kwargs) - self._traps_initialised = False - - def __init_trap_info(self): - # requires traps to have been initialised - trap_metadata = load_attributes(self.file, "trap_info") - tile_size = trap_metadata.get("tile_size", self.default_tile_size) - max_tps = self.metadata["time_settings/ntimepoints"][0] - self.datatypes["edgemasks"] = ( - (self.max_ncells, max_tps, tile_size, tile_size), - bool, - ) - self._traps_initialised = True - - def __init_edgemasks(self, hgroup, edgemasks, current_indices, n_cells): - # create the values dataset in the h5 file - # holds the edge masks and has shape (n_tps, n_cells, tile_size, tile_size) - key = "edgemasks" - max_shape, dtype = self.datatypes[key] - shape = (n_cells, 1) + max_shape[2:] - chunks = (self.chunk_cells, 1) + max_shape[2:] - val_dset = hgroup.create_dataset( - "values", - shape=shape, - maxshape=max_shape, - dtype=dtype, - chunks=chunks, - compression=self.compression, - ) - val_dset[:, 0] = edgemasks - # create index dataset in the h5 file: - # the (trap, cell_id) description used to index into the values and has shape (n_cells, 2) - ix_max_shape = (max_shape[0], 2) - ix_shape = (0, 2) - ix_dtype = np.uint16 - ix_dset = hgroup.create_dataset( - "indices", - shape=ix_shape, - maxshape=ix_max_shape, - dtype=ix_dtype, - compression=self.compression, - ) - save_complex(current_indices, ix_dset) - - def __append_edgemasks(self, hgroup, edgemasks, current_indices): - val_dset = hgroup["values"] - ix_dset = hgroup["indices"] - existing_indices = load_complex(ix_dset) - # check if there are any new labels - available = np.in1d(current_indices, existing_indices) - missing = current_indices[~available] - all_indices = np.concatenate([existing_indices, missing]) - # resizing - debug_t = perf_counter() # for timing code for debugging - n_tps = val_dset.shape[1] + 1 - n_add_cells = len(missing) - # resize dataset for Time and Cells - new_shape = (val_dset.shape[0] + n_add_cells, n_tps) + val_dset.shape[ - 2: - ] - val_dset.resize(new_shape) - 
logging.debug(f"Timing:resizing:{perf_counter() - debug_t}") - # write data - cell_indices = np.where(np.in1d(all_indices, current_indices))[0] - for ix, mask in zip(cell_indices, edgemasks): - try: - val_dset[ix, n_tps - 1] = mask - except Exception as e: - logging.debug( - "Exception: {}:{}, {}, {}".format( - e, ix, n_tps, val_dset.shape - ) - ) - # save the index values - save_complex(missing, ix_dset) - - def write_edgemasks(self, data, keys, hgroup): - """ - Write edgemasks to h5 file. - - Parameters - ---------- - data: list of arrays - Data to be written, in the form (trap_ids, cell_labels, edgemasks) - keys: list of str - Names corresponding to the elements of data. - For example: ["trap", "cell_label", "edgemasks"] - hgroup: group object - Group to write to in h5 file. - """ - if not self._traps_initialised: - self.__init_trap_info() - key = "edgemasks" - val_key = "values" - traps, cell_labels, edgemasks = data - n_cells = len(cell_labels) - hgroup = hgroup.require_group(key) - # create complex indices with traps as real part and cell_labels as imaginary part - current_indices = np.array(traps) + 1j * np.array(cell_labels) - if val_key not in hgroup: - self.__init_edgemasks(hgroup, edgemasks, current_indices, n_cells) - else: - self.__append_edgemasks(hgroup, edgemasks, current_indices) - - def write( - self, data: dict, overwrite: list, tp: int = None, meta: dict = {} - ): - """ - Write data from a Baby instance, including edgemasks. - - Parameters - ---------- - data: dict - A dict of datasets and data - overwrite: list of str - A list of datasets to overwrite - tp: int - The time point of interest - meta: dict, optional - Metadata to be written as attributes of the h5 file - """ - with h5py.File(self.file, "a") as store: - hgroup = store.require_group(self.group) - # write data - for key, value in data.items(): - if key not in self.datatypes: - raise KeyError( - f"BabyWriter: No defined data type for key {key}" - ) - else: - try: - if key.startswith("attrs/"): - # metadata - key = key.split("/")[1] - hgroup.attrs[key] = value - elif key in overwrite: - # delete and replace existing dataset - self._overwrite(value, key, hgroup) - elif key == "edgemasks": - keys = ["trap", "cell_label", "edgemasks"] - value = [data[x] for x in keys] - edgemask_dset = hgroup.get(key + "/values", None) - if ( - edgemask_dset - and tp < edgemask_dset[()].shape[1] - ): - # data already exists - print( - f"BabyWriter: Skipping edgemasks in tp {tp}" - ) - else: - self.write_edgemasks(value, keys, hgroup) - else: - # append or create new dataset - self._append(value, key, hgroup) - except Exception as e: - print(key, value) - raise (e) - # write metadata - for key, value in meta.items(): - hgroup.attrs[key] = value - - class LinearBabyWriter(DynamicWriter): """ Write data stored in a Baby instance to h5 files. 
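# The datatypes tables below (and DynamicWriter._append above) rely on HDF5
# appendable datasets: the first write creates a dataset with maxshape=(None, ...)
# and later time points resize it and fill the new tail. A standalone sketch of
# that pattern with h5py; the file name, group and dataset below are made up for
# illustration rather than taken from the project's layout:
import h5py
import numpy as np

new_values = np.array([3, 1, 2], dtype=np.uint16)
with h5py.File("example.h5", "a") as f:
    grp = f.require_group("cell_info")
    if "cell_label" not in grp:
        grp.create_dataset(
            "cell_label", shape=(0,), maxshape=(None,), dtype=np.uint16
        )
    dset = grp["cell_label"]
    n = len(new_values)
    dset.resize(dset.shape[0] + n, axis=0)  # grow along the unlimited axis
    dset[-n:] = new_values                  # append this time point's values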
@@ -456,7 +244,6 @@ class LinearBabyWriter(DynamicWriter): "cell_label": ((None,), np.uint16), "trap": ((None,), np.uint16), "timepoint": ((None,), np.uint16), - # "mother_assign": ((None,), h5py.vlen_dtype(np.uint16)), "mother_assign_dynamic": ((None,), np.uint16), "volumes": ((None,), np.float32), } @@ -667,14 +454,6 @@ class Writer(BridgeH5): if overwrite == "overwrite": # TODO refactor overwriting if path in f: del f[path] - # elif overwrite == "accumulate": # Add a number if needed - # if path in f: - # parent, name = path.rsplit("/", maxsplit=1) - # n = sum([x.startswith(name) for x in f[path]]) - # path = path + str(n).zfill(3) - # elif overwrite == "skip": - # if path in f: - # logging.debug("Skipping dataset {}".format(path)) logging.debug( "{} {} to {} and {} metadata fields".format( overwrite, type(data), path, len(meta) @@ -791,7 +570,7 @@ class Writer(BridgeH5): dset[()] = df.index.get_level_values(level=name).tolist() # create dataset and write columns if ( - df.columns.dtype == np.int + df.columns.dtype == int or df.columns.dtype == np.dtype("uint") or df.columns.name == "timepoint" ): @@ -828,9 +607,7 @@ class Writer(BridgeH5): ) # split indices in existing and additional new = df.index.tolist() - if ( - df.index.nlevels == 1 - ): + if df.index.nlevels == 1: # cover cases with a single index new = [(x,) for x in df.index.tolist()] ( @@ -931,8 +708,6 @@ class Writer(BridgeH5): return existing_cells, new_cells - - def locate_indices(existing, new): if new.any(): if new.shape[1] > 1: diff --git a/src/agora/logging.py b/src/agora/logging.py new file mode 100644 index 0000000000000000000000000000000000000000..a004b0182363d495c32ffcb2edf9d51cd8875303 --- /dev/null +++ b/src/agora/logging.py @@ -0,0 +1,20 @@ +#!/usr/bin/env jupyter +""" +Add general logging functions and decorators +""" + +import logging +from time import perf_counter + + +def timer(func): + # Log duration of a function into aliby logfile + def wrap_func(*args, **kwargs): + t1 = perf_counter() + result = func(*args, **kwargs) + logging.getLogger("aliby").debug( + f"{func.__qualname__} took {(perf_counter()-t1):.4f}s" + ) + return result + + return wrap_func diff --git a/src/agora/utils/example.py b/src/agora/utils/example.py deleted file mode 100644 index e3ff571acc0cb780e2feb92007111dbe041a91fb..0000000000000000000000000000000000000000 --- a/src/agora/utils/example.py +++ /dev/null @@ -1,53 +0,0 @@ -"""This is an example module to show the structure.""" -from typing import Union - - -class ExampleClass: - """This is an example class to show the structure.""" - - def __init__(self, parameter: int): - """This class takes one parameter and is used to add one to that - parameter. - - :param parameter: The parameter for this class - """ - self.parameter = parameter - - def add_one(self): - """Takes the parameter and adds one. - - >>> x = ExampleClass(1) - >>> x.add_one() - 2 - - :return: the parameter + 1 - """ - return self.parameter + 1 - - def add_n(self, n: int): - """Adds n to the class instance's parameter. - - For instance - >>> x = ExampleClass(1) - >>> x.add_n(10) - 11 - - :param n: The number to add - :return: the parameter + n - """ - return self.parameter + n - - -def example_function(parameter: Union[int, str]): - """This is a factory function for an ExampleClass. 
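# Usage sketch for the new agora.logging.timer decorator defined above; it
# replaces the hand-written perf_counter/_log block that StepABC.run_tp used to
# carry. The logging setup below is for illustration only; the pipeline
# configures the "aliby" logger itself.
import logging

from agora.logging import timer

logging.basicConfig(level=logging.DEBUG)


@timer
def slow_step(tp: int) -> int:
    return tp * 2


slow_step(3)
# emits a debug record on the "aliby" logger, formatted by wrap_func as e.g.
# "slow_step took 0.0000s"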
- - :param parameter: the parameter to give to the example class - :return: An example class - """ - try: - return ExampleClass(int(parameter)) - except ValueError as e: - raise ValueError( - f"The parameter {parameter} could not be turned " - f"into an integer." - ) from e diff --git a/src/agora/utils/lineage.py b/src/agora/utils/lineage.py index 5b6686863f0262e515a6164db29c17dbecd80920..52fb552bd530e872ec013b119040cc5d4bba8764 100644 --- a/src/agora/utils/lineage.py +++ b/src/agora/utils/lineage.py @@ -20,60 +20,3 @@ def mb_array_to_dict(mb_array: np.ndarray): for mo, daughters in groupsort(mo_da).items() } - -def mb_array_to_indices(mb_array: np.ndarray): - """ - Convert a lineage ndarray (trap, mother_id, daughter_id) - into a dictionary of lists ( mother_id ->[daughters_ids] ) - """ - return pd.MultiIndex.from_arrays(mb_array[:, :2].T).union( - pd.MultiIndex.from_arrays(mb_array[:, [0, 2]].T) - ) - - -def group_matrix( - matrix: np.ndarray, - n_keys: int = 2, -) -> t.Dict[t.Tuple[int], t.List[int]]: - """Group a matrix of integers by grouping the first two columns - and setting the third one in a list. - - - Parameters - ---------- - matrix : np.ndarray - id_matrix, generally its columns are three integers indicating trap, - mother and daughter. - n_keys : int - number of keys to use to determine groups. - - Returns - ------- - t.Dict[t.Tuple[int], t.Collection[int, ...]] - The column(s) not used for generaeting keys are grouped as values. - - Examples - -------- - FIXME: Add docs. - - """ - lineage_dict = {} - if len(matrix): - - daughter = matrix[:, n_keys] - mother_global_id = matrix[:, :n_keys] - - iterator = groupby( - zip(mother_global_id, daughter), lambda x: str(x[0]) - ) - lineage_dict = {key: [x[1] for x in group] for key, group in iterator} - - def str_to_tuple(k: str) -> t.Tuple[int, ...]: - return tuple([int(x) for x in re.findall("[0-9]+", k)]) - - # Convert keys from str to tuple - lineage_dict = { - str_to_tuple(k): sorted(v) for k, v in lineage_dict.items() - } - - return lineage_dict diff --git a/src/aliby/baby_client.py b/src/aliby/baby_client.py index 8fcd0f0622578edd5de349ba745dbc40a8726c21..3f0c174560d4763e41b0c379eb514887c8a2d379 100644 --- a/src/aliby/baby_client.py +++ b/src/aliby/baby_client.py @@ -171,75 +171,6 @@ class BabyRunner(StepABC): return format_segmentation(segmentation, tp) -class BabyClient: - """A dummy BabyClient object for Dask Demo. - - - Does segmentation one time point at a time. - Should work better with the parallelisation. 
- """ - - bf_channel = 0 - model_name = "prime95b_brightfield_60x_5z" - url = "http://localhost:5101" - max_tries = 50 - sleep_time = 0.1 - - def __init__(self, tiler): - self.tiler = tiler - self._session = None - - @property - def session(self): - if self._session is None: - r_session = requests.get(self.url + f"/session/{self.model_name}") - r_session.raise_for_status() - self._session = r_session.json()["sessionid"] - return self._session - - def get_data(self, tp): - return self.tiler.get_tp_data(tp, self.bf_channel).swapaxes(1, 3) - - # def queue_image(self, img, **kwargs): - # bit_depth = img.dtype.itemsize * 8 # bit depth = byte_size * 8 - # data = create_request(img.shape, bit_depth, img, **kwargs) - # status = requests.post( - # self.url + f"/segment?sessionid={self.session}", - # data=data, - # headers={"Content-Type": data.content_type}, - # ) - # status.raise_for_status() - # return status - - def get_segmentation(self): - try: - seg_response = requests.get( - self.url + f"/segment?sessionid={self.session}", timeout=120 - ) - seg_response.raise_for_status() - result = seg_response.json() - except Timeout as e: - raise e - except HTTPError as e: - raise e - return result - - def run_tp(self, tp, **kwargs): - # Get data - img = self.get_data(tp) - # Queue image - _ = self.queue_image(img, **kwargs) - # Get segmentation - for _ in range(self.max_tries): - try: - seg = self.get_segmentation() - break - except (Timeout, HTTPError): - time.sleep(self.sleep_time) - continue - return format_segmentation(seg, tp) - - def choose_model_from_params( modelset_filter=None, camera="prime95b", diff --git a/src/aliby/haystack.py b/src/aliby/haystack.py index 340584c5bf95e7d57c9417fc4731c80fb6fe02be..d1368ffd7715a4c6285cf6a6b2e2e79edf5d6319 100644 --- a/src/aliby/haystack.py +++ b/src/aliby/haystack.py @@ -37,8 +37,6 @@ def timer(func, *args, **kwargs): ################## CUSTOM OBJECTS ################################## - - class ModelPredictor: """Generic object that takes a NN and returns the prediction. @@ -77,32 +75,3 @@ class ModelPredictorWriter(DynamicWriter): "timepoint": ((None,), np.uint16), } self.group = f"{self.name}_info" - - -class Saver: - channel_names = {0: "BrightField", 1: "GFP"} - - def __init__(self, tiler, save_directory, pos_name): - """This class straight up saves the trap data for use with neural networks in the future.""" - self.tiler = tiler - self.name = pos_name - self.save_dir = Path(save_directory) - - def channel_dir(self, index): - ch_dir = self.save_dir / self.channel_names[index] - if not ch_dir.exists(): - ch_dir.mkdir() - return ch_dir - - def get_data(self, tp, ch): - return self.tiler.get_tp_data(tp, ch).swapaxes(1, 3).swapaxes(1, 2) - - def cache(self, tp): - # Get a given time point - # split into channels - for ch in self.channel_names: - ch_dir = self.channel_dir(ch) - data = self.get_data(tp, ch) - for tid, trap in enumerate(data): - np.save(ch_dir / f"{self.name}_{tid}_{tp}.npy", trap) - return diff --git a/src/aliby/io/image.py b/src/aliby/io/image.py index 033cae2574d86197a50dc57c3c1bee296d523dcc..7e021fcdfa6e582443161addcb5d5eb9e4d8bc1d 100644 --- a/src/aliby/io/image.py +++ b/src/aliby/io/image.py @@ -1,8 +1,22 @@ #!/usr/bin/env python3 +""" +Image: Loads images and registers them. + +Image instances loads images from a specified directory into an object that +also contains image properties such as name and metadata. 
Pixels from images +are stored in dask arrays; the standard way is to store them in 5-dimensional +arrays: T(ime point), C(channel), Z(-stack), Y, X. + +This module consists of a base Image class (BaseLocalImage). ImageLocalOME +handles local OMERO images. ImageDir handles cases in which images are split +into directories, with each time point and channel having its own image file. +ImageDummy is a dummy class for silent failure testing. +""" import typing as t -from abc import ABC, abstractproperty +from abc import ABC, abstractmethod, abstractproperty from datetime import datetime +from importlib_resources import files from pathlib import Path, PosixPath import dask.array as da @@ -13,6 +27,11 @@ from tifffile import TiffFile from agora.io.metadata import dir_to_meta +def get_examples_dir(): + """Get examples directory which stores dummy image for tiler""" + return files("aliby").parent.parent / "examples" / "tiler" + + def get_image_class(source: t.Union[str, int, t.Dict[str, str], PosixPath]): """ Wrapper to pick the appropiate Image class depending on the source of data. @@ -35,10 +54,10 @@ def get_image_class(source: t.Union[str, int, t.Dict[str, str], PosixPath]): class BaseLocalImage(ABC): """ - Base class to set path and provide context management method. + Base Image class to set path and provide context management method. """ - _default_dimorder = "tczxy" + _default_dimorder = "tczyx" def __init__(self, path: t.Union[str, PosixPath]): # If directory, assume contents are naturally sorted @@ -47,6 +66,12 @@ class BaseLocalImage(ABC): def __enter__(self): return self + def __exit__(self, *exc): + for e in exc: + if e is not None: + print(e) + return False + def rechunk_data(self, img): # Format image using x and y size from metadata. @@ -56,12 +81,16 @@ class BaseLocalImage(ABC): 1, 1, 1, - self._meta["size_x"], self._meta["size_y"], + self._meta["size_x"], ), ) return self._rechunked_img + @abstractmethod + def get_data_lazy(self) -> da.Array: + pass + @abstractproperty def name(self): pass @@ -74,23 +103,130 @@ class BaseLocalImage(ABC): def data(self): return self.get_data_lazy() - def __enter__(self): - return self - - def __exit__(self, *exc): - for e in exc: - if e is not None: - print(e) - return False - @property def metadata(self): return self._meta +class ImageDummy(BaseLocalImage): + """ + Dummy Image class. + + ImageDummy mimics the other Image classes in such a way that it is accepted + by Tiler. The purpose of this class is for testing, in particular, + identifying silent failures. If something goes wrong, we should be able to + know whether it is because of bad parameters or bad input data. + + For the purposes of testing parameters, ImageDummy assumes that we already + know the tiler parameters before Image instances are instantiated. This is + true for a typical pipeline run. + """ + + def __init__(self, tiler_parameters: dict): + """Builds image instance + + Parameters + ---------- + tiler_parameters : dict + Tiler parameters, in dict form. Following + aliby.tile.tiler.TilerParameters, the keys are: "tile_size" (size of + tile), "ref_channel" (reference channel for tiling), and "ref_z" + (reference z-stack, 0 to choose a default). + """ + self.ref_channel = tiler_parameters["ref_channel"] + self.ref_z = tiler_parameters["ref_z"] + + # Goal: make Tiler happy. 
+ @staticmethod + def pad_array( + image_array: da.Array, + dim: int, + n_empty_slices: int, + image_position: int = 0, + ): + """Extends a dimension in a dask array and pads with zeros + + Extends a dimension in a dask array that has existing content, then pads + with zeros. + + Parameters + ---------- + image_array : da.Array + Input dask array + dim : int + Dimension in which to extend the dask array. + n_empty_slices : int + Number of empty slices to extend the dask array by, in the specified + dimension/axis. + image_position : int + Position within the new dimension to place the input arary, default 0 + (the beginning). + + Examples + -------- + ``` + extended_array = pad_array( + my_da_array, dim = 2, n_empty_slices = 4, image_position = 1) + ``` + Extends a dask array called `my_da_array` in the 3rd dimension + (dimensions start from 0) by 4 slices, filled with zeros. And puts the + original content in slice 1 of the 3rd dimension + """ + # Concats zero arrays with same dimensions as image_array, and puts + # image_array as first element in list of arrays to be concatenated + zeros_array = da.zeros_like(image_array) + return da.concatenate( + [ + *([zeros_array] * image_position), + image_array, + *([zeros_array] * (n_empty_slices - image_position)), + ], + axis=dim, + ) + + # Logic: We want to return a image instance + def get_data_lazy(self) -> da.Array: + """Return 5D dask array. For lazy-loading multidimensional tiff files. Dummy image.""" + examples_dir = get_examples_dir() + # TODO: Make this robust to having multiple TIFF images, one for each z-section, + # all falling under the same "pypipeline_unit_test_00_000001_Brightfield_*.tif" + # naming scheme. The aim is to create a multidimensional dask array that stores + # the z-stacks. + img_filename = "pypipeline_unit_test_00_000001_Brightfield_003.tif" + img_path = examples_dir / img_filename + # img is a dask array has three dimensions: z, x, y + # TODO: Write a test to confirm this: If everything worked well, + # z = 1, x = 1200, y = 1200 + img = imread(str(img_path)) + # Adds t & c dimensions + img = da.reshape( + img, (1, 1, img.shape[-3], img.shape[-2], img.shape[-1]) + ) + # Pads t, c, and z dimensions + img = self.pad_array( + img, dim=0, n_empty_slices=199 + ) # 200 timepoints total + img = self.pad_array(img, dim=1, n_empty_slices=2) # 3 channels + img = self.pad_array( + img, dim=2, n_empty_slices=4, image_position=self.ref_z + ) # 5 z-stacks + return img + + @property + def name(self): + pass + + @property + def dimorder(self): + pass + + class ImageLocalOME(BaseLocalImage): """ - Fetch image from OMEXML data format, in which a multidimensional tiff image contains the metadata. + Local OMERO Image class. + + This is a derivative Image class. It fetches an image from OMEXML data format, + in which a multidimensional tiff image contains the metadata. """ def __init__(self, path: str, dimorder=None): @@ -190,7 +326,7 @@ class ImageDir(BaseLocalImage): """ Image class for the case in which all images are split in one or multiple folders with time-points and channels as independent files. - It inherits from Imagelocal so we only override methods that are critical. + It inherits from BaseLocalImage so we only override methods that are critical. Assumptions: - One folders per position. 
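# ImageDummy.get_data_lazy above builds its 5-D TCZYX array by reshaping a single
# z-section tiff into (1, 1, z, y, x) and then padding the t, c and z axes with
# pad_array. The same logic on a toy dask array; the shapes are illustrative,
# not those of the bundled example image:
import dask.array as da

from aliby.io.image import ImageDummy

img = da.ones((1, 1, 1, 4, 4))                              # (t, c, z, y, x)
img = ImageDummy.pad_array(img, dim=0, n_empty_slices=199)  # 200 time points
img = ImageDummy.pad_array(img, dim=1, n_empty_slices=2)    # 3 channels
img = ImageDummy.pad_array(img, dim=2, n_empty_slices=4)    # 5 z-sections
assert img.shape == (200, 3, 5, 4, 4)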
diff --git a/src/aliby/io/omero.py b/src/aliby/io/omero.py index d7d659783a1764b804459974c2a917ce72b2b88d..62475a0ccce8e7e4f123531f92e93ee0e89806e7 100644 --- a/src/aliby/io/omero.py +++ b/src/aliby/io/omero.py @@ -39,9 +39,10 @@ class BridgeOmero: def __init__( self, - host="islay.bio.ed.ac.uk", - username="upload", - password="***REMOVED***", + host: str = None, + username: str = None, + password: str = None, + ome_id: int = None, ): """ Parameters @@ -50,22 +51,34 @@ class BridgeOmero: web address of OMERO host username: string password : string + ome_id: Optional int + Unique identifier on Omero database. Used to fetch specific objects. """ + # assert all((host, username, password)), str(f"Invalid credentials host:{host}, user:{username}, pass:{pass}") + assert all( + (host, username, password) + ), f"Invalid credentials. host: {host}, user: {username}, pwd: {password}" + self.conn = None self.host = host self.username = username self.password = password + self.ome_id = ome_id # standard method required for Python's with statement def __enter__(self): self.create_gate() - self.init_wrapper() return self - def init_wrapper(self): + @property + def ome_class(self): # Initialise Omero Object Wrapper for instances when applicable. - if hasattr(self, "ome_id"): + if not hasattr(self, "_ome_class"): + assert ( + self.conn.isConnected() and self.ome_id is not None + ), "No Blitz connection or valid omero id" + ome_type = [ valid_name for valid_name in ("Dataset", "Image") @@ -75,7 +88,11 @@ class BridgeOmero: re.IGNORECASE, ) ][0] - self.ome_class = self.conn.getObject(ome_type, self.ome_id) + self._ome_class = self.conn.getObject(ome_type, self.ome_id) + + assert self._ome_class, f"{ome_type} {self.ome_id} not found." + + return self._ome_class def create_gate(self) -> bool: self.conn = BlitzGateway( @@ -124,10 +141,6 @@ class BridgeOmero: def set_id(self, ome_id: int): self.ome_id = ome_id - @abstractmethod - def init_interface(self): - ... 
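# BridgeOmero now takes explicit credentials plus an optional ome_id, and the
# wrapped OMERO object is resolved lazily through the ome_class property (which
# asserts a live Blitz connection and a valid id). A usage sketch for the
# Dataset subclass; the host, credentials and id below are placeholders, not
# defaults shipped with the code:
from aliby.io.omero import Dataset

server_info = dict(
    host="omero.example.org",
    username="user",
    password="secret",
)
with Dataset(expt_id=10002, **server_info) as dset:
    print(dset.name)  # resolved through ome_class once the gateway is connected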
- @property def file_annotations(self): valid_annotations = [ @@ -158,10 +171,8 @@ class BridgeOmero: class Dataset(BridgeOmero): - def __init__(self, expt_id, **server_info): - self.ome_id = expt_id - - super().__init__(**server_info) + def __init__(self, expt_id: str or int, **server_info): + super().__init__(ome_id=expt_id, **server_info) @property def name(self): @@ -272,12 +283,7 @@ class Image(BridgeOmero): server_info: dictionary Specifies the host, username, and password as strings """ - self.ome_id = image_id - super().__init__(**server_info) - - def init_interface(self, ome_id: int): - self.set_id(ome_id) - self.ome_class = self.conn.getObject("Image", ome_id) + super().__init__(ome_id=image_id, **server_info) @classmethod def from_h5( @@ -301,9 +307,6 @@ class Image(BridgeOmero): # metadata = load_attributes(filepath) bridge = BridgeH5(filepath) image_id = bridge.meta_h5["image_id"] - # server_info = safe_load(bridge.meta_h5["parameters"])["general"][ - # "server_info" - # ] return cls(image_id, **cls.server_info_from_h5(filepath)) @property @@ -353,7 +356,6 @@ class UnsafeImage(Image): """ super().__init__(image_id, **server_info) self.create_gate() - self.init_wrapper() @property def data(self): diff --git a/src/aliby/io/utils.py b/src/aliby/io/utils.py deleted file mode 100644 index d59a2f65de12ece9fc0ed63ff37e7adfa2f1e801..0000000000000000000000000000000000000000 --- a/src/aliby/io/utils.py +++ /dev/null @@ -1,50 +0,0 @@ -import re -import struct - - -def clean_ascii(text): - return re.sub(r"[^\x20-\x7F]", ".", text) - - -def xxd(x, start=0, stop=None): - if stop is None: - stop = len(x) - for i in range(start, stop, 8): - # Row number - print("%04d" % i, end=" ") - # Hexadecimal bytes - for r in range(i, i + 8): - print("%02x" % x[r], end="") - if (r + 1) % 4 == 0: - print(" ", end="") - # ASCII - print( - " ", - clean_ascii(x[i : i + 8].decode("utf-8", errors="ignore")), - " ", - end="", - ) - # Int32 - print( - "{:>10} {:>10}".format(*struct.unpack("II", x[i : i + 8])), - end=" ", - ) - print("") # Newline - return - - -# Buffer reading functions -def read_int(buffer, n=1): - res = struct.unpack("I" * n, buffer.read(4 * n)) - if n == 1: - res = res[0] - return res - - -def read_string(buffer): - return "".join([x.decode() for x in iter(lambda: buffer.read(1), b"\x00")]) - - -def read_delim(buffer, n): - delim = read_int(buffer, n) - assert all([x == 0 for x in delim]), "Unknown nonzero value in delimiter" diff --git a/src/aliby/pipeline.py b/src/aliby/pipeline.py index 10917b64a3b8a3f1118366ef3600a5251d44546c..b68180f00dc92defd3fe86480624f87cde0f3987 100644 --- a/src/aliby/pipeline.py +++ b/src/aliby/pipeline.py @@ -114,7 +114,7 @@ class PipelineParameters(ParametersABC): ntps_to_eval=5, ), logfile_level="INFO", - logstream_level="WARNING", + use_explog=True, ) } @@ -477,7 +477,7 @@ class Pipeline(ProcessABC): step == "tiler" and i == min_process_from ): - print( + logging.getLogger("aliby").info( f"Found {steps['tiler'].n_traps} traps in {image.name}" ) elif ( @@ -693,7 +693,9 @@ class Pipeline(ProcessABC): except Exception: pass - meta.run() + if config["general"]["use_explog"]: + meta.run() + meta.add_fields( # Add non-logfile metadata { "aliby_version": version("aliby"), diff --git a/src/aliby/tile/tiler.py b/src/aliby/tile/tiler.py index 7309672048ab1f2d19120c96e6b689acbd294566..34aa89d1c2af76976f134a0fd38d58919cedc9bb 100644 --- a/src/aliby/tile/tiler.py +++ b/src/aliby/tile/tiler.py @@ -15,7 +15,7 @@ One key method is Tiler.run. 
The image-processing is performed by traps/segment_traps. -The experiment is stored as an array with a standard indexing order of (Time, Channels, Z-stack, Y, X). +The experiment is stored as an array with a standard indexing order of (Time, Channels, Z-stack, X, Y). """ import re import typing as t @@ -30,7 +30,7 @@ from skimage.registration import phase_cross_correlation from agora.abc import ParametersABC, StepABC from agora.io.writer import BridgeH5 -from aliby.io.image import ImageLocalOME, ImageDir +from aliby.io.image import ImageLocalOME, ImageDir, ImageDummy from aliby.tile.traps import segment_traps @@ -247,6 +247,43 @@ class Tiler(StepABC): self.tile_size = self.tile_size or min(self.image.shape[-2:]) + @classmethod + def dummy(cls, parameters: dict): + """ + Instantiate dummy Tiler from dummy image + + If image.dimorder exists dimensions are saved in that order. + Otherwise default to "tczyx". + + Parameters + ---------- + parameters: dictionary output of an instance of TilerParameters + """ + imgdmy_obj = ImageDummy(parameters) + dummy_image = imgdmy_obj.get_data_lazy() + # Default to "tczyx" if image.dimorder is None + dummy_omero_metadata = { + f"size_{dim}": dim_size + for dim, dim_size in zip( + imgdmy_obj.dimorder or "tczyx", dummy_image.shape + ) + } + dummy_omero_metadata.update( + { + "channels": [ + parameters["ref_channel"], + *(["nil"] * (dummy_omero_metadata["size_c"] - 1)), + ], + "name": "", + } + ) + + return cls( + imgdmy_obj.data, + dummy_omero_metadata, + TilerParameters.from_dict(parameters), + ) + @classmethod def from_image(cls, image, parameters: TilerParameters): """ @@ -281,8 +318,6 @@ class Tiler(StepABC): trap_locs = TrapLocations.read_hdf5(filepath) metadata = BridgeH5(filepath).meta_h5 metadata["channels"] = image.metadata["channels"] - # metadata["zsectioning/nsections"] = image.metadata["zsectioning/nsections"] - # metadata["channels/zsect"] = image.metadata["channels/zsect"] if parameters is None: parameters = TilerParameters.default() tiler = cls( @@ -323,15 +358,10 @@ class Tiler(StepABC): @property def shape(self): """ - Returns properties of the time-lapse experiment - no of channels - no of time points - no of z stacks - no of pixels in y direction - no of pixels in z direction + Returns properties of the time-lapse as shown by self.image.shape + """ - c, t, z, y, x = self.image.shape - return (c, t, x, y, z) + return self.image.shape @property def n_processed(self): diff --git a/src/aliby/tile/traps.py b/src/aliby/tile/traps.py index 8d1d776c8bbc17a5c843fa3d145c324a20baba5e..4eddeb7e45a0f39ea0de28c865b79b685061da5b 100644 --- a/src/aliby/tile/traps.py +++ b/src/aliby/tile/traps.py @@ -140,9 +140,6 @@ def segment_traps( return traps_retry -### - - def identify_trap_locations( image, trap_template, optimize_scale=True, downscale=0.35, trap_size=None ): @@ -240,103 +237,10 @@ def identify_trap_locations( def stretch_image(image): + # FIXME Used in aliby.utils.imageViewer image = ((image - image.min()) / (image.max() - image.min())) * 255 minval = np.percentile(image, 2) maxval = np.percentile(image, 98) image = np.clip(image, minval, maxval) image = (image - minval) / (maxval - minval) - return image - - -def get_tile_shapes(x, tile_size): - half_size = tile_size // 2 - xmin = int(x[0] - half_size) - ymin = max(0, int(x[1] - half_size)) - - return xmin, xmin + tile_size, ymin, ymin + tile_size - - -def in_image(img, xmin, xmax, ymin, ymax, xidx=2, yidx=3): - if xmin >= 0 and ymin >= 0: - if xmax < img.shape[xidx] and ymax < img.shape[yidx]: - 
return True - else: - return False - - -def get_xy_tile(img, xmin, xmax, ymin, ymax, xidx=2, yidx=3, pad_val=None): - if pad_val is None: - pad_val = np.median(img) - # Get the tile from the image - idx = [slice(None)] * len(img.shape) - idx[xidx] = slice(max(0, xmin), min(xmax, img.shape[xidx])) - idx[yidx] = slice(max(0, ymin), min(ymax, img.shape[yidx])) - tile = img[tuple(idx)] - # Check if the tile is in the image - if in_image(img, xmin, xmax, ymin, ymax, xidx, yidx): - return tile - else: - # Add padding - pad_shape = [(0, 0)] * len(img.shape) - pad_shape[xidx] = (max(-xmin, 0), max(xmax - img.shape[xidx], 0)) - pad_shape[yidx] = (max(-ymin, 0), max(ymax - img.shape[yidx], 0)) - tile = np.pad(tile, pad_shape, constant_values=pad_val) - return tile - - -def tile_where(centre, x, y, MAX_X, MAX_Y): - # Find the position of the tile - xmin = int(centre[1] - x // 2) - ymin = int(centre[0] - y // 2) - xmax = xmin + x - ymax = ymin + y - # What do we actually have available? - r_xmin = max(0, xmin) - r_xmax = min(MAX_X, xmax) - r_ymin = max(0, ymin) - r_ymax = min(MAX_Y, ymax) - return xmin, ymin, xmax, ymax, r_xmin, r_ymin, r_xmax, r_ymax - - -def get_tile(shape, center, raw_expt, ch, t, z): - """Returns a tile from the raw experiment with a given shape. - - :param shape: The shape of the tile in (C, T, Z, Y, X) order. - :param center: The x,y position of the centre of the tile - :param - """ - _, _, x, y, _ = shape - _, _, MAX_X, MAX_Y, _ = raw_expt.shape - tile = np.full(shape, np.nan) - - # Find the position of the tile - xmin = int(center[1] - x // 2) - ymin = int(center[0] - y // 2) - xmax = xmin + x - ymax = ymin + y - # What do we actually have available? - r_xmin = max(0, xmin) - r_xmax = min(MAX_X, xmax) - r_ymin = max(0, ymin) - r_ymax = min(MAX_Y, ymax) - - # Fill values - tile[ - :, - :, - (r_xmin - xmin) : (r_xmax - xmin), - (r_ymin - ymin) : (r_ymax - ymin), - :, - ] = raw_expt[ch, t, r_xmin:r_xmax, r_ymin:r_ymax, z] - # fill_val = np.nanmedian(tile) - # np.nan_to_num(tile, nan=fill_val, copy=False) - return tile - - -def centre(img, percentage=0.3): - y, x = img.shape - cropx = int(np.ceil(x * percentage)) - cropy = int(np.ceil(y * percentage)) - startx = int(x // 2 - (cropx // 2)) - starty = int(y // 2 - (cropy // 2)) - return img[starty : starty + cropy, startx : startx + cropx] + return image \ No newline at end of file diff --git a/src/aliby/utils/argo.py b/src/aliby/utils/argo.py index 24255065e089a4a6b091f7b821d2bf594d2f4b2e..b4d80c364ab7cf62396660e0f19bd92a04673332 100644 --- a/src/aliby/utils/argo.py +++ b/src/aliby/utils/argo.py @@ -282,10 +282,6 @@ class OmeroExplorer: for k, v in self.cache.items() } - # @staticfunction - # def number_of_X(logfile: str): - # return re.findall("X", logfile) - def dset_count( self, dset: t.Union[int, _DatasetWrapper], @@ -351,17 +347,6 @@ class Argo(OmeroExplorer): super().__init__(*args, **kwargs) -def get_creds(): - return ( - "upload", - "***REMOVED***", # OMERO Password - ) - - -def list_files(dset): - return {x for x in dset.listAnnotations() if hasattr(x, "getFileName")} - - def annot_from_dset(dset, kind): v = [x for x in dset.listAnnotations() if hasattr(x, "getFileName")] infname = kind if kind == "log" else kind.title() @@ -374,7 +359,6 @@ def annot_from_dset(dset, kind): except Exception as e: print(f"Conversion from acquisition file failed: {e}") return {} - return acq @@ -393,6 +377,7 @@ def check_channels(acq, channels, _all=True): def get_chs(exptype): + # TODO Documentation exptypes = { "dual_ph": ("GFP", 
"pHluorin405", "mCherry"), "ph": ("GFP", "pHluorin405"), @@ -403,6 +388,7 @@ def get_chs(exptype): def load_annot_from_cache(exp_id, cache_dir="cache/"): + # TODO Documentation if type(cache_dir) is not PosixPath: cache_dir = Path(cache_dir) @@ -428,16 +414,6 @@ def parse_annot(str_io, fmt): return parser.parse(io.StringIO(str_io)) -def get_log_date(annot_sets): - log = get_annot(annot_sets, "log") - return log.get("date", None) - - -def get_log_microscope(annot_sets): - log = get_annot(annot_sets, "log") - return log.get("microscope", None) - - def get_annotsets(dset): annot_files = [ annot.getFile() @@ -457,12 +433,8 @@ def get_annotsets(dset): return annot_sets -# def has_tags(d, tags): -# if set(tags).intersection(annot_from_dset(d, "log").get("omero_tags", [])): -# return True - - def load_acq(dset): + # TODO Documentation try: acq = annot_from_dset(dset, kind="acq") return acq @@ -472,6 +444,7 @@ def load_acq(dset): def has_channels(dset, exptype): + # TODO Documentation acq = load_acq(dset) if acq: return check_channels(acq, get_chs(exptype)) @@ -479,26 +452,8 @@ def has_channels(dset, exptype): return -# Custom functions -def compare_dsets_voltages_exp(dsets): - a = {} - for d in dsets: - try: - acq = annot_from_dset(d, kind="acq")["channels"] - a[d.getId()] = { - k: (v, e) - for k, v, e in zip( - acq["channel"], acq["voltage"], acq["exposure"] - ) - } - - except Exception as e: - print(d, f"Data set voltage comparison did not work:{e}") - - return a - - def get_logfile(dset): + # TODO Documentation annot_file = [ annot.getFile() for annot in dset.listAnnotations() diff --git a/src/aliby/utils/cache.py b/src/aliby/utils/cache.py deleted file mode 100644 index a256c6350932f495423fec4f67ade468b9d607a6..0000000000000000000000000000000000000000 --- a/src/aliby/utils/cache.py +++ /dev/null @@ -1,141 +0,0 @@ -""" -Utility functions and classes -""" -import itertools -import logging -import operator -from functools import partial, wraps -from pathlib import Path -from time import perf_counter -from typing import Callable - -import cv2 -import h5py -import imageio -import numpy as np - - -def repr_obj(obj, indent=0): - """ - Helper function to display info about OMERO objects. - Not all objects will have a "name" or owner field. - """ - string = """%s%s:%s Name:"%s" (owner=%s)""" % ( - " " * indent, - obj.OMERO_CLASS, - obj.getId(), - obj.getName(), - obj.getAnnotation(), - ) - - return string - - -def imread(path): - return cv2.imread(str(path), -1) - - -class ImageCache: - """HDF5-based image cache for faster loading of the images once they've - been read. - """ - - def __init__(self, file, name, shape, remote_fn): - self.store = h5py.File(file, "a") - # Create a dataset - self.dataset = self.store.create_dataset( - name, shape, dtype=np.float, fill_value=np.nan - ) - self.remote_fn = remote_fn - - def __getitem__(self, item): - cached = self.dataset[item] - if np.any(np.isnan(cached)): - full = self.remote_fn(item) - self.dataset[item] = full - return full - else: - return cached - - -class Cache: - """ - Fixed-length mapping to use as a cache. - Deletes items in FIFO manner when maximum allowed length is reached. - """ - - def __init__(self, max_len=5000, load_fn: Callable = imread): - """ - :param max_len: Maximum number of items in the cache. 
- :param load_fn: The function used to load new items if they are not - available in the Cache - """ - self._dict = dict() - self._queue = [] - self.load_fn = load_fn - self.max_len = max_len - - def __getitem__(self, item): - if item not in self._dict: - self.load_item(item) - return self._dict[item] - - def load_item(self, item): - self._dict[item] = self.load_fn(item) - # Clean up the queue - self._queue.append(item) - if len(self._queue) > self.max_len: - del self._dict[self._queue.pop(0)] - - def clear(self): - self._dict.clear() - self._queue.clear() - - -def accumulate(lst: list): - lst = sorted(lst) - it = itertools.groupby(lst, operator.itemgetter(0)) - for key, sub_iter in it: - yield key, [x[1] for x in sub_iter] - - -def get_store_path(save_dir, store, name): - """Create a path to a position-specific store. - - This combines the name and the store's base name into a file path within save_dir. - For example. - >>> get_store_path('data', 'baby_seg.h5', 'pos001') - Path(data/pos001baby_seg.h5') - - :param save_dir: The root directory in which to save the file, absolute - path. - :param store: The base name of the store - :param name: The name of the position - :return: Path(save_dir) / name+store - """ - store = Path(save_dir) / store - store = store.with_name(name + store.name) - return store - - -def parametrized(dec): - def layer(*args, **kwargs): - def repl(f): - return dec(f, *args, **kwargs) - - return repl - - return layer - - -@parametrized -def timed(f, name=None): - @wraps(f) - def decorated(*args, **kwargs): - t = perf_counter() - res = f(*args, **kwargs) - to_print = name or f.__name__ - logging.debug(f"Timing:{to_print}:{perf_counter() - t}s") - return res - - return decorated diff --git a/src/extraction/core/extractor.py b/src/extraction/core/extractor.py index 894b43219953a7a76b5dc1488582a68fccceada6..36d03cb8c814de84e1fec6cc8a4e667733abf980 100644 --- a/src/extraction/core/extractor.py +++ b/src/extraction/core/extractor.py @@ -261,7 +261,7 @@ class Extractor(StepABC): channel_ids = None if z is None: # gets the tiles data via tiler - z: t.List[int] = list(range(self.tiler.shape[-1])) + z: t.List[int] = list(range(self.tiler.shape[-3])) tiles = ( self.tiler.get_tiles_timepoint( tp, channels=channel_ids, z=z, **kwargs @@ -450,6 +450,7 @@ class Extractor(StepABC): An example is d["GFP"]["np_max"]["mean"][0], which gives a tuple of the calculated mean GFP fluorescence for all cells. """ + # TODO Can we split the different extraction types into sub-methods to make this easier to read? 
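# Note on the z-axis change above: Tiler.shape now returns self.image.shape
# unchanged, i.e. (time points, channels, z sections, y, x), so the number of
# z sections sits at axis -3 rather than at the last axis. For example (values
# illustrative only), with self.tiler.shape == (200, 3, 5, 1200, 1200):
#     z = list(range(self.tiler.shape[-3]))   # -> [0, 1, 2, 3, 4]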
if tree is None: # use default tree: extraction_tree = self.params.tree diff --git a/src/extraction/core/functions/io.py b/src/extraction/core/functions/io.py deleted file mode 100644 index 3b377bdffe47c991264e262ca83a18c7dbd726b0..0000000000000000000000000000000000000000 --- a/src/extraction/core/functions/io.py +++ /dev/null @@ -1,12 +0,0 @@ -from yaml import dump, load - - -def dict_to_yaml(d, f): - with open(f, "w") as f: - dump(d, f) - - -def add_attrs(hdfile, path, files): - group = hdfile.create_group(path) - for k, v in files: - group.attrs[k] = v diff --git a/src/extraction/core/functions/utils.py b/src/extraction/core/functions/utils.py deleted file mode 100644 index dcae68c9bde33a86108cf5a6c5bda701c80420e5..0000000000000000000000000000000000000000 --- a/src/extraction/core/functions/utils.py +++ /dev/null @@ -1,19 +0,0 @@ -from collections import deque - - -def depth(d): - """ - Copied from https://stackoverflow.com/a/23499088 - - Used to determine the depth of our config trees and fill them - """ - queue = deque([(id(d), d, 1)]) - memo = set() - while queue: - id_, o, level = queue.popleft() - if id_ in memo: - continue - memo.add(id_) - if isinstance(o, dict): - queue += ((id(v), v, level + 1) for v in o.values()) - return level diff --git a/src/extraction/core/functions/versioning.py b/src/extraction/core/functions/versioning.py index 40c1e8f9c96d0e17af4fb88c8452da9c70fbe065..4c77a9f8ebdc4ce65626c6ac433ddd85194f01eb 100644 --- a/src/extraction/core/functions/versioning.py +++ b/src/extraction/core/functions/versioning.py @@ -2,6 +2,7 @@ import git def get_sha(): + # FIXME Unused, but *should* be used... repo = git.Repo(search_parent_directories=True) sha = repo.head.object.hexsha return sha diff --git a/src/extraction/core/omero.py b/src/extraction/core/omero.py deleted file mode 100644 index e69aa7473a493dff46bb720dd4e8387d0a46bca0..0000000000000000000000000000000000000000 --- a/src/extraction/core/omero.py +++ /dev/null @@ -1,35 +0,0 @@ -from omero.gateway import BlitzGateway -from tqdm import tqdm - - -# Helper funs -def connect_omero(): - conn = BlitzGateway(*get_creds(), host="islay.bio.ed.ac.uk", port=4064) - conn.connect() - return conn - - -def get_creds(): - return ( - "upload", - "***REMOVED***", # OMERO Password - ) - - -def download_file(f): - """ - Download file in chunks using FileWrapper object - """ - desc = ( - "Downloading " - + f.getFileName() - + " (" - + str(round(f.getFileSize() / 1000**2, 2)) - + "Mb)" - ) - - down_file = bytearray() - for c in tqdm(f.getFileInChunks(), desc=desc): - down_file += c - - return down_file diff --git a/src/logfile_parser/swainlab_parser.py b/src/logfile_parser/swainlab_parser.py index 1a0d1763acd492093d1a5e5da76e9ff40382832c..cb39750adcba447cc45b01cdf17b5d3a71bfd326 100644 --- a/src/logfile_parser/swainlab_parser.py +++ b/src/logfile_parser/swainlab_parser.py @@ -1,5 +1,5 @@ #!/usr/bin/env jupyter - +# TODO should this be merged to the regular logfile_parser structure? 
""" Description of new logfile: @@ -301,18 +301,3 @@ def parse_x(string: str, type: str, **kwargs): def parse_from_swainlab_grammar(filepath: t.Union[str, PosixPath]): return parse_from_grammar(filepath, grammar) - - -# test_file = "/home/alan/Documents/dev/skeletons/scripts/dev/C1_60x.log" -# test_file = "/home/alan/Documents/dev/skeletons/scripts/dev/bak" -# test_file = "/home/alan/Documents/dev/skeletons/scripts/dev/two_tables.log" -# test_file = "/home/alan/Downloads/pH_med_to_low 1.log" -# test_file = "/home/alan/Documents/dev/skeletons/scripts/data/577_2022_12_20_pHCalibrate6_7_00/pHCalibrate6_7.log" - - -# d = parse_from_grammar(test_file, grammar) -# print(d) - -# from logfile_parser.legacy import get_legacy_log_example_interface - -# original = get_legacy_log_example_interface() diff --git a/src/postprocessor/benchmarks/post_processing.py b/src/postprocessor/benchmarks/post_processing.py index d17651a8dc839d19abba7f38528437dbc22c3a03..d36e2afbae5c0e40e219dd322e8e4acf81a9836b 100644 --- a/src/postprocessor/benchmarks/post_processing.py +++ b/src/postprocessor/benchmarks/post_processing.py @@ -1,3 +1,4 @@ +# TODO remove/to snippets? """ Post-processing utilities diff --git a/src/postprocessor/core/abc.py b/src/postprocessor/core/abc.py index a299e19f6f87a33e0d9a6c5c099b291f377811dc..ffb677b4d7e5ad7be5249e23df552b7355bdf1f6 100644 --- a/src/postprocessor/core/abc.py +++ b/src/postprocessor/core/abc.py @@ -16,6 +16,7 @@ class PostProcessABC(ProcessABC): @classmethod def as_function(cls, data, *extra_data, **kwargs): + # FIXME can this be a __call__ method instead? # Find the parameter's default parameters = cls.default_parameters(**kwargs) return cls(parameters=parameters).run(data, *extra_data) diff --git a/src/postprocessor/core/group.py b/src/postprocessor/core/group.py deleted file mode 100644 index 5613e5de2a0fd4981f98e7771bab47eda9307802..0000000000000000000000000000000000000000 --- a/src/postprocessor/core/group.py +++ /dev/null @@ -1,137 +0,0 @@ -""" -Class to group multiple positions into one using one different available criteria. -""" - -import re -from pathlib import Path - -import h5py -import pandas as pd -from agora.io.bridge import groupsort -from agora.io.signal import Signal - -from postprocessor.core.abc import ParametersABC, ProcessABC - - -class GroupParameters(ParametersABC): - def __init__(self, by="name", processes=[], signals=[]): - self.by = by - self.signals = signals - self.processes = processes - - @classmethod - def default(cls): - return cls.from_dict({"by": "name", "signals": [], "processes": []}) - - -class Group(ProcessABC): - def __init__(self, parameters): - super().__init__(parameters) - - def get_position_filenames(self, exp_root, poses): - """ - Get filenames as a dictionary where the key is the position and value the filename. 
- """ - central_store = Path(exp_root) / "store.h5" - if central_store.exists(): - hdf = h5py.File(central_store, "r") - self.filenames = [ - pos.attrs["filename"] for pos in hdf["/positions/"] - ] - hdf.close() - else: # If no central store just list position files in expt root folder - fullfiles = [x for x in Path(exp_root).glob("*store.h5")] - files = [x.name for x in Path(exp_root).glob("*store.h5")] - filenames = [False for _ in poses] - for i, pos in enumerate(poses): - matches = [ - True if re.match(pos + ".*.h5", fname) else False - for fname in files - ] - if any(matches): - assert sum(matches) == 1, "More than one match" - filenames[i] = (pos, fullfiles[matches.index(True)]) - - self.filenames = { - fname[0]: fname[1] for fname in filenames if fname - } - - self.positions = list(self.filenames.keys()) - return self.filenames - - def get_signals(self): - # hdf = h5py.File(central_store, "r") - # keys_d = groupsort(keys) - self.signals = {pos: {} for pos in self.filenames.keys()} - for pos, fname in self.filenames.items(): - for signal in self.parameters.signals: - self.signals[pos][signal] = pd.read_hdf(fname, signal) - - return self.signals - - def gen_groups(self): - if self.by == "group": # Use group names in metadata - pass - elif self.by == "name": # Infer groups from signal concatenation - # Remove last four characters and find commonalities larger than 4 - # characters between posnames and group them that way. - groupnames = list(set([x[:-3] for x in self.positions])) - self.group_signal_tree = {group: [] for group in groupnames} - self.poses_grouped = {group: [] for group in groupnames} - for pos in self.positions: - group = groupnames[groupnames.index(pos[:-3])] - self.group_signal_tree[group].append(self.signals[pos]) - self.poses_grouped[group].append(pos) - - elif ( - type(self.by) == tuple - ): # Manually give groups as tuple or list of positions - pass - - def concat_signals(self): - self.concated_signals = {group: {} for group in self.group_signal_tree} - for k, group in self.group_signal_tree.items(): - for signal in self.parameters.signals: - self.concated_signals[k][signal] = pd.concat( - [g[signal] for g in group], keys=self.poses_grouped[k] - ) - - return self.concated_signals - - def process_signals(self, grouped_signals): - pass - - def run(self, central_store, poses): - - self.get_position_filenames(central_store, poses) - self.get_signals() - self.gen_groups() - self.concat_signals() - # processed_signals = self.process_signals(grouped_signals) - - return self.concated_signals - # return processed_signals - - -poses = [ - x.name.split("store")[0] - for x in Path( - "/shared_libs/pipeline-core/scripts/data/ph_calibration_dual_phl_ura8_5_04_5_83_7_69_7_13_6_59__01" - ).rglob("*") - if x.name != "images.h5" -] -gr = Group( - GroupParameters( - signals=[ - "/extraction/general/None/area", - "/extraction/mCherry/np_max/median", - ] - ) -) -gr.run( - central_store="/shared_libs/pipeline-core/scripts/data/ph_calibration_dual_phl_ura8_5_04_5_83_7_69_7_13_6_59__01", - poses=poses, -) -signal = Signal( - "/shared_libs/pipeline-core/scripts/data/ph_calibration_dual_phl_ura8_5_04_5_83_7_69_7_13_6_59__01/ph_5_04_001store.h5" -) diff --git a/src/postprocessor/core/lineageprocess.py b/src/postprocessor/core/lineageprocess.py index 34ccf2349430c839ab1616c1d779ca0ff995dd27..f10d5b3e3fef47e23724115ab0f99669a5b6ad94 100644 --- a/src/postprocessor/core/lineageprocess.py +++ b/src/postprocessor/core/lineageprocess.py @@ -1,3 +1,4 @@ +# TODO Module docstring import typing as t 
from abc import abstractmethod @@ -7,8 +8,6 @@ import pandas as pd from agora.abc import ParametersABC from postprocessor.core.abc import PostProcessABC -# from agora.utils.lineage import group_matrix - class LineageProcessParameters(ParametersABC): """ @@ -47,18 +46,14 @@ class LineageProcess(PostProcessABC): Overrides PostProcess.as_function classmethod. Lineage functions require lineage information to be passed if run as function. """ - # if isinstance(lineage, np.ndarray): - # lineage = group_matrix(lineage, n_keys=2) - parameters = cls.default_parameters(**kwargs) return cls(parameters=parameters).run( data, lineage=lineage, *extra_data ) - # super().as_function(data, *extra_data, lineage=lineage, **kwargs) def load_lineage(self, lineage): """ Reshape the lineage information if needed """ - + # TODO does this need to be a function? self.lineage = lineage diff --git a/src/postprocessor/core/processor.py b/src/postprocessor/core/processor.py index 37efd0d4128d7ce6744fd0397cd0748ccfe30c52..4b9161c0fe7b8a1a70b60a162fb4d575dba3c494 100644 --- a/src/postprocessor/core/processor.py +++ b/src/postprocessor/core/processor.py @@ -56,26 +56,16 @@ class PostProcessorParameters(ParametersABC): "/extraction/general/None/volume", ], ], - # [ - # "savgol", - # [ - # "/extraction/general/None/volume", - # ], - # ], [ "dsignal", [ "/extraction/general/None/volume", - # "/postprocessing/savgol/extraction_general_None_volume", ], ], [ "bud_metric", [ "/extraction/general/None/volume", - # "/postprocessing/dsignal/postprocessing_savgol_extraction_general_None_volume", - # "/postprocessing/savgol/extraction_general_None_volume", - # "/postprocessing/dsignal/extraction_general_None_volume", ], ], [ @@ -84,15 +74,6 @@ class PostProcessorParameters(ParametersABC): "/postprocessing/bud_metric/extraction_general_None_volume", ], ], - # [ - # "aggregate", - # [ - # [ - # "/extraction/general/None/volume", - # "postprocessing/dsignal/extraction_general_None_volume", - # ], - # ], - # ], ], } param_sets = { @@ -105,22 +86,12 @@ class PostProcessorParameters(ParametersABC): outpaths["aggregate"] = "/postprocessing/experiment_wide/aggregated/" if "ph_batman" in kind: - # targets["processes"]["bud_metric"].append( - # [ - # [ - # "/extraction/em_ratio/np_max/mean", - # "/extraction/em_ratio/np_max/median", - # ], - # ] - # ) targets["processes"]["dsignal"].append( [ "/extraction/em_ratio/np_max/mean", "/extraction/em_ratio/np_max/median", "/extraction/em_ratio_bgsub/np_max/mean", "/extraction/em_ratio_bgsub/np_max/median", - # "/postprocessing/bud_metric/extraction_em_ratio_np_max_mean", - # "/postprocessing/bud_metric/extraction_em_ratio_np_max_median", ] ) targets["processes"]["aggregate"].append( @@ -132,10 +103,6 @@ class PostProcessorParameters(ParametersABC): "/extraction/em_ratio_bgsub/np_max/median", "/extraction/gsum/np_max/median", "/extraction/gsum/np_max/mean", - # "postprocessing/bud_metric/extraction_em_ratio_np_max_mean", - # "postprocessing/bud_metric/extraction_em_ratio_np_max_median", - # "postprocessing/dsignal/postprocessing_bud_metric_extraction_em_ratio_np_max_median", - # "postprocessing/dsignal/postprocessing_bud_metric_extraction_em_ratio_np_max_mean", ] ], ) @@ -178,6 +145,7 @@ class PostProcessor(ProcessABC): self.targets = parameters["targets"] def run_prepost(self): + # TODO Split function """Important processes run before normal post-processing ones""" merge_events = self.merger.run( @@ -301,12 +269,7 @@ class PostProcessor(ProcessABC): return x def run(self): - # import cProfile - # 
import pstats - - # profile = cProfile.Profile() - # profile.enable() - + # TODO Documentation :) + Split self.run_prepost() for process, datasets in tqdm(self.targets["processes"]): diff --git a/src/postprocessor/core/reshapers/bud_metric.py b/src/postprocessor/core/reshapers/bud_metric.py index 0893db667c5250fe20e2447d17712eb920d98e8d..c527134dc95ac69ef3e6b6d54e0b2e67b32fc41a 100644 --- a/src/postprocessor/core/reshapers/bud_metric.py +++ b/src/postprocessor/core/reshapers/bud_metric.py @@ -33,7 +33,6 @@ class bud_metric(LineageProcess): mother_bud_ids: Dict[pd.Index, Tuple[pd.Index]] = None, ): if mother_bud_ids is None: - # filtered_lineage = self.filter_signal_cells(signal) mother_bud_ids = mb_array_to_dict(self.lineage) return self.get_bud_metric(signal, mother_bud_ids) diff --git a/src/postprocessor/core/reshapers/buddings.py b/src/postprocessor/core/reshapers/buddings.py index 0b01dad70c5eab067f26b430e5451fdf70bcb4e2..4b3fbba9db0a2a8b61c69903afe78664c1dcddca 100644 --- a/src/postprocessor/core/reshapers/buddings.py +++ b/src/postprocessor/core/reshapers/buddings.py @@ -25,16 +25,17 @@ class buddingsParameters(LineageProcessParameters): FIXME: Add docs. """ - _defaults = {"lineage_location": "postprocessing/lineage_merged"} +# TODO Why not capitalized? class buddings(LineageProcess): """ Calculate buddings in a trap assuming one mother per trap returns a pandas series with the buddings """ + # TODO might want to define "buddings" more scientifically def __init__(self, parameters: buddingsParameters): super().__init__(parameters) diff --git a/src/postprocessor/core/reshapers/merger.py b/src/postprocessor/core/reshapers/merger.py index 16d2b598ea720027a2d0ca566adfc1b73cc5c31a..1fbf4155fd04fe41b7a76e6aeb0253a6746bf2b2 100644 --- a/src/postprocessor/core/reshapers/merger.py +++ b/src/postprocessor/core/reshapers/merger.py @@ -3,7 +3,7 @@ from agora.abc import ParametersABC from postprocessor.core.abc import PostProcessABC from postprocessor.core.functions.tracks import get_joinable - +# TODO Why not capitalized? class mergerParameters(ParametersABC): """ :param tol: float or int threshold of average (prediction error/std) necessary @@ -22,6 +22,7 @@ class mergerParameters(ParametersABC): } +# TODO Why not capitalized? class merger(PostProcessABC): """ Combines rows of tracklet that are likely to be the same. 
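For orientation, a minimal usage sketch of how these lower-cased post-processes are driven, as shown below. This assumes mergerParameters exposes the same default() constructor as the other ParametersABC subclasses in this patch, and that as_function resolves default parameters the way postprocessor/core/abc.py shows; the DataFrame itself is purely illustrative.

import numpy as np
import pandas as pd

from postprocessor.core.reshapers.merger import merger, mergerParameters

# Illustrative signal: one row per tracklet, one column per time point.
signal = pd.DataFrame(np.random.rand(6, 10))

# Explicit construction, mirroring how PostProcessor drives each step.
joinable = merger(mergerParameters.default()).run(signal)

# One-shot call through the as_function classmethod inherited from PostProcessABC.
joinable = merger.as_function(signal)

As the hunk below shows, run returns an empty list when the signal spans four or fewer time points, which is why the sketch uses ten.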
@@ -34,8 +35,4 @@ class merger(PostProcessABC): joinable = [] if signal.shape[1] > 4: joinable = get_joinable(signal, tol=self.parameters.tolerance) - # merged, _ = merge_tracks(signal) # , min_len=self.window + 1) - # indices = (*zip(*merged.index.tolist()),) - # names = merged.index.names - # return {name: ids for name, ids in zip(names, indices)} return joinable diff --git a/src/postprocessor/core/reshapers/picker.py b/src/postprocessor/core/reshapers/picker.py index c4ada42e14e803371f6a8d34213cb0896dc142d2..b64bfcee9f4f795498b28e7920f35b041726e5e1 100644 --- a/src/postprocessor/core/reshapers/picker.py +++ b/src/postprocessor/core/reshapers/picker.py @@ -1,34 +1,20 @@ -# from abc import ABC, abstractmethod - -# from copy import copy -# from itertools import groupby -# from typing import List, Tuple, Union import typing as t -from typing import Union -# import igraph as ig import numpy as np import pandas as pd from agora.abc import ParametersABC from agora.io.cells import Cells -# from postprocessor.core.functions.tracks import max_nonstop_ntps, max_ntps from agora.utils.association import validate_association from postprocessor.core.lineageprocess import LineageProcess -# from utils_find_1st import cmp_equal, find_1st - class pickerParameters(ParametersABC): _defaults = { "sequence": [ ["lineage", "intersection", "families"], - # ["condition", "intersection", "any_present", 0.7], - # ["condition", "intersection", "growing", 80], ["condition", "intersection", "present", 7], - # ["condition", "intersection", "mb_guess", 3, 0.7], - # ("lineage", "intersection", "full_families"), ], } @@ -80,16 +66,8 @@ class picker(LineageProcess): idx = idx[valid_indices] mothers_daughters = mothers_daughters[valid_lineage] - - # return mothers_daughters, idx return idx - def loc_lineage(self, kymo: pd.DataFrame, how: str, lineage=None): - _, valid_indices = self.pick_by_lineage( - kymo, how, mothers_daughters=lineage - ) - return kymo.loc[[tuple(x) for x in valid_indices]] - def pick_by_condition(self, signals, condition, thresh): idx = self.switch_case(signals, condition, thresh) return idx @@ -131,7 +109,7 @@ class picker(LineageProcess): self, signals: pd.DataFrame, condition: str, - threshold: Union[float, int, list], + threshold: t.Union[float, int, list], ): if len(threshold) == 1: threshold = [_as_int(*threshold, signals.shape[1])] @@ -145,7 +123,7 @@ class picker(LineageProcess): return set(signals.index[case_mgr[condition](signals, *threshold)]) -def _as_int(threshold: Union[float, int], ntps: int): +def _as_int(threshold: t.Union[float, int], ntps: int): if type(threshold) is float: threshold = ntps * threshold return threshold diff --git a/src/postprocessor/grouper.py b/src/postprocessor/grouper.py index 167c0b5e205775b825b1950041e741a523b519dc..ec04a6f5b64a783884807e783f641dc43622497b 100644 --- a/src/postprocessor/grouper.py +++ b/src/postprocessor/grouper.py @@ -15,11 +15,6 @@ import pandas as pd import seaborn as sns from pathos.multiprocessing import Pool -from agora.utils.kymograph import ( - drop_level, - get_mother_ilocs_from_daughters, - intersection_matrix, -) from postprocessor.chainer import Chainer diff --git a/tests/agora/example_test.py b/tests/agora/example_test.py deleted file mode 100644 index 539b410aefd240a77593a2a0509c3e36117e82ba..0000000000000000000000000000000000000000 --- a/tests/agora/example_test.py +++ /dev/null @@ -1,25 +0,0 @@ -"""This is an example test file to show the structure.""" -import pytest - -from agora.utils.example import ExampleClass, 
example_function - - -class TestExampleClass: - x = ExampleClass(1) - - def test_add_one(self): - assert self.x.add_one() == 2 - - def test_add_n(self): - assert self.x.add_n(10) == 11 - - -def test_example_function(): - x = example_function(1) - assert isinstance(x, ExampleClass) - assert x.parameter == 1 - - -def test_example_function_fail(): - with pytest.raises(ValueError): - example_function("hello") diff --git a/tests/aliby/network/test_baby_client.py b/tests/aliby/network/test_baby_client.py deleted file mode 100644 index a8d131a04d3a06ce03f4634252c4e497111f986f..0000000000000000000000000000000000000000 --- a/tests/aliby/network/test_baby_client.py +++ /dev/null @@ -1,91 +0,0 @@ -import pytest - -pytest.mark.skip - -import json -import time - -import numpy as np - -# from aliby.experiment import ExperimentLocal -from aliby.baby_client import BabyClient -from aliby.tile.tiler import Tiler - - -@pytest.mark.skip( - reason="No longer usable, requires local files. Kept until replaced." -) -def test_client(): - root_dir = ( - "/Users/s1893247/PhD/pipeline-core/data/glclvl_0" - ".1_mig1_msn2_maf1_sfp1_dot6_03" - ) - - expt = ExperimentLocal(root_dir, finished=True) - seg_expt = Tiler(expt, finished=True) - - print(seg_expt.positions) - seg_expt.current_position = "pos007" - - config = { - "camera": "evolve", - "channel": "brightfield", - "zoom": "60x", - "n_stacks": "5z", - } - - baby_client = BabyClient(expt, **config) - - print("The session is {}".format(baby_client.sessions["default"])) - - # Channel 0, 0, X,Y,Z all - num_timepoints = 5 - - traps_tps = [ - seg_expt.get_tiles_timepoint( - tp, tile_size=81, channels=[0], z=[0, 1, 2, 3, 4] - ).squeeze() - for tp in range(num_timepoints) - ] - - segmentations = [] - try: - for i, timpoint in enumerate(traps_tps): - print("Sending timepoint {};".format(i)) - status = baby_client.queue_image( - timpoint, - baby_client.sessions["default"], - assign_mothers=True, - return_baprobs=True, - with_edgemasks=True, - ) - while True: - try: - print("Loading.", end="") - result = baby_client.get_segmentation( - baby_client.sessions["default"] - ) - except: - print(".", end="") - time.sleep(1) - continue - break - print("Received timepoint {}".format(i)) - segmentations.append(result) - except Exception as e: - print(segmentations) - raise e - - with open("segmentations.json", "w") as fd: - json.dump(segmentations, fd) - - print("Done.") - # print(len(segmentations[0])) - # for i in range(5): - # print("trap {}".format(i)) - # for k, v in segmentations[0][i].items(): - # print(k, v) - # - # import matplotlib.pyplot as plt - # plt.imshow(np.squeeze(batches[0][0, ..., 0])) - # plt.savefig('test_baby.pdf') diff --git a/tests/aliby/network/test_post_processing.py b/tests/aliby/network/test_post_processing.py index 3ac8ba403602778a92fa482f4c64395df59663b0..dbf9b8a9bb9e0bfc4703d79b93dd1f28d4f68016 100644 --- a/tests/aliby/network/test_post_processing.py +++ b/tests/aliby/network/test_post_processing.py @@ -7,14 +7,6 @@ import skimage.morphology as morph from scipy import ndimage from skimage import draw -# from aliby.post_processing import ( -# circle_outline, -# conical, -# ellipse_perimeter, -# union_of_spheres, -# volume_of_sphere, -# ) - @pytest.mark.skip( reason="No longer usable, post_processing unused inside aliby. 
Kept temporarily" diff --git a/tests/aliby/network/test_tiler.py b/tests/aliby/network/test_tiler.py index 52187b58ee4db91d11856bf7cdd6584adf4c1b6d..1ec1755c11ee519c9bdcd1556ed331dfb2d97e49 100644 --- a/tests/aliby/network/test_tiler.py +++ b/tests/aliby/network/test_tiler.py @@ -19,6 +19,12 @@ def define_parser(): return parser +def initialise_dummy(): + tiler_parameters = TilerParameters.default().to_dict() + dummy_tiler = Tiler.dummy(tiler_parameters) + return dummy_tiler + + def initialise_objects(data_path, template=None): image = ImageLocalOME(data_path) tiler = Tiler.from_image(image, TilerParameters.default()) @@ -53,6 +59,8 @@ if __name__ == "__main__": parser = define_parser() args = parser.parse_args() + dummy_tiler = initialise_dummy() + tiler = initialise_objects(args.root_dir, template=args.template) if args.position is not None: diff --git a/tests/aliby/pipeline/test_image.py b/tests/aliby/pipeline/test_image.py new file mode 100644 index 0000000000000000000000000000000000000000..3a2114006c0bd2703bbfed159ee99201853bae05 --- /dev/null +++ b/tests/aliby/pipeline/test_image.py @@ -0,0 +1,43 @@ +#!/usr/bin/env python3 +import numpy as np +import dask.array as da +import pytest + +from aliby.io.image import ImageDummy + +tiler_parameters = {"tile_size": 117, "ref_channel": "Brightfield", "ref_z": 0} + +sample_da = da.from_array(np.array([[1, 2], [3, 4]])) +# Make it 5-dimensional +sample_da = da.reshape( + sample_da, (1, 1, 1, sample_da.shape[-2], sample_da.shape[-1]) +) + + +@pytest.mark.parametrize("sample_da", [sample_da]) +@pytest.mark.parametrize("dim", [2]) +@pytest.mark.parametrize("n_empty_slices", [4]) +@pytest.mark.parametrize("image_position", [1]) +def test_pad_array(sample_da, dim, n_empty_slices, image_position): + """Test ImageDummy.pad_array() method""" + # create object + imgdmy = ImageDummy(tiler_parameters) + # pads array + padded_da = imgdmy.pad_array( + sample_da, + dim=dim, + n_empty_slices=n_empty_slices, + image_position=image_position, + ) + + # select which dimension to index the multidimensional array + indices = {dim: image_position} + ix = [ + indices.get(dim, slice(None)) + for dim in range(padded_da.compute().ndim) + ] + + # Checks that original image array is there and is at the correct index + assert np.array_equal(padded_da.compute()[ix], sample_da.compute()[0]) + # Checks that the additional axis is extended correctly + assert padded_da.compute().shape[dim] == n_empty_slices + 1 diff --git a/tests/postprocessor/test_interpolate.py b/tests/postprocessor/test_interpolate.py new file mode 100644 index 0000000000000000000000000000000000000000..c9c993de2bf0833de6fa053b1993576588a3f7ee --- /dev/null +++ b/tests/postprocessor/test_interpolate.py @@ -0,0 +1,46 @@ +#!/usr/bin/env python3 +import numpy as np +import pandas as pd +from postprocessor.core.processes.interpolate import ( + interpolate, + interpolateParameters, +) + + +def dummy_signal_array(n_cells, n_tps): + """Creates dummy signal array, i.e. 
increasing gradient""" + signal = np.array([np.linspace(1, 2, n_tps) for _ in range(n_cells)]) + return signal + + +def test_dummy_signal_array(): + ds = dummy_signal_array(5, 10) + # Check dimensions + assert ds.shape[0] == 5 + assert ds.shape[1] == 10 + + +def randomly_add_na(input_array, num_of_na): + """Randomly replaces a 2d numpy array with NaNs, number of NaNs specified""" + input_array.ravel()[ + np.random.choice(input_array.size, num_of_na, replace=False) + ] = np.nan + return input_array + + +def test_interpolate(): + dummy_array = dummy_signal_array(5, 10) + # Poke holes so interpolate can fill + holey_array = randomly_add_na(dummy_array, 15) + + dummy_signal = pd.DataFrame(dummy_array) + holey_signal = pd.DataFrame(holey_array) + + interpolate_runner = interpolate(interpolateParameters.default()) + interpolated_signal = interpolate_runner.run(holey_signal) + + subtr = interpolated_signal - dummy_signal + # Check that interpolated values are the ones that exist in the dummy + assert np.nansum(subtr.to_numpy()) == 0 + # TODO: Check that if there are NaNs remaining after interpolation, they + # are at the ends
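The trailing TODO in test_interpolate.py could be covered with a small helper along the following lines. This is only a sketch: nans_only_at_row_ends is a hypothetical name, and it makes no claim about whether the interpolate process actually leaves NaNs at the row edges; it merely checks the property the TODO asks for, reusing the helpers defined in test_interpolate.py above.

import numpy as np
import pandas as pd


def nans_only_at_row_ends(arr: np.ndarray) -> bool:
    """Return True if every NaN in each row lies in a leading or trailing run."""
    for row in arr:
        finite = np.flatnonzero(~np.isnan(row))
        if finite.size == 0:
            continue  # an all-NaN row trivially has its NaNs "at the ends"
        # No NaN may sit strictly between the first and last finite entry.
        if np.isnan(row[finite[0] : finite[-1] + 1]).any():
            return False
    return True


def test_nans_at_ends_after_interpolation():
    # Hypothetical extension of test_interpolate, reusing dummy_signal_array,
    # randomly_add_na, interpolate and interpolateParameters from the module above.
    holey_signal = pd.DataFrame(randomly_add_na(dummy_signal_array(5, 10), 15))
    interpolated = interpolate(interpolateParameters.default()).run(holey_signal)
    assert nans_only_at_row_ends(interpolated.to_numpy())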