Skip to content
Snippets Groups Projects
Commit ba5266d4 authored by Alán Muñoz's avatar Alán Muñoz
Browse files

Merge 'origin/doc2' into merge_docs

parents 176aa03f 87e943f5
No related branches found
No related tags found
No related merge requests found
""" """
Tools to interact with h5 files and handle data consistently.
""" """
import collections import collections
from itertools import chain, groupby, product from itertools import chain, groupby, product
...@@ -13,26 +13,28 @@ import yaml ...@@ -13,26 +13,28 @@ import yaml
class BridgeH5: class BridgeH5:
""" """
Base class to interact with h5 files.

It includes functions that predict how long segmentation will take.
""" """
def __init__(self, filename, flag="r"):
    """
    Initialise with the name of the h5 file.

    Parameters
    ----------
    filename: str
        Name of the h5 file; stored as ``self.filename``.
    flag: str or None
        Mode passed to ``h5py.File``. If None, the file is not opened
        here and ``self._hdf`` is not set.
    """
    self.filename = filename
    if flag is not None:
        self._hdf = h5py.File(filename, flag)
        # NOTE(review): this is a bare attribute reference, not a call,
        # so the 'cell_info' check in _filecheck never runs. Kept as-is
        # because calling it would start rejecting files that lack
        # 'cell_info' -- confirm the intended behaviour before changing
        # it to ``self._filecheck()``.
        self._filecheck
def _filecheck(self): def _filecheck(self):
assert "cell_info" in self._hdf, "Invalid file. No 'cell_info' found." assert "cell_info" in self._hdf, "Invalid file. No 'cell_info' found."
def close(self): def close(self):
"""Close the h5 file."""
self._hdf.close() self._hdf.close()
@property @property
def meta_h5(self) -> t.Dict[str, t.Any]: def meta_h5(self) -> t.Dict[str, t.Any]:
# Return metadata as indicated in h5 file """Return metadata, defining it if necessary."""
if not hasattr(self, "_meta_h5"): if not hasattr(self, "_meta_h5"):
with h5py.File(self.filename, "r") as f: with h5py.File(self.filename, "r") as f:
self._meta_h5 = dict(f.attrs) self._meta_h5 = dict(f.attrs)
...@@ -44,24 +46,24 @@ class BridgeH5: ...@@ -44,24 +46,24 @@ class BridgeH5:
@staticmethod @staticmethod
def get_consecutives(tree, nstepsback): def get_consecutives(tree, nstepsback):
# Receives a sorted tree and returns the keys of consecutive elements """Receives a sorted tree and returns the keys of consecutive elements."""
vals = {k: np.array(list(v)) for k, v in tree.items()} # get tp level # get tp level
vals = {k: np.array(list(v)) for k, v in tree.items()}
# get indices of consecutive elements
where_consec = [ where_consec = [
{ {
k: np.where(np.subtract(v[n + 1 :], v[: -n - 1]) == n + 1)[0] k: np.where(np.subtract(v[n + 1 :], v[: -n - 1]) == n + 1)[0]
for k, v in vals.items() for k, v in vals.items()
} }
for n in range(nstepsback) for n in range(nstepsback)
] # get indices of consecutive elements ]
return where_consec return where_consec
def get_npairs(self, nstepsback=2, tree=None): def get_npairs(self, nstepsback=2, tree=None):
if tree is None: if tree is None:
tree = self.cell_tree tree = self.cell_tree
consecutive = self.get_consecutives(tree, nstepsback=nstepsback) consecutive = self.get_consecutives(tree, nstepsback=nstepsback)
flat_tree = flatten(tree) flat_tree = flatten(tree)
n_predictions = 0 n_predictions = 0
for i, d in enumerate(consecutive, 1): for i, d in enumerate(consecutive, 1):
flat = list(chain(*[product([k], list(v)) for k, v in d.items()])) flat = list(chain(*[product([k], list(v)) for k, v in d.items()]))
...@@ -70,55 +72,49 @@ class BridgeH5: ...@@ -70,55 +72,49 @@ class BridgeH5:
n_predictions += len(flat_tree.get(p[0], [])) * len( n_predictions += len(flat_tree.get(p[0], [])) * len(
flat_tree.get(p[1], []) flat_tree.get(p[1], [])
) )
return n_predictions return n_predictions
def get_npairs_over_time(self, nstepsback=2): def get_npairs_over_time(self, nstepsback=2):
tree = self.cell_tree tree = self.cell_tree
npairs = [] npairs = []
for t in self._hdf["cell_info"]["processed_timepoints"][()]: for tp in self._hdf["cell_info"]["processed_timepoints"][()]:
tmp_tree = { tmp_tree = {
k: {k2: v2 for k2, v2 in v.items() if k2 <= t} k: {k2: v2 for k2, v2 in v.items() if k2 <= tp}
for k, v in tree.items() for k, v in tree.items()
} }
npairs.append(self.get_npairs(tree=tmp_tree)) npairs.append(self.get_npairs(tree=tmp_tree))
return np.diff(npairs) return np.diff(npairs)
def get_info_tree(
    self, fields: Union[tuple, list] = ("trap", "timepoint", "cell_label")
):
    """
    Return traps, time points and labels for this position as a tree.

    The hierarchy of the tree is determined by the argument fields.
    Note that it is compressed to non-empty elements and timepoints.

    Default hierarchy is:
    - trap
     - time point
      - cell label

    This function currently produces trees of depth 3, but it can easily
    be extended for deeper trees if needed (e.g. considering groups,
    chambers and/or positions).

    Parameters
    ----------
    fields: list of strs
        Fields to fetch from 'cell_info' inside the h5 file.

    Returns
    -------
    Nested dictionary where keys (or branches) are the upper levels and
    the leaves are the last element of :fields:.
    """
    # read each field in full, then combine them entry by entry into tuples
    columns = [self._hdf["cell_info"][f][()] for f in fields]
    zipped_info = tuple(zip(*columns))
    return recursive_groupsort(zipped_info)
def groupsort(iterable: Union[tuple, list]): def groupsort(iterable: Union[tuple, list]):
# Sorts iterable and returns a dictionary where the values are grouped by the first element. """Sorts iterable and returns a dictionary where the values are grouped by the first element."""
iterable = sorted(iterable, key=lambda x: x[0]) iterable = sorted(iterable, key=lambda x: x[0])
grouped = { grouped = {
k: [x[1:] for x in v] for k, v in groupby(iterable, lambda x: x[0]) k: [x[1:] for x in v] for k, v in groupby(iterable, lambda x: x[0])
...@@ -127,17 +123,18 @@ def groupsort(iterable: Union[tuple, list]): ...@@ -127,17 +123,18 @@ def groupsort(iterable: Union[tuple, list]):
def recursive_groupsort(iterable):
    """
    Recursive extension of groupsort.

    Parameters
    ----------
    iterable: tuple or list of tuples
        Entries of equal length; each level of the result groups the
        entries by their first remaining element.

    Returns
    -------
    Nested dict keyed by successive leading elements, whose leaves are
    lists of the last element of each entry. A list is returned directly
    when the entries have a single element, and an empty dict when
    ``iterable`` is empty.
    """
    if len(iterable) == 0:
        # nothing to group; avoids an IndexError on iterable[0]
        return {}
    if len(iterable[0]) > 1:
        return {
            k: recursive_groupsort(v) for k, v in groupsort(iterable).items()
        }
    # only one element left per entry: these are the leaves
    return [x[0] for x in iterable]
def flatten(d, parent_key="", sep="_"): def flatten(d, parent_key="", sep="_"):
"""Flatten nested dict. Adapted from https://stackoverflow.com/a/6027615""" """Flatten nested dict. Adapted from https://stackoverflow.com/a/6027615."""
items = [] items = []
for k, v in d.items(): for k, v in d.items():
new_key = parent_key + (k,) if parent_key else (k,) new_key = parent_key + (k,) if parent_key else (k,)
...@@ -149,18 +146,19 @@ def flatten(d, parent_key="", sep="_"): ...@@ -149,18 +146,19 @@ def flatten(d, parent_key="", sep="_"):
def attrs_from_h5(fpath: str):
    """
    Return attributes as dict from an h5 file.

    Parameters
    ----------
    fpath: str
        Path of the h5 file, which is opened read-only and closed again
        before returning.
    """
    with h5py.File(fpath, "r") as f:
        return dict(f.attrs)
def parameters_from_h5(fpath: str):
    """
    Return parameters from an h5 file.

    The file's "parameters" attribute is parsed as YAML with
    ``yaml.safe_load``.

    Parameters
    ----------
    fpath: str
        Path of the h5 file.
    """
    attrs = attrs_from_h5(fpath)
    return yaml.safe_load(attrs["parameters"])
def image_creds_from_h5(fpath: str): def image_creds_from_h5(fpath: str):
"""Return image id and server credentials from h5""" """Return image id and server credentials from an h5."""
attrs = attrs_from_h5(fpath) attrs = attrs_from_h5(fpath)
return ( return (
attrs["image_id"], attrs["image_id"],
......
This diff is collapsed.
0% Loading or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment