From 6f2b55bc346d2230f2a1aa1fcced9a57ad2f2da5 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Al=C3=A1n=20Mu=C3=B1oz?= <alan.munoz@ed.ac.uk>
Date: Tue, 20 Dec 2022 18:45:10 +0000
Subject: [PATCH] [WIP] refactor(pipe): generalise metadata usage

---
 src/agora/io/metadata.py                  | 77 ++++++++++++++++++++++-
 src/agora/io/signal.py                    |  9 +--
 src/aliby/io/dataset.py                   |  6 +-
 src/aliby/pipeline.py                     | 13 ++--
 src/extraction/core/functions/defaults.py |  2 +-
 5 files changed, 89 insertions(+), 18 deletions(-)

diff --git a/src/agora/io/metadata.py b/src/agora/io/metadata.py
index d5a5c04e..c66f9bb8 100644
--- a/src/agora/io/metadata.py
+++ b/src/agora/io/metadata.py
@@ -1,12 +1,15 @@
 import glob
 import os
+import typing as t
 from datetime import datetime
+from pathlib import Path, PosixPath

 import pandas as pd
 from pytz import timezone

 from agora.io.writer import Writer
 from logfile_parser import Parser
+from logfile_parser.swainlab_parser import parse_from_swainlab_grammar


 class MetaData:
@@ -21,7 +24,8 @@ class MetaData:
         return self.load_logs()[item]

     def load_logs(self):
-        parsed_flattened = parse_logfiles(self.log_dir)
+        # parsed_flattened = parse_logfiles(self.log_dir)
+        parsed_flattened = dispatch_metadata_parser(self.log_dir)
         return parsed_flattened

     def run(self, overwrite=False):
@@ -65,10 +69,16 @@ def datetime_to_timestamp(time, locale="Europe/London"):

 def find_file(root_dir, regex):
     file = glob.glob(os.path.join(str(root_dir), regex))
-    if len(file) != 1:
-        return None
+    if len(file) > 1:
+        print(
+            "Warning:Metadata: More than one logfile found. Defaulting to first option."
+        )
+        file = [file[0]]
+    if len(file) == 0:
+        print("Warning:Metadata: No valid logfile found.")
     else:
         return file[0]
+    return None


 # TODO: re-write this as a class if appropriate
@@ -114,3 +124,64 @@ def parse_logfiles(
         parsed_flattened[k] = [0 if el is None else el for el in v]

     return parsed_flattened
+
+
+def get_meta_swainlab(parsed_metadata: dict):
+    """
+    Convert raw parsing of Swainlab logfile to the metadata interface.
+
+    Input:
+    --------
+    parsed_metadata: Dict[str, str or int or DataFrame or Dict]
+        default['general', 'image_config', 'device_properties', 'group_position', 'group_time', 'group_config']
+
+    Returns:
+    --------
+    Dictionary with metadata following the standard
+
+    """
+    channels = parsed_metadata["image_config"]["Image config"].values.tolist()
+    # nframes = int(parsed_metadata["group_time"]["frames"].max())
+
+    # return {"channels": channels, "nframes": nframes}
+    return {"channels": channels}
+
+
+def get_meta_from_legacy(parsed_metadata: dict):
+    channels = parsed_metadata["channels"]["channel"]
+    return {"channels": channels}
+
+
+def parse_swainlab_metadata(filedir: t.Union[str, PosixPath]):
+    """
+    Dispatcher function that determines which parser to use based on the file ending.
+
+    Input:
+    --------
+    filedir: Directory where the logfile is located.
+
+    Returns:
+    --------
+    Dictionary with minimal metadata
+    """
+    filedir = Path(filedir)
+
+    filepath = find_file(filedir, "*.log")
+    if filepath:
+        raw_parse = parse_from_swainlab_grammar(filepath)
+        minimal_meta = get_meta_swainlab(raw_parse)
+    else:
+        if filedir.is_file():
+            filedir = filedir.parent
+        legacy_parse = parse_logfiles(filedir.parent)
+        minimal_meta = get_meta_from_legacy(legacy_parse)
+
+    return minimal_meta
+
+
+def dispatch_metadata_parser(filepath: t.Union[str, PosixPath]):
+    """
+    Function to dispatch different metadata parsers that convert logfiles into a
+    basic metadata dictionary. Currently only contains the swainlab log parsers.
+    """
+    return parse_swainlab_metadata(filepath)
diff --git a/src/agora/io/signal.py b/src/agora/io/signal.py
index 3cfc358f..4c6bf0f7 100644
--- a/src/agora/io/signal.py
+++ b/src/agora/io/signal.py
@@ -84,12 +84,7 @@ class Signal(BridgeH5):
         try:
             df.columns = (df.columns * self.tinterval // 60).astype(int)
         except Exception as e:
-            print(
-                """
-                Warning:Can't convert columns to minutes. Signal {}.{}""".format(
-                    df.name, e
-                )
-            )
+            print(f"Warning:Signal: Unable to convert columns to minutes: {e}")
         return df

     @cached_property
@@ -110,7 +105,7 @@ class Signal(BridgeH5):
     @property
     def channels(self):
         with h5py.File(self.filename, "r") as f:
-            return f.attrs["channels/channel"]
+            return f.attrs["channels"]

     @_first_arg_str_to_df
     def retained(self, signal, cutoff=0.8):
diff --git a/src/aliby/io/dataset.py b/src/aliby/io/dataset.py
index df6639b9..e0f3b968 100644
--- a/src/aliby/io/dataset.py
+++ b/src/aliby/io/dataset.py
@@ -118,9 +118,13 @@ class Dataset(BridgeOmero):
         return self._tags

     def cache_logs(self, root_dir):
+        valid_suffixes = ("txt", "log")
         for name, annotation in self.files.items():
             filepath = root_dir / annotation.getFileName().replace("/", "_")
-            if str(filepath).endswith("txt") and not filepath.exists():
+            if (
+                any([str(filepath).endswith(suff) for suff in valid_suffixes])
+                and not filepath.exists()
+            ):
                 # save only the text files
                 with open(str(filepath), "wb") as fd:
                     for chunk in annotation.getFileInChunks():
diff --git a/src/aliby/pipeline.py b/src/aliby/pipeline.py
index 00f50a38..a3392433 100644
--- a/src/aliby/pipeline.py
+++ b/src/aliby/pipeline.py
@@ -97,14 +97,15 @@ class PipelineParameters(ParametersABC):
         try:
             meta_d = MetaData(directory, None).load_logs()
         except Exception as e:
-            print("WARNING: Metadata could not be loaded: {}".format(e))
-            # Set minimal metadata
-            meta_d = {
-                "channels/channel": "Brightfield",
-                "time_settings/ntimepoints": [200],
+            minimal_default_meta = {
+                "channels": ["Brightfield"],
+                "ntps": [2000],
             }
+            print(f"WARNING:Metadata: error when uploading: {e}")
+            # Set minimal metadata
+            meta_d = minimal_default_meta

-        tps = meta_d["time_settings/ntimepoints"][0]
+        tps = meta_d.get("ntps", 2000)
         defaults = {
             "general": dict(
                 id=expt_id,
diff --git a/src/extraction/core/functions/defaults.py b/src/extraction/core/functions/defaults.py
index e67ffa19..9f89a2e3 100644
--- a/src/extraction/core/functions/defaults.py
+++ b/src/extraction/core/functions/defaults.py
@@ -55,7 +55,7 @@ def exparams_from_meta(
     extant_fluorescence_ch = []
    for av_channel in candidate_channels:
         # Find channels in metadata whose names match
-        found_channel = find_channel_name(meta["channels/channel"], av_channel)
+        found_channel = find_channel_name(meta["channels"], av_channel)
         if found_channel is not None:
             extant_fluorescence_ch.append(found_channel)

--
GitLab