From 02f7587b4b62e8144134c3a6aea15c3fad9c3e9a Mon Sep 17 00:00:00 2001
From: pswain <peter.swain@ed.ac.uk>
Date: Fri, 10 Nov 2023 18:37:18 +0000
Subject: [PATCH] reverted baby_client; feat(swainlab_parser): tolerate non-ASCII characters

---
 src/agora/io/metadata.py              |   6 +-
 src/aliby/baby_client.py              |   2 +-
 src/aliby/baby_sitter.py              |   2 +-
 src/aliby/pipeline.py                 |  23 ++--
 src/aliby/utils/imageViewer.py        |   2 +-
 src/logfile_parser/swainlab_parser.py | 176 ++++++++++++--------------
 6 files changed, 98 insertions(+), 113 deletions(-)

diff --git a/src/agora/io/metadata.py b/src/agora/io/metadata.py
index d59ac406..a84765a6 100644
--- a/src/agora/io/metadata.py
+++ b/src/agora/io/metadata.py
@@ -148,9 +148,9 @@ def parse_logfiles(
     return parsed_flattened
 
 
-def get_meta_swainlab(parsed_metadata: dict):
+def get_channels_swainlab(parsed_metadata: dict):
     """
-    Convert raw parsing of Swainlab logfile to the metadata interface.
+    Extract channels from parsed metadata.
 
     Parameters
     --------
@@ -180,7 +180,7 @@ def parse_swainlab_metadata(filedir: t.Union[str, Path]):
     if filepath:
         # new log files ending in .log
         raw_parse = parse_from_swainlab_grammar(filepath)
-        minimal_meta = get_meta_swainlab(raw_parse)
+        minimal_meta = get_channels_swainlab(raw_parse)
     else:
         # old log files ending in .txt
         if filedir.is_file() or str(filedir).endswith(".zarr"):
diff --git a/src/aliby/baby_client.py b/src/aliby/baby_client.py
index a94fe6ff..cfb7bcf5 100644
--- a/src/aliby/baby_client.py
+++ b/src/aliby/baby_client.py
@@ -177,7 +177,7 @@ class BabyRunner(StepABC):
 
 def choose_model_from_params(
     modelset_filter=None,
-    camera="sCMOS",
+    camera="prime95b",
     channel="brightfield",
     zoom="60x",
     n_stacks="5z",
diff --git a/src/aliby/baby_sitter.py b/src/aliby/baby_sitter.py
index 2bd930ba..2b56c6fa 100644
--- a/src/aliby/baby_sitter.py
+++ b/src/aliby/baby_sitter.py
@@ -33,7 +33,7 @@ class BabyParameters(ParametersABC):
         """Define default parameters; kwargs choose BABY model set."""
         return cls(
             modelset_name=get_modelset_name_from_params(**kwargs),
-            clogging_thresh=0.75,
+            clogging_thresh=1,
             min_bud_tps=3,
             isbud_thresh=0.5,
             session=None,
diff --git a/src/aliby/pipeline.py b/src/aliby/pipeline.py
index 46b786c7..5eb5e548 100644
--- a/src/aliby/pipeline.py
+++ b/src/aliby/pipeline.py
@@ -15,6 +15,7 @@ from pathos.multiprocessing import Pool
 from tqdm import tqdm
 
 import baby
+import baby.errors
 
 try:
     if baby.__version__ == "v0.30.1":
@@ -170,11 +171,6 @@ class PipelineParameters(ParametersABC):
         ).to_dict()
         return cls(**{k: v for k, v in defaults.items()})
 
-    def load_logs(self):
-        """Load and parse log files."""
-        parsed_flattened = parse_logfiles(self.log_dir)
-        return parsed_flattened
-
 
 class Pipeline(ProcessABC):
     """
@@ -456,14 +452,14 @@ class Pipeline(ProcessABC):
                 tiler=pipe["steps"]["tiler"],
             )
             # initiate progress bar
-            pbar = tqdm(
+            progress_bar = tqdm(
                 range(min_process_from, pipe["tps"]),
                 desc=image.name,
                 initial=min_process_from,
                 total=pipe["tps"],
             )
             # run through time points
-            for i in pbar:
+            for i in progress_bar:
                 if (
                     frac_clogged_traps
                     < pipe["earlystop"]["thresh_pos_clogged"]
@@ -473,9 +469,14 @@ class Pipeline(ProcessABC):
                 ):
                     for step in self.pipeline_steps:
                         if i >= pipe["process_from"][step]:
                             # perform step
-                            result = pipe["steps"][step].run_tp(
-                                i, **run_kwargs.get(step, {})
-                            )
+                            try:
+                                result = pipe["steps"][step].run_tp(
+                                    i, **run_kwargs.get(step, {})
+                                )
+                            except baby.errors.Clogging:
+                                logging.getLogger("aliby").warning(
+                                    "Clogging threshold exceeded in BABY."
+                                )
                             # write result to h5 file using writers
                             # extractor writes to h5 itself
                             if step in loaded_writers:
@@ -521,7 +522,7 @@ class Pipeline(ProcessABC):
                         f"{name}:Clogged_traps:{frac_clogged_traps}"
                     )
                     frac = np.round(frac_clogged_traps * 100)
-                    pbar.set_postfix_str(f"{frac} Clogged")
+                    progress_bar.set_postfix_str(f"{frac} Clogged")
                 else:
                     # stop if too many traps are clogged
                     self._log(
diff --git a/src/aliby/utils/imageViewer.py b/src/aliby/utils/imageViewer.py
index bcb54242..fd529108 100644
--- a/src/aliby/utils/imageViewer.py
+++ b/src/aliby/utils/imageViewer.py
@@ -4,7 +4,7 @@ ImageViewer class, used to look at individual or multiple traps over time.
 
 Example of usage:
 
-fpath = "/home/alan/Documents/dev/skeletons/scripts/data/16543_2019_07_16_aggregates_CTP_switch_2_0glu_0_0glu_URA7young_URA8young_URA8old_01/URA8_young018.h5"
+fpath = "/home/alan/data/16543/URA8_young018.h5"
 
 tile_id = 9
 trange = list(range(0, 10))
diff --git a/src/logfile_parser/swainlab_parser.py b/src/logfile_parser/swainlab_parser.py
index 49d04690..f9d2a3b8 100644
--- a/src/logfile_parser/swainlab_parser.py
+++ b/src/logfile_parser/swainlab_parser.py
@@ -53,17 +53,58 @@ from pyparsing import (
 
 atomic = t.Union[str, int, float, bool]
 
+# specify grammar for the Swain lab
+sl_grammar = {
+    "general": {
+        "start_trigger": Literal("Swain Lab microscope experiment log file"),
+        "data_type": "fields",
+        "end_trigger": "-----Acquisition settings-----",
+    },
+    "image_config": {
+        "start_trigger": "Image Configs:",
+        "data_type": "table",
+    },
+    "device_properties": {
+        "start_trigger": "Device properties:",
+        "data_type": "table",
+    },
+    "group": {
+        "position": {
+            "start_trigger": Group(
+                Group(Literal("group:") + Word(printables))
+                + Group(Literal("field:") + "position")
+            ),
+            "data_type": "table",
+        },
+        **{
+            key: {
+                "start_trigger": Group(
+                    Group(Literal("group:") + Word(printables))
+                    + Group(Literal("field:") + key)
+                ),
+                "data_type": "fields",
+            }
+            for key in ("time", "config")
+        },
+    },
+}
+
+ACQ_START = "-----Acquisition settings-----"
+HEADER_END = "-----Experiment started-----"
+MAX_NLINES = 2000  # in case of a malformed logfile
+
+ParserElement.setDefaultWhitespaceChars(" \t")
+
 
 class HeaderEndNotFound(Exception):
     def __init__(self, message, errors):
         super().__init__(message)
-        self.errors = errors
 
 
 def extract_header(filepath: Path):
-    # header_contents = ""
-    with open(filepath, "r") as f:
+    """Extract content of log file before the experiment starts."""
+    with open(filepath, "r", errors="ignore") as f:
         try:
             header = ""
             for _ in range(MAX_NLINES):
@@ -72,16 +113,50 @@ def extract_header(filepath: Path):
             if HEADER_END in line:
                 break
         except HeaderEndNotFound as e:
-            print(f"{MAX_NLINES} checked and no header found")
+            print(f"{MAX_NLINES} lines checked and no header found.")
             raise (e)
     return header
 
 
+def parse_from_swainlab_grammar(filepath: t.Union[str, Path]):
+    """Parse using a grammar for the Swain lab."""
+    return parse_from_grammar(filepath, sl_grammar)
+
+
+def parse_from_grammar(filepath: str, grammar: t.Dict):
+    """Parse a file using the specified grammar."""
+    header = extract_header(filepath)
+    d = {}
+    for key, values in grammar.items():
+        try:
+            if "data_type" in values:
+                # data_type for parse_x defined in values
+                d[key] = parse_x(header, **values)
+            else:
+                # use subkeys to parse groups
+                for subkey, subvalues in values.items():
+                    subkey = "_".join((key, subkey))
+                    d[subkey] = parse_x(header, **subvalues)
+        except Exception as e:
+            logging.getLogger("aliby").critical(
+                f"Parsing failed for key {key} and values {values}."
+            )
+            raise e
+    return d
+
+
+def parse_x(string, data_type, **kwargs):
+    """Parse a string for data of a specified type."""
+    res_dict = eval(f"parse_{data_type}(string, **kwargs)")
+    return res_dict
+
+
 def parse_table(
     string: str,
     start_trigger: t.Union[str, Keyword],
 ) -> pd.DataFrame:
-    """Parse csv-like table
+    """
+    Parse csv-like table.
 
     Parameters
     ----------
@@ -98,12 +173,9 @@ def parse_table(
     Examples
     --------
     >>> table = parse_table()
-
     """
-
     if isinstance(start_trigger, str):
         start_trigger: Keyword = Keyword(start_trigger)
-
     EOL = LineEnd().suppress()
     field = OneOrMore(CharsNotIn(":,\n"))
     line = LineStart() + Group(
@@ -116,11 +188,9 @@ def parse_table(
         + EOL  # end_trigger.suppress()
     )
     parser_result = parser.search_string(string)
-
     assert all(
         [len(row) == len(parser_result[0]) for row in parser_result]
     ), f"Table {start_trigger} has unequal number of columns"
-
     assert len(parser_result), f"Parsing is empty. {parser}"
     return table_to_df(parser_result.as_list())
 
@@ -139,16 +209,12 @@ def parse_fields(
     start: 0
     interval: 300
     frames: 180
-
-
     """
     EOL = LineEnd().suppress()
-
     if end_trigger is None:
         end_trigger = EOL
     elif isinstance(end_trigger, str):
         end_trigger = Literal(end_trigger)
-
     field = OneOrMore(CharsNotIn(":\n"))
     line = (
         LineStart()
@@ -164,79 +230,6 @@ def parse_fields(
     return fields_to_dict_or_table(results)
 
 
-# Grammar specification
-grammar = {
-    "general": {
-        "start_trigger": Literal("Swain Lab microscope experiment log file"),
-        "type": "fields",
-        "end_trigger": "-----Acquisition settings-----",
-    },
-    "image_config": {
-        "start_trigger": "Image Configs:",
-        "type": "table",
-    },
-    "device_properties": {
-        "start_trigger": "Device properties:",
-        "type": "table",
-    },
-    "group": {
-        "position": {
-            "start_trigger": Group(
-                Group(Literal("group:") + Word(printables))
-                + Group(Literal("field:") + "position")
-            ),
-            "type": "table",
-        },
-        **{
-            key: {
-                "start_trigger": Group(
-                    Group(Literal("group:") + Word(printables))
-                    + Group(Literal("field:") + key)
-                ),
-                "type": "fields",
-            }
-            for key in ("time", "config")
-        },
-    },
-}
-
-
-ACQ_START = "-----Acquisition settings-----"
-HEADER_END = "-----Experiment started-----"
-MAX_NLINES = 2000  # In case of malformed logfile
-# test_file = "/home/alan/Downloads/pH_med_to_low.log"
-# test_file = "/home/alan/Documents/dev/skeletons/scripts/dev/C1_60x.log"
-
-ParserElement.setDefaultWhitespaceChars(" \t")
-
-
-# time_fields = parse_field(acq, start_trigger=grammar["group"]["time"]["start_trigger"])
-# config_fields = parse_fields(
-#     acq, start_trigger=grammar["group"]["config"]["start_trigger"]
-# )
-
-# general_fields = parse_fields(basic, start_trigger=grammar["general"]["start_trigger"])
-
-
-def parse_from_grammar(filepath: str, grammar: t.Dict):
-    header = extract_header(filepath)
-    d = {}
-    for key, values in grammar.items():
-        try:
-            if "type" in values:
-                d[key] = parse_x(header, **values)
-            else:  # Use subkeys to parse groups
-                for subkey, subvalues in values.items():
-                    subkey = "_".join((key, subkey))
-                    d[subkey] = parse_x(header, **subvalues)
-        except Exception as e:
-            logging.getLogger("aliby").critical(
-                f"Parsing failed for key {key} and values {values}"
-            )
-            raise (e)
-    return d
-
-
 def table_to_df(result: t.List[t.List]):
     if len(result) > 1:  # Multiple tables with ids to append
         # Generate multiindex from "Name column"
@@ -292,12 +285,3 @@ def _cast_type(x: str) -> t.Union[str, int, float, bool]:
     except:
         pass
     return x
-
-
-def parse_x(string: str, type: str, **kwargs):
-    # return eval(f"parse_{type}({string}, **{kwargs})")
return eval(f"parse_{type}({string}, **{kwargs})") - return eval(f"parse_{type}(string, **kwargs)") - - -def parse_from_swainlab_grammar(filepath: t.Union[str, Path]): - return parse_from_grammar(filepath, grammar) -- GitLab