From 02f7587b4b62e8144134c3a6aea15c3fad9c3e9a Mon Sep 17 00:00:00 2001
From: pswain <peter.swain@ed.ac.uk>
Date: Fri, 10 Nov 2023 18:37:18 +0000
Subject: [PATCH] reverted baby_client; feat(swainlab_parser): tolerate non-ASCII characters

---
 src/agora/io/metadata.py              |   6 +-
 src/aliby/baby_client.py              |   2 +-
 src/aliby/baby_sitter.py              |   2 +-
 src/aliby/pipeline.py                 |  23 ++--
 src/aliby/utils/imageViewer.py        |   2 +-
 src/logfile_parser/swainlab_parser.py | 176 ++++++++++++--------------
 6 files changed, 98 insertions(+), 113 deletions(-)

diff --git a/src/agora/io/metadata.py b/src/agora/io/metadata.py
index d59ac406..a84765a6 100644
--- a/src/agora/io/metadata.py
+++ b/src/agora/io/metadata.py
@@ -148,9 +148,9 @@ def parse_logfiles(
     return parsed_flattened
 
 
-def get_meta_swainlab(parsed_metadata: dict):
+def get_channels_swainlab(parsed_metadata: dict):
     """
-    Convert raw parsing of Swainlab logfile to the metadata interface.
+    Extract channels from parsed metadata.
 
     Parameters
     --------
@@ -180,7 +180,7 @@ def parse_swainlab_metadata(filedir: t.Union[str, Path]):
     if filepath:
         # new log files ending in .log
         raw_parse = parse_from_swainlab_grammar(filepath)
-        minimal_meta = get_meta_swainlab(raw_parse)
+        minimal_meta = get_channels_swainlab(raw_parse)
     else:
         # old log files ending in .txt
         if filedir.is_file() or str(filedir).endswith(".zarr"):
diff --git a/src/aliby/baby_client.py b/src/aliby/baby_client.py
index a94fe6ff..cfb7bcf5 100644
--- a/src/aliby/baby_client.py
+++ b/src/aliby/baby_client.py
@@ -177,7 +177,7 @@ class BabyRunner(StepABC):
 
 def choose_model_from_params(
     modelset_filter=None,
-    camera="sCMOS",
+    camera="prime95b",
     channel="brightfield",
     zoom="60x",
     n_stacks="5z",
diff --git a/src/aliby/baby_sitter.py b/src/aliby/baby_sitter.py
index 2bd930ba..2b56c6fa 100644
--- a/src/aliby/baby_sitter.py
+++ b/src/aliby/baby_sitter.py
@@ -33,7 +33,7 @@ class BabyParameters(ParametersABC):
         """Define default parameters; kwargs choose BABY model set."""
         return cls(
             modelset_name=get_modelset_name_from_params(**kwargs),
-            clogging_thresh=0.75,
+            clogging_thresh=1,
             min_bud_tps=3,
             isbud_thresh=0.5,
             session=None,
diff --git a/src/aliby/pipeline.py b/src/aliby/pipeline.py
index 46b786c7..5eb5e548 100644
--- a/src/aliby/pipeline.py
+++ b/src/aliby/pipeline.py
@@ -15,6 +15,7 @@ from pathos.multiprocessing import Pool
 from tqdm import tqdm
 
 import baby
+import baby.errors
 
 try:
     if baby.__version__ == "v0.30.1":
@@ -170,11 +171,6 @@ class PipelineParameters(ParametersABC):
         ).to_dict()
         return cls(**{k: v for k, v in defaults.items()})
 
-    def load_logs(self):
-        """Load and parse log files."""
-        parsed_flattened = parse_logfiles(self.log_dir)
-        return parsed_flattened
-
 
 class Pipeline(ProcessABC):
     """
@@ -456,14 +452,14 @@ class Pipeline(ProcessABC):
                 tiler=pipe["steps"]["tiler"],
             )
             # initiate progress bar
-            pbar = tqdm(
+            progress_bar = tqdm(
                 range(min_process_from, pipe["tps"]),
                 desc=image.name,
                 initial=min_process_from,
                 total=pipe["tps"],
             )
             # run through time points
-            for i in pbar:
+            for i in progress_bar:
                 if (
                     frac_clogged_traps
                     < pipe["earlystop"]["thresh_pos_clogged"]
@@ -473,9 +469,14 @@ class Pipeline(ProcessABC):
                 ):
                     for step in self.pipeline_steps:
                         if i >= pipe["process_from"][step]:
                             # perform step
-                            result = pipe["steps"][step].run_tp(
-                                i, **run_kwargs.get(step, {})
-                            )
+                            try:
+                                result = pipe["steps"][step].run_tp(
+                                    i, **run_kwargs.get(step, {})
+                                )
+                            except baby.errors.Clogging:
+                                logging.getLogger("aliby").warning(
+                                    "Clogging threshold exceeded in BABY."
+                                )
                             # write result to h5 file using writers
                             # extractor writes to h5 itself
                             if step in loaded_writers:
@@ -521,7 +522,7 @@ class Pipeline(ProcessABC):
                         f"{name}:Clogged_traps:{frac_clogged_traps}"
                     )
                     frac = np.round(frac_clogged_traps * 100)
-                    pbar.set_postfix_str(f"{frac} Clogged")
+                    progress_bar.set_postfix_str(f"{frac} Clogged")
                 else:
                     # stop if too many traps are clogged
                     self._log(
diff --git a/src/aliby/utils/imageViewer.py b/src/aliby/utils/imageViewer.py
index bcb54242..fd529108 100644
--- a/src/aliby/utils/imageViewer.py
+++ b/src/aliby/utils/imageViewer.py
@@ -4,7 +4,7 @@ ImageViewer class, used to look at individual or multiple traps over time.
 
 Example of usage:
 
-fpath = "/home/alan/Documents/dev/skeletons/scripts/data/16543_2019_07_16_aggregates_CTP_switch_2_0glu_0_0glu_URA7young_URA8young_URA8old_01/URA8_young018.h5"
+fpath = "/home/alan/data/16543/URA8_young018.h5"
 
 tile_id = 9
 trange = list(range(0, 10))
diff --git a/src/logfile_parser/swainlab_parser.py b/src/logfile_parser/swainlab_parser.py
index 49d04690..f9d2a3b8 100644
--- a/src/logfile_parser/swainlab_parser.py
+++ b/src/logfile_parser/swainlab_parser.py
@@ -53,17 +53,58 @@ from pyparsing import (
 
 atomic = t.Union[str, int, float, bool]
 
+# specify grammar for the Swain lab
+sl_grammar = {
+    "general": {
+        "start_trigger": Literal("Swain Lab microscope experiment log file"),
+        "data_type": "fields",
+        "end_trigger": "-----Acquisition settings-----",
+    },
+    "image_config": {
+        "start_trigger": "Image Configs:",
+        "data_type": "table",
+    },
+    "device_properties": {
+        "start_trigger": "Device properties:",
+        "data_type": "table",
+    },
+    "group": {
+        "position": {
+            "start_trigger": Group(
+                Group(Literal("group:") + Word(printables))
+                + Group(Literal("field:") + "position")
+            ),
+            "data_type": "table",
+        },
+        **{
+            key: {
+                "start_trigger": Group(
+                    Group(Literal("group:") + Word(printables))
+                    + Group(Literal("field:") + key)
+                ),
+                "data_type": "fields",
+            }
+            for key in ("time", "config")
+        },
+    },
+}
+
+ACQ_START = "-----Acquisition settings-----"
+HEADER_END = "-----Experiment started-----"
+MAX_NLINES = 2000  # in case of a malformed logfile
+
+ParserElement.setDefaultWhitespaceChars(" \t")
+
 
 class HeaderEndNotFound(Exception):
     def __init__(self, message, errors):
         super().__init__(message)
-        self.errors = errors
 
 
 def extract_header(filepath: Path):
-    # header_contents = ""
-    with open(filepath, "r") as f:
+    """Extract content of log file before the experiment starts."""
+    with open(filepath, "r", errors="ignore") as f:
         try:
             header = ""
             for _ in range(MAX_NLINES):
@@ -72,16 +113,50 @@ def extract_header(filepath: Path):
             if HEADER_END in line:
                 break
         except HeaderEndNotFound as e:
-            print(f"{MAX_NLINES} checked and no header found")
+            print(f"{MAX_NLINES} lines checked and no header found.")
             raise (e)
     return header
 
 
+def parse_from_swainlab_grammar(filepath: t.Union[str, Path]):
+    """Parse using a grammar for the Swain lab."""
+    return parse_from_grammar(filepath, sl_grammar)
+
+
+def parse_from_grammar(filepath: str, grammar: t.Dict):
+    """Parse a file using the specified grammar."""
+    header = extract_header(filepath)
+    d = {}
+    for key, values in grammar.items():
+        try:
+            if "data_type" in values:
+                # data_type for parse_x defined in values
+                d[key] = parse_x(header, **values)
+            else:
+                # use subkeys to parse groups
+                for subkey, subvalues in values.items():
+                    subkey = "_".join((key, subkey))
+                    d[subkey] = parse_x(header, **subvalues)
+        except Exception as e:
+            logging.getLogger("aliby").critical(
+                f"Parsing failed for key {key} and values {values}."
+            )
+            raise e
+    return d
+
+
+def parse_x(string, data_type, **kwargs):
+    """Parse a string for data of a specified type."""
+    res_dict = eval(f"parse_{data_type}(string, **kwargs)")
+    return res_dict
+
+
 def parse_table(
     string: str,
     start_trigger: t.Union[str, Keyword],
 ) -> pd.DataFrame:
-    """Parse csv-like table
+    """
+    Parse csv-like table.
 
     Parameters
     ----------
@@ -98,12 +173,9 @@ def parse_table(
     Examples
     --------
     >>> table = parse_table()
-
     """
-
     if isinstance(start_trigger, str):
         start_trigger: Keyword = Keyword(start_trigger)
-
     EOL = LineEnd().suppress()
     field = OneOrMore(CharsNotIn(":,\n"))
     line = LineStart() + Group(
@@ -116,11 +188,9 @@ def parse_table(
         + EOL  # end_trigger.suppress()
     )
     parser_result = parser.search_string(string)
-
     assert all(
         [len(row) == len(parser_result[0]) for row in parser_result]
     ), f"Table {start_trigger} has unequal number of columns"
-
     assert len(parser_result), f"Parsing is empty. {parser}"
     return table_to_df(parser_result.as_list())
 
@@ -139,16 +209,12 @@ def parse_fields(
     start: 0
     interval: 300
     frames: 180
-
-
     """
     EOL = LineEnd().suppress()
-
     if end_trigger is None:
         end_trigger = EOL
     elif isinstance(end_trigger, str):
         end_trigger = Literal(end_trigger)
-
     field = OneOrMore(CharsNotIn(":\n"))
     line = (
         LineStart()
@@ -164,79 +230,6 @@ def parse_fields(
     return fields_to_dict_or_table(results)
 
 
-# Grammar specification
-grammar = {
-    "general": {
-        "start_trigger": Literal("Swain Lab microscope experiment log file"),
-        "type": "fields",
-        "end_trigger": "-----Acquisition settings-----",
-    },
-    "image_config": {
-        "start_trigger": "Image Configs:",
-        "type": "table",
-    },
-    "device_properties": {
-        "start_trigger": "Device properties:",
-        "type": "table",
-    },
-    "group": {
-        "position": {
-            "start_trigger": Group(
-                Group(Literal("group:") + Word(printables))
-                + Group(Literal("field:") + "position")
-            ),
-            "type": "table",
-        },
-        **{
-            key: {
-                "start_trigger": Group(
-                    Group(Literal("group:") + Word(printables))
-                    + Group(Literal("field:") + key)
-                ),
-                "type": "fields",
-            }
-            for key in ("time", "config")
-        },
-    },
-}
-
-
-ACQ_START = "-----Acquisition settings-----"
-HEADER_END = "-----Experiment started-----"
-MAX_NLINES = 2000  # In case of malformed logfile
-# test_file = "/home/alan/Downloads/pH_med_to_low.log"
-# test_file = "/home/alan/Documents/dev/skeletons/scripts/dev/C1_60x.log"
-
-ParserElement.setDefaultWhitespaceChars(" \t")
-
-
-# time_fields = parse_field(acq, start_trigger=grammar["group"]["time"]["start_trigger"])
-# config_fields = parse_fields(
-#     acq, start_trigger=grammar["group"]["config"]["start_trigger"]
-# )
-
-# general_fields = parse_fields(basic, start_trigger=grammar["general"]["start_trigger"])
-
-
-def parse_from_grammar(filepath: str, grammar: t.Dict):
-    header = extract_header(filepath)
-    d = {}
-    for key, values in grammar.items():
-        try:
-            if "type" in values:
-                d[key] = parse_x(header, **values)
-            else:  # Use subkeys to parse groups
-                for subkey, subvalues in values.items():
-                    subkey = "_".join((key, subkey))
-                    d[subkey] = parse_x(header, **subvalues)
-        except Exception as e:
-            logging.getLogger("aliby").critical(
-                f"Parsing failed for key {key} and values {values}"
-            )
-            raise (e)
-    return d
-
-
 def table_to_df(result: t.List[t.List]):
     if len(result) > 1:  # Multiple tables with ids to append
         # Generate multiindex from "Name column"
@@ -292,12 +285,3 @@ def _cast_type(x: str) -> t.Union[str, int, float, bool]:
     except:
         pass
     return x
-
-
-def parse_x(string: str, type: str, **kwargs):
-    # return eval(f"parse_{type}({string}, **{kwargs})")
return eval(f"parse_{type}({string}, **{kwargs})") - return eval(f"parse_{type}(string, **kwargs)") - - -def parse_from_swainlab_grammar(filepath: t.Union[str, Path]): - return parse_from_grammar(filepath, grammar) -- GitLab