diff --git a/README.md b/README.md
index bff3d462af8d8071b3594e7a9e5bf170b46cbebb..98c1d5284d17d7d99d74f2d9798e3ff23e89e0bb 100644
--- a/README.md
+++ b/README.md
@@ -20,6 +20,8 @@ See our [installation instructions]( https://aliby.readthedocs.io/en/latest/INST
 
 ### CLI
 
+If installed via poetry, you have access to a command-line interface (CLI):
+
  ```bash
 aliby-run --expt_id EXPT_PATH --distributed 4 --tps None
  ```
@@ -31,6 +33,8 @@ And to run Omero servers, the basic arguments are shown:
 
 The output is a folder with the original logfiles and a set of hdf5 files, one with the results of each multidimensional image inside.
 
+For more information, including available options, see the page on [running the analysis pipeline](https://aliby.readthedocs.io/en/latest/PIPELINE.html).
+
 ## Using specific components
 
 ### Access raw data
diff --git a/docs/source/INSTALL.md b/docs/source/INSTALL.md
index 94abc570f87bc636041e8d9203c7247a782ccd0b..b19b576e29f5d163a009e07d0fa5080608a2ebdc 100644
--- a/docs/source/INSTALL.md
+++ b/docs/source/INSTALL.md
@@ -62,7 +62,7 @@ For Windows, the simplest way to install it is using conda (or mamba). You can i
     $ \PATH\TO\POETRY\LOCATION\poetry install
 
   - MacOS
-  Under work (See issue https://github.com/ome/omero-py/issues/317)
+  For local access and processing, follow the same instructions as for Linux. Remote access to OMERO servers depends on issues in one of our dependencies being resolved (see https://github.com/ome/omero-py/issues/317).
 
 ### Git version
 
@@ -71,9 +71,23 @@ Install [ poetry ](https://python-poetry.org/docs/#installation) for dependency
 In case you want to have a local version:
 
     $ git clone git@gitlab.com/aliby/aliby.git
-    $ cd aliby && poetry install --all-extras
+    $ cd aliby
+
+and then either
 
-This will automatically install the [ BABY ](https://gitlab.com/aliby/baby) segmentation software. Support for additional segmentation and tracking algorithms is under development.
+    $ poetry install --all-extras
+
+for everything, including tools to access OMERO servers, or
+
+    $ poetry install
+
+for a version with only local access, or
+
+    $ poetry install --with dev
+
+to install with compatible versions of the development tools we use, such as black.
+
+These commands will automatically install the [ BABY ](https://gitlab.com/aliby/baby) segmentation software. Support for additional segmentation and tracking algorithms is under development.
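+
+Poetry manages aliby inside its own virtual environment, so commands such as the CLI described in the README can be run with `poetry run`. For example (a sketch, assuming poetry's default virtual-environment setup and reusing the README's example arguments):
+
+    $ poetry run aliby-run --expt_id EXPT_PATH --distributed 4 --tps None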
 
 ## Omero Server
 
diff --git a/docs/source/PIPELINE.md b/docs/source/PIPELINE.md
new file mode 100644
index 0000000000000000000000000000000000000000..127ff0c2efc7da40364600db94840c7298a84ce4
--- /dev/null
+++ b/docs/source/PIPELINE.md
@@ -0,0 +1,87 @@
+# Running the analysis pipeline
+
+You can run the analysis pipeline either via the command line interface (CLI) or using a script that incorporates the `aliby.pipeline.Pipeline` object.
+
+## CLI
+
+At the command line, you can use the `aliby-run` command.  It takes the following options:
+- `--host`: Address of image-hosting server.
+- `--username`: Username to access image-hosting server.
+- `--password`: Password to access image-hosting server.
+- `--expt_id`: Number ID of experiment stored on host server.
+- `--distributed`: Number of distributed cores to use for segmentation and signal processing.  If 0, there is no parallelisation.
+- `--tps`: Optional.  Number of time points from the beginning of the experiment to use.  If not specified, the pipeline processes all time points.
+- `--directory`: Optional.  Parent directory to save the data files (HDF5) generated, `./data` by default; the files will be stored in a child directory whose name is the name of the experiment.
+- `--filter`: Optional.  List of positions to use for analysis.  Alternatively, a regex (regular expression) or list of regexes to search for positions.  **Note: the CLI cannot currently take a list of strings as input.**
+- `--overwrite`: Optional.  Whether to overwrite an existing data directory.  True by default.
+- `--override_meta`: Optional.  Whether to overwrite any existing metadata in the data directory.  True by default.
+
+Example usage:
+```bash
+aliby-run --expt_id EXPT_PATH --distributed 4 --tps None
+```
+
+To use an OMERO server, the basic arguments are:
+```bash
+aliby-run --expt_id XXX --host SERVER.ADDRESS --user USER --password PASSWORD
+```
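+
+The optional arguments can be combined with the required ones.  As a sketch (the experiment ID, number of time points, output directory, and position regex below are placeholders), a run restricted to the first 100 time points of positions matching a regex, with results saved under a custom directory, could look like:
+```bash
+aliby-run --expt_id EXPT_PATH --distributed 4 --tps 100 \
+    --directory ./my_analysis --filter "pos00[1-3]"
+```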
+
+
+## Script
+
+Use the `aliby.pipeline.Pipeline` object and supply a dictionary, following the example below.  The parameters have the same meaning as those described in the CLI section above.
+
+```python
+#!/usr/bin/env python3
+
+from aliby.pipeline import Pipeline, PipelineParameters
+
+# Specify experiment IDs
+ids = [101, 102]
+
+for i in ids:
+    print(i)
+    try:
+        params = PipelineParameters.default(
+            # Create dictionary to define pipeline parameters.
+            general={
+                "expt_id": i,
+                "distributed": 6,
+                "host": "INSERT ADDRESS HERE",
+                "username": "INSERT USERNAME HERE",
+                "password": "INSERT PASSWORD HERE",
+                # Ensure existing data will be overwritten
+                "override_meta": True,
+                "overwrite": True,
+            }
+        )
+
+        # Fine-grained control beyond general parameters:
+        # change specific leaf in the extraction tree.
+        # This example tells the pipeline to additionally compute the
+        # nuc_est_conv quantity, which is a measure of the degree of
+        # localisation of a signal in a cell.
+        params = params.to_dict()
+        leaf_to_change = params["extraction"]["tree"]["GFP"]["np_max"]
+        leaf_to_change.add("nuc_est_conv")
+
+        # Regenerate PipelineParameters
+        p = Pipeline(PipelineParameters.from_dict(params))
+
+        # Run pipeline
+        p.run()
+        
+    # Error handling
+    except Exception as e:
+        print(e)
+```
+
+This example code can be the contents of a `run.py` file, and you can run it via
+
+```bash
+python run.py
+```
+
+in the appropriate virtual environment.
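+
+If aliby was installed via poetry, a sketch of the same invocation inside poetry's environment would be:
+
+```bash
+poetry run python run.py
+```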
+
+Alternatively, the example code can be the contents of a cell in a Jupyter notebook.
diff --git a/src/agora/io/signal.py b/src/agora/io/signal.py
index 322bb5ee1929650653e93d7bbcab3ae2e72aebd2..6f7ea3e4ad1f1fa8c422a6c33acd1d97000fa944 100644
--- a/src/agora/io/signal.py
+++ b/src/agora/io/signal.py
@@ -203,9 +203,8 @@ class Signal(BridgeH5):
                     merged = pd.DataFrame([], index=index)
         return merged
 
-    # Alan: do we need two similar properties - see below?
-    @property
-    def datasets(self):
+    @cached_property
+    def p_available(self):
         """Print data sets available in h5 file."""
         if not hasattr(self, "_available"):
             self._available = []
@@ -214,11 +213,6 @@ class Signal(BridgeH5):
         for sig in self._available:
             print(sig)
 
-    @cached_property
-    def p_available(self):
-        """Print data sets available in h5 file."""
-        self.datasets
-
     @cached_property
     def available(self):
         """Get data sets available in h5 file."""
diff --git a/src/agora/io/writer.py b/src/agora/io/writer.py
index b57c252e71372059bac41fd72b002a7614dbdb50..a13828c795fe406423531ba9b11a3c2cac224881 100644
--- a/src/agora/io/writer.py
+++ b/src/agora/io/writer.py
@@ -230,7 +230,6 @@ class LinearBabyWriter(DynamicWriter):
     Assumes the edgemasks are of form ((None, tile_size, tile_size), bool).
     """
 
-    # TODO make this YAML: Alan: why?
     compression = "gzip"
     _default_tile_size = 117
     datatypes = {
@@ -319,11 +318,7 @@ class StateWriter(DynamicWriter):
     @staticmethod
     def format_values_tpback(states: list, val_name: str):
         """Unpacks a dict of state data into tp_back, trap, value."""
-        # initialise as empty lists
-        # Alan: is this initialisation necessary?
-        tp_back, trap, value = [
-            [[] for _ in states[0][val_name]] for _ in range(3)
-        ]
+
         # store results as a list of tuples
         lbl_tuples = [
             (tp_back, trap, cell_label)
@@ -334,6 +329,11 @@ class StateWriter(DynamicWriter):
         # unpack list of tuples to define variables
         if len(lbl_tuples):
             tp_back, trap, value = zip(*lbl_tuples)
+        else:
+            # set as empty lists
+            tp_back, trap, value = [
+                [[] for _ in states[0][val_name]] for _ in range(3)
+            ]
         return tp_back, trap, value
 
     @staticmethod
@@ -409,9 +409,9 @@ class StateWriter(DynamicWriter):
 
 #################### Extraction version ###############################
 class Writer(BridgeH5):
-    """Class to transform data into compatible structures."""
-
-    # Alan: when is this used?
+    """
+    Class to transform data into compatible structures.
+    Used by Extractor and Postprocessor within the pipeline."""
 
     def __init__(self, filename, flag=None, compression="gzip"):
         """
@@ -473,7 +473,7 @@ class Writer(BridgeH5):
             self.write_pd(f, path, data, compression=self.compression)
         # data is a multi-index dataframe
         elif isinstance(data, pd.MultiIndex):
-            # Alan: should we still not compress here?
+            # TODO: benchmark I/O speed when using compression
             self.write_index(f, path, data)  # , compression=self.compression)
         # data is a dictionary of dataframes
         elif isinstance(data, Dict) and np.all(
diff --git a/src/aliby/pipeline.py b/src/aliby/pipeline.py
index 82d6420e4fb1f87f20efb3e376051901177496da..9e475646a9fda1922e4654d6e6a63d9d9f510d6b 100644
--- a/src/aliby/pipeline.py
+++ b/src/aliby/pipeline.py
@@ -76,14 +76,12 @@ class PipelineParameters(ParametersABC):
         postprocessing: dict (optional)
             Parameters for post-processing.
         """
-        # Alan: should 19993 be updated?
         expt_id = general.get("expt_id", 19993)
         if isinstance(expt_id, PosixPath):
             expt_id = str(expt_id)
             general["expt_id"] = expt_id
 
-        # Alan: an error message rather than a default might be better
-        directory = Path(general.get("directory", "../data"))
+        directory = Path(general["directory"])
 
         # get log files, either locally or via OMERO
         with dispatch_dataset(
@@ -174,8 +172,8 @@ class Pipeline(ProcessABC):
         "extraction",
         "postprocessing",
     ]
-    # Indicate step-writer groupings to perform special operations during step iteration
-    # Alan: replace with - specify the group in the h5 files written by each step (?)
+
+    # Specify the group in the h5 files written by each step
     writer_groups = {
         "tiler": ["trap_info"],
         "baby": ["cell_info"],
@@ -478,7 +476,7 @@ class Pipeline(ProcessABC):
                                             f"Found {steps['tiler'].n_tiles} traps in {image.name}"
                                         )
                                     elif step == "baby":
-                                        # write state and pass info to ext (Alan: what's ext?)
+                                        # write state and pass info to Extractor
                                         loaded_writers["state"].write(
                                             data=steps[
                                                 step
@@ -573,7 +571,8 @@ class Pipeline(ProcessABC):
         )
         return (traps_above_nthresh & traps_above_athresh).mean()
 
-    # Alan: can both this method and the next be deleted?
+    # FIXME: Remove this functionality. It used to be for
+    # older hdf5 file formats.
     def _load_config_from_file(
         self,
         filename: PosixPath,
@@ -588,6 +587,8 @@ class Pipeline(ProcessABC):
                     process_from[k] += 1
         return process_from, trackers_state, overwrite
 
+    # FIXME: Remove this functionality. It used to be for
+    # older hdf5 file formats.
     @staticmethod
     def legacy_get_last_tp(step: str) -> t.Callable:
         """Get last time-point in different ways depending
@@ -647,7 +648,7 @@ class Pipeline(ProcessABC):
             States of any trackers from earlier runs.
         """
         config = self.parameters.to_dict()
-        # Alan: session is never changed
+        # TODO Alan: Verify if session must be passed
         session = None
         earlystop = config["general"].get("earlystop", None)
         process_from = {k: 0 for k in self.pipeline_steps}
@@ -700,8 +701,8 @@ class Pipeline(ProcessABC):
                         )
                         config["tiler"] = steps["tiler"].parameters.to_dict()
                     except Exception:
-                        # Alan: a warning or log here?
-                        pass
+                        self._log("Overwriting tiling data")
+
             if config["general"]["use_explog"]:
                 meta.run()
             # add metadata not in the log file
diff --git a/src/aliby/tile/tiler.py b/src/aliby/tile/tiler.py
index 43f4b397af7b7fea394fca0130ecbd9313347695..f812b756f1e801ec871368d0209a801fab16ff35 100644
--- a/src/aliby/tile/tiler.py
+++ b/src/aliby/tile/tiler.py
@@ -640,8 +640,8 @@ class Tiler(StepABC):
         return tile
 
 
-# Alan: do we need these as well as get_channel_index and get_channel_name?
-# TODO homogenise these into a pair of functions
+# FIXME: refactor to support either channel name or index
+# Note: self._log used below is not defined here
 def find_channel_index(image_channels: t.List[str], channel: str):
     """
     Access
diff --git a/src/extraction/core/extractor.py b/src/extraction/core/extractor.py
index 3f7fdbef8c680e1902f28b84022eb6cf4f02175e..e254532faadf893a4d41254da6c925da9518bde2 100644
--- a/src/extraction/core/extractor.py
+++ b/src/extraction/core/extractor.py
@@ -100,7 +100,7 @@ class Extractor(StepABC):
     Extraction follows a three-level tree structure. Channels, such as GFP, are the root level; the reduction algorithm, such as maximum projection, is the second level; the specific metric, or operation, to apply to the masks, such as mean, is the third level.
     """
 
-    # Alan: should this data be stored here or all such data in a separate file
+    # TODO Alan: Move this to a location with the SwainLab defaults
     default_meta = {
         "pixel_size": 0.236,
         "z_size": 0.6,
diff --git a/src/postprocessor/grouper.py b/src/postprocessor/grouper.py
index 990a97830d5d90db032e5e6e606ea4aceeb75422..4c8e5026f6c0351132b405a9fcd964cbcf4a729d 100644
--- a/src/postprocessor/grouper.py
+++ b/src/postprocessor/grouper.py
@@ -353,7 +353,6 @@ class phGrouper(NameGrouper):
         return aggregated
 
 
-# Alan: why are these separate functions?
 def concat_standard(
     path: str,
     chainer: Chainer,
@@ -474,9 +473,7 @@ class MultiGrouper:
             )
         return self._sigtable
 
-    # Alan: function seems out of place
-    # seaborn is not in pyproject.toml
-    def sigtable_plot(self) -> None:
+    def _sigtable_plot(self) -> None:
         """
         Plot number of chains for all available experiments.