Commits on Source (459), showing 5809 additions and 2884 deletions.
......@@ -31,21 +31,32 @@ before_script:
- export ARGS="--with test,dev";
- if [[ "$CI_STAGE_NAME" == "tests" ]]; then echo "Installing system dependencies for ${CI_STAGE_NAME}"; apt update && apt install -y ffmpeg libsm6 libxext6; fi
- if [[ "$CI_JOB_NAME" == "Static Type" ]]; then echo "Activating development group"; export ARGS="${ARGS},dev"; fi
- if [[ "$CI_JOB_NAME" == "Network Tools Test" ]]; then echo "Setting flag to compile zeroc-ice"; export ARGS="${ARGS} --all-extras"; fi
- if [[ "$CI_JOB_NAME" == "Network Tools Tests" ]]; then echo "Setting flag to compile zeroc-ice"; export ARGS="${ARGS} --all-extras"; fi
- poetry install -vv $ARGS
Local Tests:
stage: tests
script:
# - poetry install -vv
- poetry run coverage run -m --branch pytest ./tests --ignore ./tests/aliby/network --ignore ./tests/aliby/pipeline
- poetry run coverage report -m
- poetry run coverage xml
coverage: '/(?i)total.*? (100(?:\.0+)?\%|[1-9]?\d(?:\.\d+)?\%)$/'
artifacts:
reports:
coverage_report:
coverage_format: cobertura
path: coverage.xml
# TODO Split testing in four: Network vs non-network and Unit vs integration
Network Tools Tests:
stage: tests
script:
- poetry run pytest ./tests/aliby/network
- DIRNAME="test_datasets"
- curl https://zenodo.org/record/7513194/files/test_datasets.tar.gz\?download\=1 -o "test_datasets.tar.gz"
- mkdir -p $DIRNAME
- tar xvf test_datasets.tar.gz -C $DIRNAME
- poetry run pytest -s tests/aliby/pipeline --file $DIRNAME/560_2022_11_30_pypipeline_unit_test_reconstituted_00
Static Type:
stage: checks
......@@ -56,7 +67,7 @@ Static Type:
- poetry run mypy . --exclude 'setup\.py$'
# We can remove the flag once this is resolved https://github.com/pypa/setuptools/issues/2345
# TODO add more tests before activating auto-release
# Bump_release:
# stage: release
# script:
......
## Summary
{Summarize the bug encountered concisely}
I confirm that I have (if relevant):
- [ ] Read the troubleshooting guide: https://gitlab.com/aliby/aliby/-/wikis/Troubleshooting-(basic)
- [ ] Updated aliby and aliby-baby.
- [ ] Tried the unit test.
- [ ] Tried a scaled-down version of my experiment (distributed=0, filter=0, tps=10)
- [ ] Tried re-postprocessing.
## Steps to reproduce
{How one can reproduce the issue - this is very important}
- aliby version: 0.1.{...}, or if development/unreleased version, commit SHA: {...}
- platform(s):
- [ ] Jura
- [ ] Other Linux, please specify distribution and version: {...}
- [ ] macOS, please specify version: {...}
- [ ] Windows, please specify version: {...}
- experiment ID: {...}
- Any special things you need to know about this experiment: {...}
## What is the current bug behavior?
......@@ -19,6 +35,12 @@
(Paste any relevant logs - please use code blocks (```) to format console output, logs, and code, as
it's very hard to read otherwise.)
```
{PASTE YOUR ERROR MESSAGE HERE!!}
```
## Possible fixes
(If you can, link to the line of code that might be responsible for the problem)
# Contributing
We focus our work on python 3.8 due to the current neural network being developed on tensorflow 1. In the near future we will migrate the network to pytorch to support more recent versions of all packages.
## Issues
All issues are managed within the GitLab [repository](https://gitlab.com/aliby/aliby/-/issues). If you don't have an account on the University of Edinburgh's GitLab instance and would like to submit issues, please get in touch with [Prof. Peter Swain](mailto:peter.swain@ed.ac.uk).
## Data aggregation
......
......@@ -2,52 +2,47 @@
[![docs](https://readthedocs.org/projects/aliby/badge/?version=master)](https://aliby.readthedocs.io/en/latest)
[![PyPI version](https://badge.fury.io/py/aliby.svg)](https://badge.fury.io/py/aliby)
[![pipeline](https://gitlab.com/aliby/aliby/badges/master/pipeline.svg?key_text=master)](https://gitlab.com/aliby/aliby/-/pipelines)
[![dev pipeline](https://gitlab.com/aliby/aliby/badges/dev/pipeline.svg?key_text=dev)](https://gitlab.com/aliby/aliby/-/commits/dev)
[![coverage](https://gitlab.com/aliby/aliby/badges/dev/coverage.svg)](https://gitlab.com/aliby/aliby/-/commits/dev)
End-to-end processing of cell microscopy time-lapses. ALIBY automates segmentation, tracking, lineage predictions, post-processing and report production. It leverages the existing Python ecosystem and open-source scientific software available to produce seamless and standardised pipelines.
## Quickstart Documentation
Installation of the [Visual Studio C++ redistributable](https://visualstudio.microsoft.com/downloads/#microsoft-visual-c-redistributable-for-visual-studio-2022) may be required on Windows. Native macOS support is under development, but you can use containers (e.g., Docker, Podman) in the meantime.
To analyse local data:
```bash
pip install aliby
```
Add any of the optional extras `omero` and `utils` (e.g., `pip install aliby[omero,utils]`). `omero` provides tools to connect with an OMERO server and `utils` provides visualisation, user-interface and additional deep-learning tools.
See our [installation instructions](https://aliby.readthedocs.io/en/latest/INSTALL.html) for more details.
We use (and recommend) [OMERO](https://www.openmicroscopy.org/omero/) to manage our microscopy database, but ALIBY can process both locally-stored experiments and remote ones hosted on a server.
### CLI
If installed via poetry, you have access to a Command Line Interface (CLI):
```bash
aliby-run --expt_id EXPT_PATH --distributed 4 --tps None
```
To run against OMERO servers, the basic arguments are:
```bash
aliby-run --expt_id XXX --host SERVER.ADDRESS --user USER --password PASSWORD
```
The output is a folder with the original logfiles and a set of HDF5 files, one containing the results for each multidimensional image.
For more information, including available options, see the page on [running the analysis pipeline](https://aliby.readthedocs.io/en/latest/PIPELINE.html).
## Using specific components
### Raw data access
ALIBY's tooling can also be used as an interface to OMERO servers, for example, to fetch a brightfield channel.
```python
from aliby.io.omero import Dataset, Image
server_info = {
"host": "host_address",
......@@ -82,27 +77,33 @@ in time.
It fetches the metadata from the Image object and uses the TilerParameters values (all Processes in aliby depend on an associated Parameters class, which is in essence a dictionary turned into a class).
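As a sketch of that pattern (the class and field names below are invented for illustration; `from_dict`/`to_dict` follow the `ParametersABC` interface from aliby's agora package):
```python
# Illustrative sketch only: a Parameters subclass wraps a dictionary of defaults.
from agora.abc import ParametersABC

class ExampleParameters(ParametersABC):
    def __init__(self, tile_size: int = 96, ref_channel: str = "Brightfield"):
        self.tile_size = tile_size
        self.ref_channel = ref_channel

params = ExampleParameters.from_dict({"tile_size": 117})
params.tile_size  # 117: dictionary keys become attributes
params.to_dict()  # and can be turned back into a plain dictionary
```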
#### Get a timelapse for a given tile (remote connection)
```python
fpath = "h5/location"
trap_id = 9
trange = list(range(0, 30))
tile_id = 9
trange = range(0, 10)
ncols = 8
riv = remoteImageViewer(fpath)
trap_tps = riv.get_trap_timepoints(trap_id, trange, ncols)
trap_tps = [riv.tiler.get_tiles_timepoint(tile_id, t) for t in trange]
# You can also access labelled traps
m_ts = riv.get_labelled_trap(tile_id=0, tps=[0])
# And plot them directly
riv.plot_labelled_trap(trap_id=0, channels=[0, 1, 2, 3], trange=range(10))
```
Depending on the network speed, this can take several seconds.
For a speed-up, fetch fewer z-positions if you can.
#### Get the tiles for a given time point
Alternatively, if you want to get all the tiles at a given timepoint:
```python
timepoint = (4, 6)
tiler.get_tiles_timepoint(timepoint, channels=None, z=[0, 1, 2, 3, 4])
```
......
numpydoc>=1.3.1
aliby[network]>=0.1.43
sphinx-autodoc-typehints==1.19.2
sphinx-rtd-theme==1.0.0
sphinxcontrib-applehelp==1.0.2
sphinxcontrib-devhelp==1.0.2
sphinxcontrib-htmlhelp==2.0.0
sphinxcontrib-jsmath==1.0.1
sphinxcontrib-qthelp==1.0.3
sphinxcontrib-serializinghtml==1.1.5
myst-parser
sphinx-autodoc-typehints
......@@ -40,29 +40,130 @@ Or using [pyenv](https://github.com/pyenv/pyenv) with pyenv-virtualenv:
### Pip version
Once you have created and activated your virtual environment, run:
If you are not using an OMERO server setup:
$ pip install aliby
Otherwise, if you are contacting an OMERO server:
$ pip install aliby[network]
NOTE: Support for OMERO servers on GNU/Linux requires building ZeroC-Ice, and thus build tools. The versions for Windows and macOS are provided as Python wheels, so installation is faster.
### FAQ
- Installation fails during zeroc-ice compilation (Windows and macOS).
For Windows, the simplest way to install it is using conda (or mamba). You can install the (OMERO) network components separately:
$ conda create -n aliby -c conda-forge python=3.8 omero-py
$ conda activate aliby
$ cd c:/Users/Public/Repos/aliby
$ \PATH\TO\POETRY\LOCATION\poetry install
- MacOS
For local access and processing, follow the same instructions as for Linux. Remote access to OMERO servers depends on some issues in one of our dependencies being solved (see https://github.com/ome/omero-py/issues/317).
### Git version
Install [poetry](https://python-poetry.org/docs/#installation) for dependency management.
In case you want to have a local version:
$ git clone git@gitlab.com:aliby/aliby.git
$ cd aliby
and then either
$ poetry install --all-extras
for everything, including tools to access OMERO servers, or
$ poetry install
for a version with only local access, or
$ poetry install --with dev
to install with compatible versions of the development tools we use, such as black.
These commands will automatically install the [BABY](https://gitlab.com/aliby/baby) segmentation software. Support for additional segmentation and tracking algorithms is under development.
## Omero Server
We use (and recommend) [OMERO](https://www.openmicroscopy.org/omero/) to manage our microscopy database, but ALIBY can process both locally-stored experiments and remote ones hosted on a server.
### Setting up a server
For testing and development, the easiest way to set up an OMERO server is by
using Docker images.
[The Software Carpentry](https://software-carpentry.org/) and the [Open Microscopy Environment](https://www.openmicroscopy.org) have provided [instructions](https://ome.github.io/training-docker/) to do this.
The `docker-compose.yml` file can be used to create an OMERO server with an
accompanying PostgreSQL database, and an OMERO web server.
It is described in detail
[here](https://ome.github.io/training-docker/12-dockercompose/).
Our version of the `docker-compose.yml` has been adapted from the above to
use version 5.6 of OMERO.
To start these containers (in background):
```shell script
cd pipeline-core
docker-compose up -d
```
Omit the `-d` to run in foreground.
To stop them, in the same directory, run:
```shell script
docker-compose stop
```
### Troubleshooting
Segmentation has been tested on: macOS Mojave, Ubuntu 20.04 and Arch Linux.
Data processing has been tested on all the above and Windows 11.
### Detailed Windows installation
#### Create environment
Open anaconda powershell as administrator
```shell script
conda create -n devaliby2 -c conda-forge python=3.8 omero-py
conda activate devaliby2
```
#### Install poetry
You may have to specify the python executable to get this to work:
```shell script
(Invoke-WebRequest -Uri https://install.python-poetry.org -UseBasicParsing).Content | C:\Users\USERNAME\Anaconda3\envs\devaliby2\python.exe -
```
Also specify the full path when running poetry (there must be a way to sort this).
- Clone the repository (assuming you have SSH properly set up):
```shell script
git clone git@gitlab.com:aliby/aliby.git
cd aliby
poetry install --all-extras
```
You may need to run the full poetry path twice; the first attempt may give an error message, and the second should then work:
```shell script
C:\Users\v1iclar2\AppData\Roaming\Python\Scripts\poetry install --all-extras
```
Confirm the installation of aliby: run `python`, then `import aliby`; you should get no error message.
#### Access the virtual environment from the IDE (e.g., PyCharm)
Create a new project:
- In "Location", navigate to the aliby folder (e.g., C:/Users/Public/Repos/aliby).
- Select the correct Python interpreter:
  - Click the interpreter name at the bottom right.
  - Click "Add local interpreter".
  - On the left, click "Conda environment".
  - Click the three dots to the right of the interpreter path and navigate to the python executable of the environment created above (e.g., C:\Users\v1iclar2\Anaconda3\envs\devaliby2\python.exe).
#### Potential Windows issues
- Sometimes the pywin32 library gives trouble; just install it using pip or conda.
# Running the analysis pipeline
You can run the analysis pipeline either via the command line interface (CLI) or using a script that incorporates the `aliby.pipeline.Pipeline` object.
## CLI
On a CLI, you can use the `aliby-run` command, which takes the following options:
- `--host`: Address of image-hosting server.
- `--username`: Username to access image-hosting server.
- `--password`: Password to access image-hosting server.
- `--expt_id`: Number ID of experiment stored on host server.
- `--distributed`: Number of distributed cores to use for segmentation and signal processing. If 0, there is no parallelisation.
- `--tps`: Optional. Number of time points from the beginning of the experiment to use. If not specified, the pipeline processes all time points.
- `--directory`: Optional. Parent directory to save the data files (HDF5) generated, `./data` by default; the files will be stored in a child directory whose name is the name of the experiment.
- `--filter`: Optional. List of positions to use for analysis. Alternatively, a regex (regular expression) or list of regexes to search for positions. **Note: the CLI currently cannot take a list of strings as input.**
- `--overwrite`: Optional. Whether to overwrite an existing data directory. True by default.
- `--override_meta`: Optional. Whether to overwrite the existing metadata in the data directory. True by default.
Example usage:
```bash
aliby-run --expt_id EXPT_PATH --distributed 4 --tps None
```
And to run against OMERO servers, the basic arguments are:
```bash
aliby-run --expt_id XXX --host SERVER.ADDRESS --user USER --password PASSWORD
```
## Script
Use the `aliby.pipeline.Pipeline` object and supply a dictionary, following the example below. The meaning of the parameters is the same as described in the CLI section above.
```python
#!/usr/bin/env python3
from aliby.pipeline import Pipeline, PipelineParameters

# Specify experiment IDs
ids = [101, 102]
for i in ids:
    print(i)
    try:
        # Create a dictionary to define the pipeline parameters.
        params = PipelineParameters.default(
            general={
                "expt_id": i,
                "distributed": 6,
                "host": "INSERT ADDRESS HERE",
                "username": "INSERT USERNAME HERE",
                "password": "INSERT PASSWORD HERE",
                # Ensure data will be overwritten
                "override_meta": True,
                "overwrite": True,
            }
        )
        # Fine-grained control beyond general parameters:
        # change a specific leaf in the extraction tree.
        # This example tells the pipeline to additionally compute the
        # nuc_est_conv quantity, which is a measure of the degree of
        # localisation of a signal in a cell.
        params = params.to_dict()
        leaf_to_change = params["extraction"]["tree"]["GFP"]["np_max"]
        leaf_to_change.add("nuc_est_conv")
        # Regenerate PipelineParameters.
        p = Pipeline(PipelineParameters.from_dict(params))
        # Run the pipeline.
        p.run()
    # Error handling
    except Exception as e:
        print(e)
```
This example code can be the contents of a `run.py` file, and you can run it via
```bash
python run.py
```
in the appropriate virtual environment.
Alternatively, the example code can be the contents of a cell in a Jupyter notebook.
......@@ -4,21 +4,15 @@
contain the root `toctree` directive.
.. toctree::
   :hidden:

   Home page <self>
   Installation <INSTALL.md>
   Pipeline options <PIPELINE.md>
   Contributing <CONTRIBUTING.md>

..
   Examples <examples.rst>
   Reference <api.rst>
   ALIBY reference <_autosummary/aliby>
   extraction reference <_autosummary/extraction>
   agora reference <_autosummary/agora>
   postprocessor reference <_autosummary/postprocessor>
   logfile_parser reference <_autosummary/logfile_parser>

.. include:: ../../README.md
   :parser: myst_parser.sphinx_
#+title: Input/Output Stage Dependencies
Overview of what fields are required for each consecutive step to run, and what each step produces.
- Registration
- Tiler
  - Requires:
    - None
  - Produces:
    - /trap_info
#+title: Aliby metadata specification
Draft for recommended metadata for images to provide a standard interface for aliby. I attempt to follow OMERO metadata structures.
* Essential data
- DimensionOrder: str
Order of dimensions (e.g., TCZYX for Time, Channel, Z, Y, X)
- PixelSize: float
Size of pixel, useful for segmentation.
- Channels: List[str]
Channel names, used to refer to channels in parameters.
* Optional but useful data
- ntps: int
Number of time-points
- Date
Date of experiment
- interval: float
Time interval when the experiment has a constant acquisition time. If it changes depending on the position, or if it is a dynamic experiment, this is the greatest common divisor of the different intervals (e.g., 150 for intervals of 300 and 450).
- Channel conditions: DataFrame
Dataframe with acquisition features for each image as a function of a minimal time interval unit.
- Group config: DataFrame
If multiple groups are used, it indicates the time-points at which the corresponding channel was acquired.
- LED: List[str]
LED names. Useful when images are acquired with the same LED and filter but multiple voltage conditions.
- Filter: List[str]
Filter names. Useful when images are acquired with the same LED and filter but multiple voltage conditions.
- tags : List[str]
Tags associated with the experiment. Useful for semi-automated experiment exploration.
- Experiment-wide groups: List[int]
Index of the group to which each position belongs.
- Group names: List[str]
List of group names.
* Optional
- hardware information : Dict[str, str]
Name of all hardware used to acquire images.
- Acquisition software and version: Tuple[str,str]
- Experiment start: date
- Experiment end: date
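As an illustration, a hypothetical instance of this specification (field names follow this draft; the values are invented):
#+begin_src python
metadata = {
    # Essential
    "DimensionOrder": "TCZYX",
    "PixelSize": 0.236,  # micrometres per pixel
    "Channels": ["Brightfield", "GFPFast", "pHluorin405", "cy5"],
    # Optional but useful
    "ntps": 180,
    "interval": 300.0,  # seconds between time points
}
#+end_src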
#+title: ALIBY roadmap
Overview of potential improvements, goals, issues and other thoughts worth keeping in the repository. In general, it is things that the original developer would have liked to implement had there been enough time.
* General goals
- Simplify code base
- Reduce dependency on BABY
- Abstract components beyond cell outlines (i.e., vacuoles or other ROIs)
- Enable providing metadata defaults (remove dependency of metadata)
- (Relevant to BABY): Migrate aliby-baby from Keras to PyTorch. Immediately after, upgrade h5py to the latest version (we are stuck on 2.10.0 due to Keras).
* Long-term tasks (Soft Eng)
- Support external segmentation/tracking/lineage/processing tools
- Split segmentation, tracking and lineage into independent Steps
- Implement the pipeline as an acyclic graph
- Isolate lineage and tracking into a section of aliby or an independent package
- Abstract cells into "ROIs" or "Outlines"
- Abstract lineage into "Outline relationships" (this may help study cell-to-cell interactions in the future)
- Add support for next-generation microscopy formats.
- Make live cell processing great again! (low priority)
* Potential features
- Flat field correction (requires research on what is the best way to do it)
- Support for monotiles (e.g., agarose pads)
- Support the user providing location of tiles (could be a GUI in which the user selects a region)
- Support multiple neural networks (e.g., vacuole/nucleus in addition to cell segmentation)
- Use CellPose as a backup for accuracy-first pipelines
* Potential CLI(+matplotlib) interfaces
The fastest way to get a GUI-like interface is by using matplotlib as a panel to update and read keyboard inputs to interact with the data. All of this can be done within matplotlib in a few hundred lines of code.
- Annotate intracellular contents
- Interface to adjust the parameters for calibration
- Basic selection of region of interest in a per-position basis
* Sections in need of refactoring
** Extraction
Extraction could easily increase its processing speed. Most of the code was not originally written using casting and vectorised operations.
- Reducing the use of python loops to the minimum
- Replacing nested functions with functional mappings (extraction would be faster and clearer with a functional programming approach)
- Replacing the tree with a set of tuples and delegating the processing order to dask.
  Dask can produce its own internal tree and optimise the order, rendering our tree unnecessary.
** Postprocessing.
- Clarify the limits of the picking and merging classes: these are temporal procedures. In the future, segmentation should become more accurate, making the Picker redundant, and better tracking/lineage assignment will make merging redundant.
- Formalise how lineage and reshaper processes are handled
- Non-destructive postprocessing.
The way postprocessing is done is destructive at the moment. If we aim to perform more complex data analysis automatically an implementation of complementary and tractable sub-pipelines is essential. (low priority, perhaps within scripts)
- Functionalise the parameter-process schema. This schema provides a decent structure, but it requires a lot of boilerplate code. To transition, the best option is probably a function that converts Process classes into functions, and another that extracts default values from a Parameters class (see the sketch below). This could in theory replace most Process-Parameters pairs. Lineage functions will pose a problem, and a common interface to get lineage or outline-to-outline relationships needs to be engineered.
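A sketch of the functionalisation idea (all names are illustrative, not existing aliby API):
#+begin_src python
def as_function(process_cls, parameters_cls):
    """Convert a Process/Parameters pair into a plain function with defaults."""
    defaults = parameters_cls.default().to_dict()

    def run(*args, **overrides):
        # Merge user overrides into the defaults, then run the process once.
        params = parameters_cls.from_dict({**defaults, **overrides})
        return process_cls(params).run(*args)

    return run
#+end_src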
** Compiler/Reporter
- Remove compiler step, and focus on designing an adequate report, then build it straight after postprocessing ends.
** Writers/Readers
- Consider storing signals that are similar (e.g., signals arising from each channel) in a single multidimensional array to save storage space. (mid priority)
- Refactor (Extraction/Postprocessing) Writer to use the DynamicWriter Abstract Base Class.
** Pipeline
Pipeline is in dire need of refactoring, as it coordinates too many things. The best approach would be to modify the structure to delegate more responsibilities to Steps (such as validation) and Writers (such as writing metadata).
* Testing
- I/O interfaces
- Visualisation helpers and other functions
- Running one pipeline from another
- Groupers
* Documentation
- Tutorials and how-to for the usual tasks
- How to deal with different types of data
- How to aggregate data from multiple experiments
- Contribution guidelines (after developing some)
* Tools/alternatives that may be worth considering for the future
- trio/asyncio/anyio for concurrent processing of individual threads
- Pandas -> Polars: Reconsider after pandas 2.0; they will become interoperable
- awkward arrays: Better way to represent data series with different sizes
- h5py -> zarr: The OME-ZARR format is out now and the field may move in that direction. This would also make being stuck on h5py 2.10.0 less egregious.
- Use CellACDC's work on producing a common interface to access a multitude of segmentation algorithms.
* Secrets in the code
- As aliby is adapted to future Python versions, keep up with the "FUTURE" statements that enunciate how code can be improved in newer Python versions
- Track FIXMEs and, if we cannot solve them immediately, open an associated issue
* Minor inconveniences to fix
- Update CellTracker models by training with the current scikit-learn (it currently warns that the models were trained with an older version of sklearn)
......@@ -14,5 +14,4 @@ params = Parameters(
ext = Extractor(params, omero_id=19310)
# ext.extract_exp(tile_size=117)
d = ext.extract_tp(tp=1, tile_size=117)
2022-10-10 15:31:27,350 - INFO
Swain Lab microscope experiment log file
GIT commit: e5d5e33 fix: changes to a few issues with focus control on Batman.
Microscope name: Batman
Date: 022-10-10 15:31:27
Log file path: D:\AcquisitionDataBatman\Swain Lab\Ivan\RAW DATA\2022\Oct\10-Oct-2022\pH_med_to_low00\pH_med_to_low.log
Micromanager config file: C:\Users\Public\Microscope control\Micromanager config files\Batman_python_15_4_22.cfg
Omero project: Default project
Omero tags:
Experiment details: Effect on growth and cytoplasmic pH of switch from normal pH (4.25) media to higher pH (5.69). Switching is run using the Oxygen software
-----Acquisition settings-----
2022-10-10 15:31:27,350 - INFO Image Configs:
Image config,Channel,Description,Exposure (ms), Number of Z sections,Z spacing (um),Sectioning method
brightfield1,Brightfield,Default bright field config,30,5,0.6,PIFOC
pHluorin405_0_4,pHluorin405,Phluorin excitation from 405 LED 0.4v and 10ms exposure,5,1,0.6,PIFOC
pHluorin488_0_4,GFPFast,Phluorin excitation from 488 LED 0.4v,10,1,0.6,PIFOC
cy5,cy5,Default cy5,30,1,0.6,PIFOC
Device properties:
Image config,device,property,value
pHluorin405_0_4,DTOL-DAC-1,Volts,0.4
pHluorin488_0_4,DTOL-DAC-2,Volts,0.4
cy5,DTOL-DAC-3,Volts,4
2022-10-10 15:31:27,353 - INFO
group: YST_247 field: position
Name, X, Y, Z, Autofocus offset
YST_247_001,-8968,-3319,2731.125040696934,123.25
YST_247_002,-8953,-3091,2731.3000406995416,123.25
YST_247_003,-8954,-2849,2731.600040704012,122.8
YST_247_004,-8941,-2611,2730.7750406917185,122.8
YST_247_005,-8697,-2541,2731.4500407017767,118.6
group: YST_247 field: time
start: 0
interval: 300
frames: 180
group: YST_247 field: config
brightfield1: 0xfffffffffffffffffffffffffffffffffffffffffffff
pHluorin405_0_4: 0xfffffffffffffffffffffffffffffffffffffffffffff
pHluorin488_0_4: 0xfffffffffffffffffffffffffffffffffffffffffffff
cy5: 0xfffffffffffffffffffffffffffffffffffffffffffff
2022-10-10 15:31:27,356 - INFO
group: YST_1510 field: position
Name,X,Y,Z,Autofocus offset
YST_1510_001,-6450,-230,2343.300034917891,112.55
YST_1510_002,-6450,-436,2343.350034918636,112.55
YST_1510_003,-6450,-639,2344.000034928322,116.8
YST_1510_004,-6450,-831,2344.250034932047,116.8
YST_1510_005,-6848,-536,2343.3250349182636,110
group: YST_1510 field: time
start: 0
interval: 300
frames: 180
group: YST_1510 field: config
brightfield1: 0xfffffffffffffffffffffffffffffffffffffffffffff
pHluorin405_0_4: 0xfffffffffffffffffffffffffffffffffffffffffffff
pHluorin488_0_4: 0xfffffffffffffffffffffffffffffffffffffffffffff
cy5: 0xfffffffffffffffffffffffffffffffffffffffffffff
2022-10-10 15:31:27,359 - INFO
group: YST_1511 field: position
Name, X, Y, Z, Autofocus offset
YST_1511_001,-10618,-1675,2716.900040484965,118.7
YST_1511_002,-10618,-1914,2717.2250404898077,122.45
YST_1511_003,-10367,-1695,2718.2500405050814,120.95
YST_1511_004,-10367,-1937,2718.8250405136496,120.95
YST_1511_005,-10092,-1757,2719.975040530786,119.45
group: YST_1511 field: time
start: 0
interval: 300
frames: 180
group: YST_1511 field: config
brightfield1: 0xfffffffffffffffffffffffffffffffffffffffffffff
pHluorin405_0_4: 0xfffffffffffffffffffffffffffffffffffffffffffff
pHluorin488_0_4: 0xfffffffffffffffffffffffffffffffffffffffffffff
cy5: 0xfffffffffffffffffffffffffffffffffffffffffffff
2022-10-10 15:31:27,362 - INFO
group: YST_1512 field: position
Name,X,Y,Z,Autofocus offset
YST_1512_001,-8173,-2510,2339.0750348549336,115.65
YST_1512_002,-8173,-2718,2338.0250348392874,110.8
YST_1512_003,-8173,-2963,2336.625034818426,110.8
YST_1512_004,-8457,-2963,2336.350034814328,110.9
YST_1512_005,-8481,-2706,2337.575034832582,113.3
group: YST_1512 field: time
start: 0
interval: 300
frames: 180
group: YST_1512 field: config
brightfield1: 0xfffffffffffffffffffffffffffffffffffffffffffff
pHluorin405_0_4: 0xfffffffffffffffffffffffffffffffffffffffffffff
pHluorin488_0_4: 0xfffffffffffffffffffffffffffffffffffffffffffff
cy5: 0xfffffffffffffffffffffffffffffffffffffffffffff
2022-10-10 15:31:27,365 - INFO
group: YST_1513 field: position
Name,X,Y,Z,Autofocus offset
YST_1513_001,-6978,-2596,2339.8750348668545,113.3
YST_1513_002,-6978,-2380,2340.500034876168,113.3
YST_1513_003,-6971,-2163,2340.8750348817557,113.3
YST_1513_004,-6971,-1892,2341.2500348873436,113.3
YST_1513_005,-6692,-1892,2341.550034891814,113.3
group: YST_1513 field: time
start: 0
interval: 300
frames: 180
group: YST_1513 field: config
brightfield1: 0xfffffffffffffffffffffffffffffffffffffffffffff
pHluorin405_0_4: 0xfffffffffffffffffffffffffffffffffffffffffffff
pHluorin488_0_4: 0xfffffffffffffffffffffffffffffffffffffffffffff
cy5: 0xfffffffffffffffffffffffffffffffffffffffffffff
2022-10-10 15:31:27,365 - INFO
2022-10-10 15:31:27,365 - INFO
-----Experiment started-----
[tool.poetry]
name = "aliby"
version = "0.1.45"
version = "0.1.64"
description = "Process and analyse live-cell imaging data"
authors = ["Alan Munoz <alan.munoz@ed.ac.uk>"]
packages = [
......@@ -12,45 +12,57 @@ packages = [
]
readme = "README.md"
[tool.poetry.scripts]
aliby-run = "aliby.bin.run:run"
aliby-annotate = "aliby.bin.annotate:annotate"
aliby-visualise = "aliby.bin.visualise:napari_overlay"
[build-system]
requires = ["setuptools", "poetry-core>=1.0.0"]
build-backend = "poetry.core.masonry.api"
[tool.poetry.dependencies]
python = ">=3.8, <3.11"
PyYAML = "^6.0"
flatten-dict = "^0.4.2"
gaussianprocessderivatives = "^0.1.5"
h5py = "2.10" # File I/O
numpy = ">=1.21.6"
opencv-python = "4.1.2.30"
Bottleneck = "^1.3.5"
opencv-python = "^4.7.0.72"
pathos = "^0.2.8" # Lambda-friendly multithreading
p-tqdm = "^1.3.3"
pandas = ">=1.3.3"
py-find-1st = "^1.1.5" # Fast indexing
scikit-learn = ">=1.0.2" # Used for an extraction metric
scipy = ">=1.7.3"
# Pipeline + I/O
dask = "^2021.12.0"
imageio = "2.8.0" # For image-visualisation utilities
requests-toolbelt = "^0.9.1"
scikit-image = ">=0.18.1"
tqdm = "^4.62.3" # progress bars
xmltodict = "^0.13.0" # read ome-tiff metadata
zarr = "^2.14.0"
GitPython = "^3.1.27"
h5py = "2.10" # File I/O
# Networking
omero-py = { version = ">=5.6.2", optional = true } # contact omero server
# Baby segmentation
aliby-baby = {version = "^0.1.17", optional=true}
# Postprocessing
[tool.poetry.group.pp.dependencies]
leidenalg = "^0.8.8"
more-itertools = "^8.12.0"
pathos = "^0.2.8" # Lambda-friendly multithreading
pycatch22 = "^0.4.2"
faiss-gpu = "^1.7.2"
[tool.poetry.group.pp]
optional = true
[tool.poetry.group.dev]
optional = true
......@@ -65,9 +77,12 @@ flake8 = "^4.0.1"
pyright = "^1.1.258"
pre-commit = "^2.20.0"
seaborn = "^0.11.2"
# [tool.poetry.group.gui.dependencies]
# napari = ">=0.4.16"
debugpy = "^1.6.3"
coverage = "^7.0.4"
jupytext = "^1.14.4"
grid-strategy = "^0.0.1"
readchar = "^4.0.3"
ipdb = "^0.13.11"
[tool.poetry.group.docs]
optional = true
......@@ -84,10 +99,21 @@ optional = true
[tool.poetry.group.test.dependencies]
pytest = "^6.2.5"
[tool.poetry.group.utils]
optional = true
# Dependency groups can only be used by a poetry installation, not pip
[tool.poetry.group.utils.dependencies]
napari = {version = ">=0.4.16", optional=true}
Torch = {version = "^1.13.1", optional=true}
pytorch-lightning = {version = "^1.9.3", optional=true}
torchvision = {version = "^0.14.1", optional=true}
trio = {version = "^0.22.0", optional=true}
grid-strategy = {version = "^0.0.1", optional=true}
[tool.poetry.extras]
omero = ["omero-py"]
baby = ["aliby-baby"]
[tool.black]
line-length = 79
......
import logging
import typing as t
from abc import ABC, abstractmethod
from collections.abc import Iterable
from copy import copy
from pathlib import Path
from time import perf_counter
from typing import Union
from flatten_dict import flatten
from yaml import dump, safe_load
from agora.logging import timer
atomic = t.Union[int, float, str, bool]
......@@ -56,14 +60,14 @@ class ParametersABC(ABC):
        else:
            return iterable
    def to_yaml(self, path: Union[Path, str] = None):
        """
        Returns a yaml stream of the attributes of the class instance.
        If path is provided, the yaml stream is saved there.

        Parameters
        ----------
        path : Union[Path, str]
            Output path.
        """
        if path:
......@@ -76,7 +80,7 @@ class ParametersABC(ABC):
        return cls(**d)

    @classmethod
    def from_yaml(cls, source: Union[Path, str]):
        """
        Returns an instance from a yaml filename or stdin.
        """
......@@ -198,11 +202,16 @@ class ProcessABC(ABC):
    def run(self):
        pass

    def _log(self, message: str, level: str = "warning"):
        # Log messages at the corresponding level
        logger = logging.getLogger("aliby")
        getattr(logger, level)(f"{self.__class__.__name__}: {message}")
def check_type_recursive(val1, val2):
    same_types = True
    if not isinstance(val1, type(val2)) and not all(
        type(x) in (Path, str) for x in (val1, val2)  # Ignore str->path
    ):
        return False
    if not isinstance(val1, t.Iterable) and not isinstance(val2, t.Iterable):
......@@ -217,3 +226,28 @@ def check_type_recursive(val1, val2):
        for k in val2.keys():
            same_types = same_types and check_type_recursive(val1[k], val2[k])
    return same_types
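# Usage sketch (illustrative; part of the function above is elided in this
# diff). The intent: report whether two nested values share the same types,
# treating str and Path as interchangeable.
#
#     >>> check_type_recursive({"n": 1}, {"n": 2})
#     True
#     >>> check_type_recursive({"n": 1}, {"n": "one"})
#     False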
class StepABC(ProcessABC):
    """
    Base class that expands on ProcessABC to include tools used by Aliby steps.

    It adds a setup step, logging and time benchmarking.
    """

    def __init__(self, *args, **kwargs):
        super().__init__(*args, **kwargs)

    @abstractmethod
    def _run_tp(self):
        pass

    @timer
    def run_tp(self, tp: int, **kwargs):
        """Time and log the timing of a step."""
        return self._run_tp(tp, **kwargs)

    def run(self):
        # Steps are run per time point; use run_tp instead
        raise Warning("Steps use run_tp instead of run")
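# Sketch of a concrete step (illustrative, not part of this module): a
# subclass only implements _run_tp; run_tp then adds @timer benchmarking.
#
#     class ExampleStep(StepABC):
#         def _run_tp(self, tp: int, **kwargs):
#             return f"processed time point {tp}"
#
#     ExampleStep(parameters).run_tp(0)  # timed and logged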
"""
Tools to interact with h5 files and handle data consistently.
"""
import collections
import logging
import typing as t
from itertools import chain, groupby, product
from typing import Union
import h5py
import numpy as np
......@@ -13,26 +14,33 @@ import yaml
class BridgeH5:
"""
Base class to interact with h5 data stores.
It also contains functions useful to predict how long should segmentation take.
Base class to interact with h5 files.
It includes functions that predict how long segmentation will take.
"""
def __init__(self, filename, flag="r"):
"""Initialise with the name of the h5 file."""
self.filename = filename
if flag is not None:
self._hdf = h5py.File(filename, flag)
self._filecheck
def _log(self, message: str, level: str = "warn"):
# Log messages in the corresponding level
logger = logging.getLogger("aliby")
getattr(logger, level)(f"{self.__class__.__name__}: {message}")
def _filecheck(self):
assert "cell_info" in self._hdf, "Invalid file. No 'cell_info' found."
def close(self):
"""Close the h5 file."""
self._hdf.close()
    @property
    def meta_h5(self) -> t.Dict[str, t.Any]:
        """Return metadata, defining it if necessary."""
        if not hasattr(self, "_meta_h5"):
            with h5py.File(self.filename, "r") as f:
                self._meta_h5 = dict(f.attrs)
......@@ -44,24 +52,24 @@ class BridgeH5:
    @staticmethod
    def get_consecutives(tree, nstepsback):
        """Receives a sorted tree and returns the keys of consecutive elements."""
        # get tp level
        vals = {k: np.array(list(v)) for k, v in tree.items()}
        # get indices of consecutive elements
        where_consec = [
            {
                k: np.where(np.subtract(v[n + 1 :], v[: -n - 1]) == n + 1)[0]
                for k, v in vals.items()
            }
            for n in range(nstepsback)
        ]
        return where_consec
    def get_npairs(self, nstepsback=2, tree=None):
        if tree is None:
            tree = self.cell_tree
        consecutive = self.get_consecutives(tree, nstepsback=nstepsback)
        flat_tree = flatten(tree)
        n_predictions = 0
        for i, d in enumerate(consecutive, 1):
            flat = list(chain(*[product([k], list(v)) for k, v in d.items()]))
......@@ -70,55 +78,49 @@ class BridgeH5:
            n_predictions += len(flat_tree.get(p[0], [])) * len(
                flat_tree.get(p[1], [])
            )
        return n_predictions
    def get_npairs_over_time(self, nstepsback=2):
        tree = self.cell_tree
        npairs = []
        for tp in self._hdf["cell_info"]["processed_timepoints"][()]:
            tmp_tree = {
                k: {k2: v2 for k2, v2 in v.items() if k2 <= tp}
                for k, v in tree.items()
            }
            npairs.append(self.get_npairs(tree=tmp_tree))
        return np.diff(npairs)
    def get_info_tree(
        self, fields: Union[tuple, list] = ("trap", "timepoint", "cell_label")
    ):
        """
        Return traps, time points and labels for this position in the form of a tree in the hierarchy determined by the argument fields.

        Note that it is compressed to non-empty elements and timepoints.

        Default hierarchy is:
        - trap
        - time point
        - cell label

        This function currently produces trees of depth 3, but it can easily be extended for deeper trees if needed (e.g. considering groups, chambers and/or positions).

        Parameters
        ----------
        fields: list of strs
            Fields to fetch from 'cell_info' inside the h5 file.

        Returns
        ----------
        Nested dictionary where keys (or branches) are the upper levels and the leaves are the last element of :fields:.
        """
        zipped_info = (*zip(*[self._hdf["cell_info"][f][()] for f in fields]),)
        return recursive_groupsort(zipped_info)
def groupsort(iterable: Union[tuple, list]):
    """Sorts iterable and returns a dictionary where the values are grouped by the first element."""
    iterable = sorted(iterable, key=lambda x: x[0])
    grouped = {
        k: [x[1:] for x in v] for k, v in groupby(iterable, lambda x: x[0])
......@@ -127,17 +129,18 @@ def groupsort(iterable: Union[tuple, list]):
def recursive_groupsort(iterable):
    """Recursive extension of groupsort."""
    if len(iterable[0]) > 1:
        return {
            k: recursive_groupsort(v) for k, v in groupsort(iterable).items()
        }
    else:
        # only two elements in list
        return [x[0] for x in iterable]
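# Usage sketch (illustrative): group (trap, timepoint, label) triples by
# trap, then by timepoint.
#
#     >>> recursive_groupsort([(0, 1, "a"), (0, 2, "b"), (1, 1, "c")])
#     {0: {1: ['a'], 2: ['b']}, 1: {1: ['c']}}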
def flatten(d, parent_key="", sep="_"):
"""Flatten nested dict. Adapted from https://stackoverflow.com/a/6027615"""
"""Flatten nested dict. Adapted from https://stackoverflow.com/a/6027615."""
items = []
for k, v in d.items():
new_key = parent_key + (k,) if parent_key else (k,)
......@@ -149,20 +152,18 @@ def flatten(d, parent_key="", sep="_"):
def attrs_from_h5(fpath: str):
    """Return attributes as dict from an h5 file."""
    with h5py.File(fpath, "r") as f:
        return dict(f.attrs)
def parameters_from_h5(fpath: str):
    attrs = attrs_from_h5(fpath)
    return yaml.safe_load(attrs["parameters"])
def image_creds_from_h5(fpath: str):
    """Return image id and server credentials from an h5."""
    attrs = attrs_from_h5(fpath)
    return (
        attrs["image_id"],
        {
            k: yaml.safe_load(attrs["parameters"])["general"][k]
            for k in ("username", "password", "host")
        },
    )
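# Usage sketch for the three helpers above (the file path is hypothetical):
#
#     >>> attrs = attrs_from_h5("position_001.h5")        # raw h5 attributes
#     >>> params = parameters_from_h5("position_001.h5")  # parsed parameters
#     >>> image_id, creds = image_creds_from_h5("position_001.h5")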