From 61a3376ac71e26aeefe0e9eb9d6507bf14342728 Mon Sep 17 00:00:00 2001
From: Arin Wongprommoon <arin.wongprommoon@ed.ac.uk>
Date: Fri, 17 Jun 2022 10:33:27 +0100
Subject: [PATCH 1/5] [routines] Unit scaling argument

Specifying a sampling period via the sampling_period argument caused the
horizontal axes to be multiplied by the scaling factor twice.

This is because recent changes to postprocessor (see the last paragraph
of this commit message) make the columns of DataFrames show absolute
time units (e.g. [0, 5, 10, 15...] if the images were taken every 5
minutes).  Previously, columns showed time points (e.g. [0, 1, 2,
3...]).  All routines relied on column labels to define the horizontal
axis.  This issue would have been addressed sooner if the commit in
question had a more informative description; 'add tiniterval property'
tells me nothing and does not inform me that it affects the column
labels.

These recent changes mean that the sampling_period argument is no longer
necessary.  However, instead of deleting this argument, I've decided to
repurpose it for unit scaling, e.g. from minutes to hours.
Operationally, nothing has changed, but the meaning of the argument has
changed, and I've updated the docstrings accordingly.

This commit may affect the horizontal axes of existing plots; users
should inspect the axes carefully, especially if the source data relies
on postprocessor.grouper.

This bug was likely caused by 119a8a1288950c7b3026aba604bf0908fe011239
on 2022-06-16 14:21.  It is unclear from the commit message, but I
suspect that this commit attempted to incorporate the image sampling
interval into defining the DataFrame columns produced by postprocessor.
This commit addresses issue #20.
---
 postprocessor/routines/mean_plot.py         | 14 +++++++-------
 postprocessor/routines/median_plot.py       | 14 +++++++-------
 postprocessor/routines/plottingabc.py       |  4 ++--
 postprocessor/routines/single_birth_plot.py | 14 +++++++-------
 postprocessor/routines/single_plot.py       | 14 +++++++-------
 5 files changed, 30 insertions(+), 30 deletions(-)

diff --git a/postprocessor/routines/mean_plot.py b/postprocessor/routines/mean_plot.py
index a3af66d..e259448 100644
--- a/postprocessor/routines/mean_plot.py
+++ b/postprocessor/routines/mean_plot.py
@@ -13,7 +13,7 @@ class _MeanPlotter(BasePlotter):
         self,
         trace_df,
         trace_name,
-        sampling_period,
+        unit_scaling,
         label,
         mean_color,
         error_color,
@@ -22,7 +22,7 @@ class _MeanPlotter(BasePlotter):
         ylabel,
         plot_title,
     ):
-        super().__init__(trace_name, sampling_period, xlabel, plot_title)
+        super().__init__(trace_name, unit_scaling, xlabel, plot_title)
         # Define attributes from arguments
         self.trace_df = trace_df
         self.label = label
@@ -35,7 +35,7 @@ class _MeanPlotter(BasePlotter):
         self.ylabel = ylabel
 
         # Mean and standard error
-        self.trace_time = np.array(self.trace_df.columns) * self.sampling_period
+        self.trace_time = np.array(self.trace_df.columns) * self.unit_scaling
         self.mean_ts = self.trace_df.mean(axis=0)
         self.stderr = self.trace_df.std(axis=0) / np.sqrt(len(self.trace_df))
 
@@ -64,7 +64,7 @@ class _MeanPlotter(BasePlotter):
 def mean_plot(
     trace_df,
     trace_name="flavin",
-    sampling_period=5,
+    unit_scaling=1,
     label="wild type",
     mean_color="b",
     error_color="lightblue",
@@ -82,8 +82,8 @@ def mean_plot(
         Time series of traces (rows = cells, columns = time points).
     trace_name : string
         Name of trace being plotted, e.g. 'flavin'.
-    sampling_period : int or float
-        Sampling period, in unit time.
+    unit_scaling : int or float
+        Unit scaling factor, e.g. 1/60 to convert minutes to hours.
     label : string
         Name of group being plotted, e.g. a strain name.
     mean_color : string
@@ -109,7 +109,7 @@ def mean_plot(
     plotter = _MeanPlotter(
         trace_df,
         trace_name,
-        sampling_period,
+        unit_scaling,
         label,
         mean_color,
         error_color,
diff --git a/postprocessor/routines/median_plot.py b/postprocessor/routines/median_plot.py
index 4b3205c..573263b 100644
--- a/postprocessor/routines/median_plot.py
+++ b/postprocessor/routines/median_plot.py
@@ -13,7 +13,7 @@ class _MedianPlotter(BasePlotter):
         self,
         trace_df,
         trace_name,
-        sampling_period,
+        unit_scaling,
         label,
         median_color,
         error_color,
@@ -22,7 +22,7 @@ class _MedianPlotter(BasePlotter):
         ylabel,
         plot_title,
     ):
-        super().__init__(trace_name, sampling_period, xlabel, plot_title)
+        super().__init__(trace_name, unit_scaling, xlabel, plot_title)
         # Define attributes from arguments
         self.trace_df = trace_df
         self.label = label
@@ -35,7 +35,7 @@ class _MedianPlotter(BasePlotter):
         self.ylabel = ylabel
 
         # Median and interquartile range
-        self.trace_time = np.array(self.trace_df.columns) * self.sampling_period
+        self.trace_time = np.array(self.trace_df.columns) * self.unit_scaling
         self.median_ts = self.trace_df.median(axis=0)
         self.quartile1_ts = self.trace_df.quantile(0.25)
         self.quartile3_ts = self.trace_df.quantile(0.75)
@@ -65,7 +65,7 @@ class _MedianPlotter(BasePlotter):
 def median_plot(
     trace_df,
     trace_name="flavin",
-    sampling_period=5,
+    unit_scaling=1,
     label="wild type",
     median_color="b",
     error_color="lightblue",
@@ -83,8 +83,8 @@ def median_plot(
         Time series of traces (rows = cells, columns = time points).
     trace_name : string
         Name of trace being plotted, e.g. 'flavin'.
-    sampling_period : int or float
-        Sampling period, in unit time.
+    unit_scaling : int or float
+        Unit scaling factor, e.g. 1/60 to convert minutes to hours.
     label : string
         Name of group being plotted, e.g. a strain name.
     median_color : string
@@ -110,7 +110,7 @@ def median_plot(
     plotter = _MedianPlotter(
         trace_df,
         trace_name,
-        sampling_period,
+        unit_scaling,
         label,
         median_color,
         error_color,
diff --git a/postprocessor/routines/plottingabc.py b/postprocessor/routines/plottingabc.py
index 1990df9..97b89aa 100644
--- a/postprocessor/routines/plottingabc.py
+++ b/postprocessor/routines/plottingabc.py
@@ -6,10 +6,10 @@ from abc import ABC
 class BasePlotter(ABC):
     """Base class for plotting handler classes"""
 
-    def __init__(self, trace_name, sampling_period, xlabel, plot_title):
+    def __init__(self, trace_name, unit_scaling, xlabel, plot_title):
         """Common attributes"""
         self.trace_name = trace_name
-        self.sampling_period = sampling_period
+        self.unit_scaling = unit_scaling
 
         self.xlabel = xlabel
         self.ylabel = None
diff --git a/postprocessor/routines/single_birth_plot.py b/postprocessor/routines/single_birth_plot.py
index 6d1a405..671cfec 100644
--- a/postprocessor/routines/single_birth_plot.py
+++ b/postprocessor/routines/single_birth_plot.py
@@ -14,7 +14,7 @@ class _SingleBirthPlotter(_SinglePlotter):
         trace_values,
         trace_name,
         birth_mask,
-        sampling_period,
+        unit_scaling,
         trace_color,
         birth_color,
         trace_linestyle,
@@ -27,7 +27,7 @@ class _SingleBirthPlotter(_SinglePlotter):
             trace_timepoints,
             trace_values,
             trace_name,
-            sampling_period,
+            unit_scaling,
             trace_color,
             trace_linestyle,
             xlabel,
@@ -40,7 +40,7 @@ class _SingleBirthPlotter(_SinglePlotter):
 
     def plot(self, ax):
         """Draw the line plots on the provided Axes."""
-        trace_time = self.trace_timepoints * self.sampling_period
+        trace_time = self.trace_timepoints * self.unit_scaling
         super().plot(ax)
         birth_mask_bool = self.birth_mask.astype(bool)
         for occurence, birth_time in enumerate(trace_time[birth_mask_bool]):
@@ -62,7 +62,7 @@ def single_birth_plot(
     trace_values,
     trace_name="flavin",
     birth_mask=None,
-    sampling_period=5,
+    unit_scaling=1,
     trace_color="b",
     birth_color="k",
     trace_linestyle="-",
@@ -84,8 +84,8 @@ def single_birth_plot(
     birth_mask : array_like
         Mask to indicate where births are. Expect values of '0' and '1' or
         'False' and 'True' in the elements.
-    sampling_period : int or float
-        Sampling period, in unit time.
+    unit_scaling : int or float
+        Unit scaling factor, e.g. 1/60 to convert minutes to hours.
     trace_color : string
         matplotlib colour string for the trace
     birth_color : string
@@ -116,7 +116,7 @@ def single_birth_plot(
         trace_values,
         trace_name,
         birth_mask,
-        sampling_period,
+        unit_scaling,
         trace_color,
         birth_color,
         trace_linestyle,
diff --git a/postprocessor/routines/single_plot.py b/postprocessor/routines/single_plot.py
index 11ee940..68e7d76 100644
--- a/postprocessor/routines/single_plot.py
+++ b/postprocessor/routines/single_plot.py
@@ -13,13 +13,13 @@ class _SinglePlotter(BasePlotter):
         trace_timepoints,
         trace_values,
         trace_name,
-        sampling_period,
+        unit_scaling,
         trace_color,
         trace_linestyle,
         xlabel,
         plot_title,
     ):
-        super().__init__(trace_name, sampling_period, xlabel, plot_title)
+        super().__init__(trace_name, unit_scaling, xlabel, plot_title)
         # Define attributes from arguments
         self.trace_timepoints = trace_timepoints
         self.trace_values = trace_values
@@ -33,7 +33,7 @@ class _SinglePlotter(BasePlotter):
         """Draw the line plot on the provided Axes."""
         super().plot(ax)
         ax.plot(
-            self.trace_timepoints * self.sampling_period,
+            self.trace_timepoints * self.unit_scaling,
             self.trace_values,
             color=self.trace_color,
             linestyle=self.trace_linestyle,
@@ -45,7 +45,7 @@ def single_plot(
     trace_timepoints,
     trace_values,
     trace_name="flavin",
-    sampling_period=5,
+    unit_scaling=1,
     trace_color="b",
     trace_linestyle="-",
     xlabel="Time (min)",
@@ -62,8 +62,8 @@ def single_plot(
         Trace to plot.
     trace_name : string
         Name of trace being plotted, e.g. 'flavin'.
-    sampling_period : int or float
-        Sampling period, in unit time.
+    unit_scaling : int or float
+        Unit scaling factor, e.g. 1/60 to convert minutes to hours.
     trace_color : string
         matplotlib colour string, specifies colour of line plot.
     trace_linestyle : string
@@ -89,7 +89,7 @@ def single_plot(
         trace_timepoints,
         trace_values,
         trace_name,
-        sampling_period,
+        unit_scaling,
         trace_color,
         trace_linestyle,
         xlabel,
-- 
GitLab


From a88117d59d581ddcb626d360cc263efd59f1b16e Mon Sep 17 00:00:00 2001
From: Arin Wongprommoon <arin.wongprommoon@ed.ac.uk>
Date: Fri, 17 Jun 2022 15:34:35 +0100
Subject: [PATCH 2/5] [routines/histogram] Remove unit_scaling as it is not
 used

---
 postprocessor/routines/histogram.py | 10 +++-------
 1 file changed, 3 insertions(+), 7 deletions(-)

diff --git a/postprocessor/routines/histogram.py b/postprocessor/routines/histogram.py
index 4533d66..6d39574 100644
--- a/postprocessor/routines/histogram.py
+++ b/postprocessor/routines/histogram.py
@@ -12,7 +12,6 @@ class _HistogramPlotter:
         values,
         label,
         color,
-        sampling_period,
         binsize,
         lognormal,
         lognormal_base,
@@ -24,7 +23,6 @@ class _HistogramPlotter:
         self.values = values
         self.label = label
         self.color = color
-        self.sampling_period = sampling_period
         self.binsize = binsize
         self.lognormal = lognormal
         self.lognormal_base = lognormal_base
@@ -39,7 +37,9 @@ class _HistogramPlotter:
         if self.lognormal:
             self.bins = np.logspace(
                 0,
-                np.ceil(np.log(np.nanmax(values)) / np.log(self.lognormal_base)),
+                np.ceil(
+                    np.log(np.nanmax(values)) / np.log(self.lognormal_base)
+                ),
                 base=self.lognormal_base,
             )  # number of bins will be 50 by default, as it's the default in np.logspace
         else:
@@ -77,7 +77,6 @@ def histogram(
     values,
     label,
     color="b",
-    sampling_period=5,
     binsize=5,
     lognormal=False,
     lognormal_base=10,
@@ -96,8 +95,6 @@ def histogram(
         Name of value being plotting, e.g. cell division cycle length.
     color : string
         Colour of bars.
-    sampling_period : float
-        Sampling period, in unit time.
     binsize : float
         Bin size.
     lognormal : bool
@@ -127,7 +124,6 @@ def histogram(
         values,
         label,
         color,
-        sampling_period,
         binsize,
         lognormal,
         lognormal_base,
-- 
GitLab


From dd1d6eabfcc035526cb5106babf25e724d1c72a5 Mon Sep 17 00:00:00 2001
From: Arin Wongprommoon <arin.wongprommoon@ed.ac.uk>
Date: Fri, 17 Jun 2022 15:41:53 +0100
Subject: [PATCH 3/5] [routines/boxplot] Unit scaling argument

Repurpose sampling_period for unit scaling, as in
61a3376ac71e26aeefe0e9eb9d6507bf14342728

This commit addresses issue #20.
---
 postprocessor/routines/boxplot.py | 16 +++++++++-------
 1 file changed, 9 insertions(+), 7 deletions(-)

diff --git a/postprocessor/routines/boxplot.py b/postprocessor/routines/boxplot.py
index 3a3523b..a9855e9 100644
--- a/postprocessor/routines/boxplot.py
+++ b/postprocessor/routines/boxplot.py
@@ -14,13 +14,13 @@ class _BoxplotPlotter(BasePlotter):
         self,
         trace_df,
         trace_name,
-        sampling_period,
+        unit_scaling,
         box_color,
         xtick_step,
         xlabel,
         plot_title,
     ):
-        super().__init__(trace_name, sampling_period, xlabel, plot_title)
+        super().__init__(trace_name, unit_scaling, xlabel, plot_title)
         # Define attributes from arguments
         self.trace_df = trace_df
         self.box_color = box_color
@@ -31,9 +31,11 @@ class _BoxplotPlotter(BasePlotter):
 
         # Define horizontal axis ticks and labels
         # hacky! -- redefine column names
-        trace_df.columns = trace_df.columns * self.sampling_period
+        trace_df.columns = trace_df.columns * self.unit_scaling
         self.fmt = ticker.FuncFormatter(
-            lambda x, pos: "{0:g}".format(x / (self.xtick_step / self.sampling_period))
+            lambda x, pos: "{0:g}".format(
+                x / (self.xtick_step / self.unit_scaling)
+            )
         )
 
     def plot(self, ax):
@@ -47,14 +49,14 @@ class _BoxplotPlotter(BasePlotter):
             ax=ax,
         )
         ax.xaxis.set_major_locator(
-            ticker.MultipleLocator(self.xtick_step / self.sampling_period)
+            ticker.MultipleLocator(self.xtick_step / self.unit_scaling)
         )
 
 
 def boxplot(
     trace_df,
     trace_name,
-    sampling_period=5,
+    unit_scaling=1,
     box_color="b",
     xtick_step=60,
     xlabel="Time (min)",
@@ -64,7 +66,7 @@ def boxplot(
     plotter = _BoxplotPlotter(
         trace_df,
         trace_name,
-        sampling_period,
+        unit_scaling,
         box_color,
         xtick_step,
         xlabel,
-- 
GitLab


From f43700ec2017467a34dd40990b065648a275bec2 Mon Sep 17 00:00:00 2001
From: Arin Wongprommoon <arin.wongprommoon@ed.ac.uk>
Date: Fri, 17 Jun 2022 15:48:38 +0100
Subject: [PATCH 4/5] [routines/boxplot] Add docstring for plotting function

---
 postprocessor/routines/boxplot.py | 35 +++++++++++++++++++++++++++++++
 1 file changed, 35 insertions(+)

diff --git a/postprocessor/routines/boxplot.py b/postprocessor/routines/boxplot.py
index a9855e9..ea5e9ba 100644
--- a/postprocessor/routines/boxplot.py
+++ b/postprocessor/routines/boxplot.py
@@ -63,6 +63,41 @@ def boxplot(
     plot_title="",
     ax=None,
 ):
+    """Draw series of boxplots from an array of time series of traces
+
+    Draw series of boxplots from an array of time series of traces, showing the
+    distribution of values at each time point over time.
+
+    Parameters
+    ----------
+    trace_df : pandas.DataFrame
+        Time series of traces (rows = cells, columns = time points).
+    trace_name : string
+        Name of trace being plotted, e.g. 'flavin'.
+    unit_scaling : int or float
+        Unit scaling factor, e.g. 1/60 to convert minutes to hours.
+    box_color : string
+        matplotlib colour string, specifies colour of boxes in boxplot
+    xtick_step : int or float
+        Interval length, in unit time, to draw x axis ticks.
+    xlabel : string
+        x axis label.
+    plot_title : string
+        Plot title.
+    ax : matplotlib Axes
+        Axes in which to draw the plot, otherwise use the currently active Axes.
+
+    Returns
+    -------
+    ax : matplotlib Axes
+        Axes object with the boxplots.
+
+    Examples
+    --------
+    FIXME: Add docs.
+
+    """
+
     plotter = _BoxplotPlotter(
         trace_df,
         trace_name,
-- 
GitLab


From f5cc6c3e4fa1096dec9548beb5d98dd5f049f6d4 Mon Sep 17 00:00:00 2001
From: Arin Wongprommoon <arin.wongprommoon@ed.ac.uk>
Date: Fri, 17 Jun 2022 15:52:45 +0100
Subject: [PATCH 5/5] [routines/heatmap] Incorporates unit scaling into x-axis
 ticks

Heatmap added x-ticks based on the time points, not absolute time.

When a DataFrame is passed into matplotlib.imshow, the Axes no longer
cares about the column names.  We want the plot to take into account the
sampling period AND the unit scaling if the user specifies one.

I copied over the method I implemented for boxplot.py -- it essentially
'tricks' matplotlib by redefining the tick labels rather than changing
the time-axis values (see
https://stackoverflow.com/questions/10171618/changing-plot-scale-by-a-factor-in-matplotlib).
matplotlib's xscale does not support simple linear re-scaling; in any
case, having time-axis values does not make sense for imshow.

This commit addresses issue #20.
---
 postprocessor/routines/heatmap.py | 32 +++++++++++++++++++------------
 1 file changed, 20 insertions(+), 12 deletions(-)

diff --git a/postprocessor/routines/heatmap.py b/postprocessor/routines/heatmap.py
index bacf514..d2a8927 100644
--- a/postprocessor/routines/heatmap.py
+++ b/postprocessor/routines/heatmap.py
@@ -2,7 +2,7 @@
 
 import numpy as np
 import matplotlib.pyplot as plt
-from matplotlib import cm, colors
+from matplotlib import cm, colors, ticker
 
 from postprocessor.core.processes.standardscaler import standardscaler
 from postprocessor.routines.plottingabc import BasePlotter
@@ -17,14 +17,14 @@ class _HeatmapPlotter(BasePlotter):
         trace_name,
         births_df,
         cmap,
-        sampling_period,
+        unit_scaling,
         xtick_step,
         scale,
         robust,
         xlabel,
         plot_title,
     ):
-        super().__init__(trace_name, sampling_period, xlabel, plot_title)
+        super().__init__(trace_name, unit_scaling, xlabel, plot_title)
         # Define attributes from arguments
         self.trace_df = trace_df
         self.births_df = births_df
@@ -59,12 +59,17 @@ class _HeatmapPlotter(BasePlotter):
             self.vmin = None
             self.vmax = None
 
+        # Define horizontal axis ticks and labels
+        # hacky! -- redefine column names
+        trace_df.columns = trace_df.columns * self.unit_scaling
+        self.fmt = ticker.FuncFormatter(
+            lambda x, pos: "{0:g}".format(x * self.unit_scaling)
+        )
+
     def plot(self, ax, cax):
         """Draw the heatmap on the provided Axes."""
         super().plot(ax)
-        # Horizontal axis labels as multiples of xtick_step
-        ax.xaxis.set_major_locator(plt.MultipleLocator(self.xtick_step))
-
+        ax.xaxis.set_major_formatter(self.fmt)
         # Draw trace heatmap
         trace_heatmap = ax.imshow(
             self.trace_scaled,
@@ -73,7 +78,11 @@ class _HeatmapPlotter(BasePlotter):
             vmin=self.vmin,
             vmax=self.vmax,
         )
-
+        # Horizontal axis labels as multiples of xtick_step, taking
+        # into account unit scaling
+        ax.xaxis.set_major_locator(
+            ticker.MultipleLocator(self.xtick_step / self.unit_scaling)
+        )
         # Overlay births, if present
         if self.births_df is not None:
             # Must be masked array for transparency
@@ -86,7 +95,6 @@ class _HeatmapPlotter(BasePlotter):
                 births_heatmap_mask,
                 interpolation="none",
             )
-
         # Draw colour bar
         colorbar = ax.figure.colorbar(
             mappable=trace_heatmap, cax=cax, ax=ax, label=self.colorbarlabel
@@ -98,7 +106,7 @@ def heatmap(
     trace_name,
     births_df=None,
     cmap=cm.RdBu,
-    sampling_period=5,
+    unit_scaling=1,
     xtick_step=60,
     scale=True,
     robust=True,
@@ -120,8 +128,8 @@ def heatmap(
         0 or 1.
     cmap : matplotlib ColorMap
         Colour map for heatmap.
-    sampling_period : int or float
-        Sampling period, in unit time.
+    unit_scaling : int or float
+        Unit scaling factor, e.g. 1/60 to convert minutes to hours.
     xtick_step : int or float
         Interval length, in unit time, to draw x axis ticks.
     scale : bool
@@ -154,7 +162,7 @@ def heatmap(
         trace_name,
         births_df,
         cmap,
-        sampling_period,
+        unit_scaling,
         xtick_step,
         scale,
         robust,
-- 
GitLab