From 61a3376ac71e26aeefe0e9eb9d6507bf14342728 Mon Sep 17 00:00:00 2001 From: Arin Wongprommoon <arin.wongprommoon@ed.ac.uk> Date: Fri, 17 Jun 2022 10:33:27 +0100 Subject: [PATCH 1/5] [routines] Unit scaling argument Specifying a sampling period via the sampling_period argument multiplied the horizontal axes by the scaling factor a second time. This is because recent changes to postprocessor (see last paragraph of commit) make the columns of DataFrames show absolute time units (e.g. [0, 5, 10, 15...] if the images were taken every 5 minutes). Previously, columns showed time points (e.g. [0, 1, 2, 3...]). All routines relied on column labels to define the horizontal axis. This issue would have been addressed in a more timely manner if the commit in question had a more informative description; 'add tiniterval property' tells me nothing and does not inform me that it affects the column labels. These recent changes mean that the sampling_period argument is no longer necessary. However, instead of deleting this argument, I've decided to repurpose it for unit scaling, e.g. from minutes to hours. Operationally, nothing has changed, but the meaning of the argument has changed, and I've updated the docstrings accordingly. This commit may affect the horizontal axes of plots; users should inspect the axes carefully, especially if the source data relies on postprocessor.grouper. This bug was likely caused by 119a8a1288950c7b3026aba604bf0908fe011239 on 2022-06-16 14:21. It is unclear from the commit message, but I suspect that this commit attempted to incorporate the image sampling interval into defining the DataFrame columns produced by postprocessor. This commit addresses issue #20. 
--- postprocessor/routines/mean_plot.py | 14 +++++++------- postprocessor/routines/median_plot.py | 14 +++++++------- postprocessor/routines/plottingabc.py | 4 ++-- postprocessor/routines/single_birth_plot.py | 14 +++++++------- postprocessor/routines/single_plot.py | 14 +++++++------- 5 files changed, 30 insertions(+), 30 deletions(-) diff --git a/postprocessor/routines/mean_plot.py b/postprocessor/routines/mean_plot.py index a3af66d..e259448 100644 --- a/postprocessor/routines/mean_plot.py +++ b/postprocessor/routines/mean_plot.py @@ -13,7 +13,7 @@ class _MeanPlotter(BasePlotter): self, trace_df, trace_name, - sampling_period, + unit_scaling, label, mean_color, error_color, @@ -22,7 +22,7 @@ class _MeanPlotter(BasePlotter): ylabel, plot_title, ): - super().__init__(trace_name, sampling_period, xlabel, plot_title) + super().__init__(trace_name, unit_scaling, xlabel, plot_title) # Define attributes from arguments self.trace_df = trace_df self.label = label @@ -35,7 +35,7 @@ class _MeanPlotter(BasePlotter): self.ylabel = ylabel # Mean and standard error - self.trace_time = np.array(self.trace_df.columns) * self.sampling_period + self.trace_time = np.array(self.trace_df.columns) * self.unit_scaling self.mean_ts = self.trace_df.mean(axis=0) self.stderr = self.trace_df.std(axis=0) / np.sqrt(len(self.trace_df)) @@ -64,7 +64,7 @@ class _MeanPlotter(BasePlotter): def mean_plot( trace_df, trace_name="flavin", - sampling_period=5, + unit_scaling=1, label="wild type", mean_color="b", error_color="lightblue", @@ -82,8 +82,8 @@ def mean_plot( Time series of traces (rows = cells, columns = time points). trace_name : string Name of trace being plotted, e.g. 'flavin'. - sampling_period : int or float - Sampling period, in unit time. + unit_scaling : int or float + Unit scaling factor, e.g. 1/60 to convert minutes to hours. label : string Name of group being plotted, e.g. a strain name. 
mean_color : string @@ -109,7 +109,7 @@ def mean_plot( plotter = _MeanPlotter( trace_df, trace_name, - sampling_period, + unit_scaling, label, mean_color, error_color, diff --git a/postprocessor/routines/median_plot.py b/postprocessor/routines/median_plot.py index 4b3205c..573263b 100644 --- a/postprocessor/routines/median_plot.py +++ b/postprocessor/routines/median_plot.py @@ -13,7 +13,7 @@ class _MedianPlotter(BasePlotter): self, trace_df, trace_name, - sampling_period, + unit_scaling, label, median_color, error_color, @@ -22,7 +22,7 @@ class _MedianPlotter(BasePlotter): ylabel, plot_title, ): - super().__init__(trace_name, sampling_period, xlabel, plot_title) + super().__init__(trace_name, unit_scaling, xlabel, plot_title) # Define attributes from arguments self.trace_df = trace_df self.label = label @@ -35,7 +35,7 @@ class _MedianPlotter(BasePlotter): self.ylabel = ylabel # Median and interquartile range - self.trace_time = np.array(self.trace_df.columns) * self.sampling_period + self.trace_time = np.array(self.trace_df.columns) * self.unit_scaling self.median_ts = self.trace_df.median(axis=0) self.quartile1_ts = self.trace_df.quantile(0.25) self.quartile3_ts = self.trace_df.quantile(0.75) @@ -65,7 +65,7 @@ class _MedianPlotter(BasePlotter): def median_plot( trace_df, trace_name="flavin", - sampling_period=5, + unit_scaling=1, label="wild type", median_color="b", error_color="lightblue", @@ -83,8 +83,8 @@ def median_plot( Time series of traces (rows = cells, columns = time points). trace_name : string Name of trace being plotted, e.g. 'flavin'. - sampling_period : int or float - Sampling period, in unit time. + unit_scaling : int or float + Unit scaling factor, e.g. 1/60 to convert minutes to hours. label : string Name of group being plotted, e.g. a strain name. 
median_color : string @@ -110,7 +110,7 @@ def median_plot( plotter = _MedianPlotter( trace_df, trace_name, - sampling_period, + unit_scaling, label, median_color, error_color, diff --git a/postprocessor/routines/plottingabc.py b/postprocessor/routines/plottingabc.py index 1990df9..97b89aa 100644 --- a/postprocessor/routines/plottingabc.py +++ b/postprocessor/routines/plottingabc.py @@ -6,10 +6,10 @@ from abc import ABC class BasePlotter(ABC): """Base class for plotting handler classes""" - def __init__(self, trace_name, sampling_period, xlabel, plot_title): + def __init__(self, trace_name, unit_scaling, xlabel, plot_title): """Common attributes""" self.trace_name = trace_name - self.sampling_period = sampling_period + self.unit_scaling = unit_scaling self.xlabel = xlabel self.ylabel = None diff --git a/postprocessor/routines/single_birth_plot.py b/postprocessor/routines/single_birth_plot.py index 6d1a405..671cfec 100644 --- a/postprocessor/routines/single_birth_plot.py +++ b/postprocessor/routines/single_birth_plot.py @@ -14,7 +14,7 @@ class _SingleBirthPlotter(_SinglePlotter): trace_values, trace_name, birth_mask, - sampling_period, + unit_scaling, trace_color, birth_color, trace_linestyle, @@ -27,7 +27,7 @@ class _SingleBirthPlotter(_SinglePlotter): trace_timepoints, trace_values, trace_name, - sampling_period, + unit_scaling, trace_color, trace_linestyle, xlabel, @@ -40,7 +40,7 @@ class _SingleBirthPlotter(_SinglePlotter): def plot(self, ax): """Draw the line plots on the provided Axes.""" - trace_time = self.trace_timepoints * self.sampling_period + trace_time = self.trace_timepoints * self.unit_scaling super().plot(ax) birth_mask_bool = self.birth_mask.astype(bool) for occurence, birth_time in enumerate(trace_time[birth_mask_bool]): @@ -62,7 +62,7 @@ def single_birth_plot( trace_values, trace_name="flavin", birth_mask=None, - sampling_period=5, + unit_scaling=1, trace_color="b", birth_color="k", trace_linestyle="-", @@ -84,8 +84,8 @@ def single_birth_plot( 
birth_mask : array_like Mask to indicate where births are. Expect values of '0' and '1' or 'False' and 'True' in the elements. - sampling_period : int or float - Sampling period, in unit time. + unit_scaling : int or float + Unit scaling factor, e.g. 1/60 to convert minutes to hours. trace_color : string matplotlib colour string for the trace birth_color : string @@ -116,7 +116,7 @@ def single_birth_plot( trace_values, trace_name, birth_mask, - sampling_period, + unit_scaling, trace_color, birth_color, trace_linestyle, diff --git a/postprocessor/routines/single_plot.py b/postprocessor/routines/single_plot.py index 11ee940..68e7d76 100644 --- a/postprocessor/routines/single_plot.py +++ b/postprocessor/routines/single_plot.py @@ -13,13 +13,13 @@ class _SinglePlotter(BasePlotter): trace_timepoints, trace_values, trace_name, - sampling_period, + unit_scaling, trace_color, trace_linestyle, xlabel, plot_title, ): - super().__init__(trace_name, sampling_period, xlabel, plot_title) + super().__init__(trace_name, unit_scaling, xlabel, plot_title) # Define attributes from arguments self.trace_timepoints = trace_timepoints self.trace_values = trace_values @@ -33,7 +33,7 @@ class _SinglePlotter(BasePlotter): """Draw the line plot on the provided Axes.""" super().plot(ax) ax.plot( - self.trace_timepoints * self.sampling_period, + self.trace_timepoints * self.unit_scaling, self.trace_values, color=self.trace_color, linestyle=self.trace_linestyle, @@ -45,7 +45,7 @@ def single_plot( trace_timepoints, trace_values, trace_name="flavin", - sampling_period=5, + unit_scaling=1, trace_color="b", trace_linestyle="-", xlabel="Time (min)", @@ -62,8 +62,8 @@ def single_plot( Trace to plot. trace_name : string Name of trace being plotted, e.g. 'flavin'. - sampling_period : int or float - Sampling period, in unit time. + unit_scaling : int or float + Unit scaling factor, e.g. 1/60 to convert minutes to hours. trace_color : string matplotlib colour string, specifies colour of line plot. 
trace_linestyle : string @@ -89,7 +89,7 @@ def single_plot( trace_timepoints, trace_values, trace_name, - sampling_period, + unit_scaling, trace_color, trace_linestyle, xlabel, -- GitLab From a88117d59d581ddcb626d360cc263efd59f1b16e Mon Sep 17 00:00:00 2001 From: Arin Wongprommoon <arin.wongprommoon@ed.ac.uk> Date: Fri, 17 Jun 2022 15:34:35 +0100 Subject: [PATCH 2/5] [routines/histogram] Remove sampling_period as it is not used --- postprocessor/routines/histogram.py | 10 +++------- 1 file changed, 3 insertions(+), 7 deletions(-) diff --git a/postprocessor/routines/histogram.py b/postprocessor/routines/histogram.py index 4533d66..6d39574 100644 --- a/postprocessor/routines/histogram.py +++ b/postprocessor/routines/histogram.py @@ -12,7 +12,6 @@ class _HistogramPlotter: values, label, color, - sampling_period, binsize, lognormal, lognormal_base, @@ -24,7 +23,6 @@ class _HistogramPlotter: self.values = values self.label = label self.color = color - self.sampling_period = sampling_period self.binsize = binsize self.lognormal = lognormal self.lognormal_base = lognormal_base @@ -39,7 +37,9 @@ class _HistogramPlotter: if self.lognormal: self.bins = np.logspace( 0, - np.ceil(np.log(np.nanmax(values)) / np.log(self.lognormal_base)), + np.ceil( + np.log(np.nanmax(values)) / np.log(self.lognormal_base) + ), base=self.lognormal_base, ) # number of bins will be 50 by default, as it's the default in np.logspace else: @@ -77,7 +77,6 @@ def histogram( values, label, color="b", - sampling_period=5, binsize=5, lognormal=False, lognormal_base=10, @@ -96,8 +95,6 @@ def histogram( Name of value being plotting, e.g. cell division cycle length. color : string Colour of bars. - sampling_period : float - Sampling period, in unit time. binsize : float Bin size. 
lognormal : bool @@ -127,7 +124,6 @@ def histogram( values, label, color, - sampling_period, binsize, lognormal, lognormal_base, -- GitLab From dd1d6eabfcc035526cb5106babf25e724d1c72a5 Mon Sep 17 00:00:00 2001 From: Arin Wongprommoon <arin.wongprommoon@ed.ac.uk> Date: Fri, 17 Jun 2022 15:41:53 +0100 Subject: [PATCH 3/5] [routines/boxplot] Unit scaling argument Repurpose sampling_period for unit scaling, as in 61a3376ac71e26aeefe0e9eb9d6507bf14342728. This commit addresses issue #20. --- postprocessor/routines/boxplot.py | 16 +++++++++------- 1 file changed, 9 insertions(+), 7 deletions(-) diff --git a/postprocessor/routines/boxplot.py b/postprocessor/routines/boxplot.py index 3a3523b..a9855e9 100644 --- a/postprocessor/routines/boxplot.py +++ b/postprocessor/routines/boxplot.py @@ -14,13 +14,13 @@ class _BoxplotPlotter(BasePlotter): self, trace_df, trace_name, - sampling_period, + unit_scaling, box_color, xtick_step, xlabel, plot_title, ): - super().__init__(trace_name, sampling_period, xlabel, plot_title) + super().__init__(trace_name, unit_scaling, xlabel, plot_title) # Define attributes from arguments self.trace_df = trace_df self.box_color = box_color @@ -31,9 +31,11 @@ class _BoxplotPlotter(BasePlotter): # Define horizontal axis ticks and labels # hacky! 
-- redefine column names - trace_df.columns = trace_df.columns * self.sampling_period + trace_df.columns = trace_df.columns * self.unit_scaling self.fmt = ticker.FuncFormatter( - lambda x, pos: "{0:g}".format(x / (self.xtick_step / self.sampling_period)) + lambda x, pos: "{0:g}".format( + x / (self.xtick_step / self.unit_scaling) + ) ) def plot(self, ax): @@ -47,14 +49,14 @@ class _BoxplotPlotter(BasePlotter): ax=ax, ) ax.xaxis.set_major_locator( - ticker.MultipleLocator(self.xtick_step / self.sampling_period) + ticker.MultipleLocator(self.xtick_step / self.unit_scaling) ) def boxplot( trace_df, trace_name, - sampling_period=5, + unit_scaling=1, box_color="b", xtick_step=60, xlabel="Time (min)", @@ -64,7 +66,7 @@ def boxplot( plotter = _BoxplotPlotter( trace_df, trace_name, - sampling_period, + unit_scaling, box_color, xtick_step, xlabel, -- GitLab From f43700ec2017467a34dd40990b065648a275bec2 Mon Sep 17 00:00:00 2001 From: Arin Wongprommoon <arin.wongprommoon@ed.ac.uk> Date: Fri, 17 Jun 2022 15:48:38 +0100 Subject: [PATCH 4/5] [routines/boxplot] Add docstring for plotting function --- postprocessor/routines/boxplot.py | 35 +++++++++++++++++++++++++++++++ 1 file changed, 35 insertions(+) diff --git a/postprocessor/routines/boxplot.py b/postprocessor/routines/boxplot.py index a9855e9..ea5e9ba 100644 --- a/postprocessor/routines/boxplot.py +++ b/postprocessor/routines/boxplot.py @@ -63,6 +63,41 @@ def boxplot( plot_title="", ax=None, ): + """Draw series of boxplots from an array of time series of traces + + Draw series of boxplots from an array of time series of traces, showing the + distribution of values at each time point over time. + + Parameters + ---------- + trace_df : pandas.DataFrame + Time series of traces (rows = cells, columns = time points). + trace_name : string + Name of trace being plotted, e.g. 'flavin'. + unit_scaling : int or float + Unit scaling factor, e.g. 1/60 to convert minutes to hours. 
+ box_color : string + matplolib colour string, specifies colour of boxes in boxplot + xtick_step : int or float + Interval length, in unit time, to draw x axis ticks. + xlabel : string + x axis label. + plot_title : string + Plot title. + ax : matplotlib Axes + Axes in which to draw the plot, otherwise use the currently active Axes. + + Returns + ------- + ax : matplotlib Axes + Axes object with the heatmap. + + Examples + -------- + FIXME: Add docs. + + """ + plotter = _BoxplotPlotter( trace_df, trace_name, -- GitLab From f5cc6c3e4fa1096dec9548beb5d98dd5f049f6d4 Mon Sep 17 00:00:00 2001 From: Arin Wongprommoon <arin.wongprommoon@ed.ac.uk> Date: Fri, 17 Jun 2022 15:52:45 +0100 Subject: [PATCH 5/5] [routines/heatmap] Incorporates unit scaling into x-axis ticks Heatmap added x-ticks based on the time points, not absolute time. When a DataFrame is passed into matplotlib.imshow, the Axes no longer cares about the column names. We want the plot to take into account the sampling period AND the unit scaling if the user specifies one. I copied over the method I implemented for boxplot.py -- it essentially 'tricks' matplotlib by redefining the labels rather than changing the time-axis values (see https://stackoverflow.com/questions/10171618/changing-plot-scale-by-a-factor-in-matplotlib). matplotlib's xscale does not support simple linear re-scaling; in any case, having time-axis values does not make sense for imshow. This commit addresses issue #20. 
--- postprocessor/routines/heatmap.py | 32 +++++++++++++++++++------------ 1 file changed, 20 insertions(+), 12 deletions(-) diff --git a/postprocessor/routines/heatmap.py b/postprocessor/routines/heatmap.py index bacf514..d2a8927 100644 --- a/postprocessor/routines/heatmap.py +++ b/postprocessor/routines/heatmap.py @@ -2,7 +2,7 @@ import numpy as np import matplotlib.pyplot as plt -from matplotlib import cm, colors +from matplotlib import cm, colors, ticker from postprocessor.core.processes.standardscaler import standardscaler from postprocessor.routines.plottingabc import BasePlotter @@ -17,14 +17,14 @@ class _HeatmapPlotter(BasePlotter): trace_name, births_df, cmap, - sampling_period, + unit_scaling, xtick_step, scale, robust, xlabel, plot_title, ): - super().__init__(trace_name, sampling_period, xlabel, plot_title) + super().__init__(trace_name, unit_scaling, xlabel, plot_title) # Define attributes from arguments self.trace_df = trace_df self.births_df = births_df @@ -59,12 +59,17 @@ class _HeatmapPlotter(BasePlotter): self.vmin = None self.vmax = None + # Define horizontal axis ticks and labels + # hacky! 
-- redefine column names + trace_df.columns = trace_df.columns * self.unit_scaling + self.fmt = ticker.FuncFormatter( + lambda x, pos: "{0:g}".format(x * self.unit_scaling) + ) + def plot(self, ax, cax): """Draw the heatmap on the provided Axes.""" super().plot(ax) - # Horizontal axis labels as multiples of xtick_step - ax.xaxis.set_major_locator(plt.MultipleLocator(self.xtick_step)) - + ax.xaxis.set_major_formatter(self.fmt) # Draw trace heatmap trace_heatmap = ax.imshow( self.trace_scaled, @@ -73,7 +78,11 @@ class _HeatmapPlotter(BasePlotter): vmin=self.vmin, vmax=self.vmax, ) - + # Horizontal axis labels as multiples of xtick_step, taking + # into account unit scaling + ax.xaxis.set_major_locator( + ticker.MultipleLocator(self.xtick_step / self.unit_scaling) + ) # Overlay births, if present if self.births_df is not None: # Must be masked array for transparency @@ -86,7 +95,6 @@ class _HeatmapPlotter(BasePlotter): births_heatmap_mask, interpolation="none", ) - # Draw colour bar colorbar = ax.figure.colorbar( mappable=trace_heatmap, cax=cax, ax=ax, label=self.colorbarlabel @@ -98,7 +106,7 @@ def heatmap( trace_name, births_df=None, cmap=cm.RdBu, - sampling_period=5, + unit_scaling=1, xtick_step=60, scale=True, robust=True, @@ -120,8 +128,8 @@ def heatmap( 0 or 1. cmap : matplotlib ColorMap Colour map for heatmap. - sampling_period : int or float - Sampling period, in unit time. + unit_scaling : int or float + Unit scaling factor, e.g. 1/60 to convert minutes to hours. xtick_step : int or float Interval length, in unit time, to draw x axis ticks. scale : bool @@ -154,7 +162,7 @@ def heatmap( trace_name, births_df, cmap, - sampling_period, + unit_scaling, xtick_step, scale, robust, -- GitLab