diff --git a/limbo/tsa_inprogress.py b/limbo/tsa_inprogress.py index 2a844f9c5c56884377027028e839ba6d839f75ab..280026c2624fe0c2b6c36f5dac3ce6a306d747d4 100644 --- a/limbo/tsa_inprogress.py +++ b/limbo/tsa_inprogress.py @@ -1,7 +1,5 @@ import numpy as np import pandas as pd -from collections import namedtuple - ### @@ -25,59 +23,6 @@ def todf(y, exampledf): ### -def lbootstrap(data, statistic, noresamples=100, *args, **kwargs): - """ - Uses statistical bootstrapping to estimate errors in a statistic. - - Parameters - ---------- - data: array - A design matrix of data, with features as columns and replicates - as rows - statistic: function returning a float - Returns the desired statistic - noresamples: integer - The number of bootstrapped samples generated (by resampling with - replacement) - args, kwargs: - Any additional arguments required by the statistic function - - Returns - ------- - res: a named tuple with fields - median: float - The median of the statistic calculated from the resampled data sets - mean: float - The mean of the statistic - std: float - The standard deviation of the statistic - cf: list of two floats - The 95% confidence interval - iqr: float - The interquartile range - """ - if data.ndim == 1: - data = data.reshape(data.size, -1) - nosamples = data.shape[0] - rsams = np.random.choice(nosamples, size=(nosamples, noresamples)) - stats = [ - statistic(data[rsams[:, i], :], *args, **kwargs) - for i in np.arange(noresamples) - ] - Res = namedtuple("Res", ["cf", "std", "iqr", "median", "mean", "orig"]) - return Res( - [np.nanquantile(stats, 0.025), np.nanquantile(stats, 0.975)], - np.nanstd(stats), - np.nanquantile(stats, 0.75) - np.nanquantile(stats, 0.25), - np.nanmedian(stats), - np.nanmean(stats), - statistic(data), - ) - - -### - - def entropy_time_series(data): """ Estimates the noise in a collection of time series by calculating diff --git a/src/wela/dataloader.py b/src/wela/dataloader.py index 
def lbootstrap(data, statistic, noresamples=100, *args, **kwargs):
    """
    Use statistical bootstrapping to estimate errors in a statistic.

    The rows of ``data`` are resampled with replacement ``noresamples``
    times; the statistic is evaluated on every resampled data set and the
    spread of those values is summarised.

    Parameters
    ----------
    data: array-like
        A design matrix of data, with features as columns and replicates
        as rows. A 1-D input is treated as a single feature and reshaped
        into one column.
    statistic: function returning a float
        Returns the desired statistic. It is called as
        ``statistic(resampled_data, *args, **kwargs)``.
    noresamples: integer
        The number of bootstrapped samples generated (by resampling with
        replacement)
    args, kwargs:
        Any additional arguments required by the statistic function

    Returns
    -------
    res: a named tuple with fields
        cf: list of two floats
            The 95% confidence interval (2.5% and 97.5% quantiles)
        std: float
            The standard deviation of the statistic
        iqr: float
            The interquartile range
        median: float
            The median of the statistic calculated from the resampled
            data sets
        mean: float
            The mean of the statistic
        orig: float
            The statistic evaluated on the original, non-resampled data
            with the same additional arguments

    Notes
    -----
    NaN values returned by the statistic are ignored in the summaries.
    Resampling uses NumPy's global random state (``np.random``).
    """
    # accept lists and other array-likes, keeping ndarray subclasses intact
    data = np.asanyarray(data)
    if data.ndim == 1:
        # a 1-D input is one feature: one column, one row per replicate
        data = data.reshape(data.size, -1)
    nosamples = data.shape[0]
    # each column of rsams holds the row indices of one bootstrap resample
    rsams = np.random.choice(nosamples, size=(nosamples, noresamples))
    stats = [statistic(data[rows, :], *args, **kwargs) for rows in rsams.T]
    # a single pass over stats for all four quantiles
    q025, q25, q75, q975 = np.nanquantile(stats, [0.025, 0.25, 0.75, 0.975])
    Res = namedtuple("Res", ["cf", "std", "iqr", "median", "mean", "orig"])
    return Res(
        cf=[q025, q975],
        std=np.nanstd(stats),
        iqr=q75 - q25,
        median=np.nanmedian(stats),
        mean=np.nanmean(stats),
        # the extra arguments must be forwarded here too, otherwise the
        # reference value is a different statistic (or a TypeError)
        orig=statistic(data, *args, **kwargs),
    )