Skip to content
GitLab
Explore
Sign in
Primary navigation
Search or go to…
Project
B
Baby
Manage
Activity
Members
Labels
Plan
Issues
Issue boards
Milestones
Wiki
Code
Merge requests
Repository
Branches
Commits
Tags
Repository graph
Compare revisions
Snippets
Deploy
Releases
Package Registry
Container Registry
Model registry
Operate
Terraform modules
Monitor
Incidents
Analyze
Value stream analytics
Contributor analytics
Repository analytics
Model experiments
Help
Help
Support
GitLab documentation
Compare GitLab plans
Community forum
Contribute to GitLab
Provide feedback
Keyboard shortcuts
?
Snippets
Groups
Projects
Show more breadcrumbs
Swain Lab
Baby
Merge requests
!1
fix requirements and add fig1 example
Code
Review changes
Check out branch
Download
Patches
Plain diff
Merged
fix requirements and add fig1 example
updates
into
master
Overview
0
Commits
2
Changes
5
Merged
jpietsch
requested to merge
updates
into
master
2 years ago
Overview
0
Commits
2
Changes
5
Expand
0
0
Merge request reports
Compare
master
version 1
77dcdebd
2 years ago
master (base)
and
latest version
latest version
de8322ce
2 commits,
2 years ago
version 1
77dcdebd
1 commit,
2 years ago
5 files
+
1030
−
49
Inline
Compare changes
Side-by-side
Inline
Show whitespace changes
Show one file at a time
Files
5
Search (e.g. *.vue) (Ctrl+P)
python/baby/postprocessing.py
0 → 100644
+
319
−
0
Options
# If you publish results that make use of this software or the Birth Annotator
# for Budding Yeast algorithm, please cite:
# Julian M J Pietsch, Alán Muñoz, Diane Adjavon, Ivan B N Clark, Peter S
# Swain, 2021, Birth Annotator for Budding Yeast (in preparation).
#
#
# The MIT License (MIT)
#
# Copyright (c) Julian Pietsch, Alán Muñoz and Diane Adjavon 2021
#
# Permission is hereby granted, free of charge, to any person obtaining a copy
# of this software and associated documentation files (the "Software"), to
# deal in the Software without restriction, including without limitation the
# rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
# sell copies of the Software, and to permit persons to whom the Software is
# furnished to do so, subject to the following conditions:
#
# The above copyright notice and this permission notice shall be included in
# all copies or substantial portions of the Software.
#
# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
# AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
# FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
# IN THE SOFTWARE.
from
warnings
import
warn
import
numpy
as
np
# import gaussianprocessderivatives as gp
import
gaussianprocessderivatives
as
gp
import
h5py
from
tqdm
import
trange
,
tqdm
from
collections
import
namedtuple
def
by_thresh
(
diff
,
gr
,
start
,
thresh
,
max_thresh
,
d_idx
=
None
,
debug
=
False
):
"""
Recursively define a crossing point based on a threshold range.
Requirements for a valid
'
crossing
'
point:
* the difference between mother and daughter growth rate (diff) is less than the threshold
* the daughter growth rate (gr) is greater than the threshold
The range of thresholds from thresh to max_thresh is checked from low to high.
As soon as a point satisfies the requirements above, a value is returend.
If there is no valid point for any of the thresholds within the range, 0 is returned
"""
# In case there are NaN values, we can safely ignore the warning that
# usually comes up
with
np
.
errstate
(
invalid
=
'
ignore
'
):
zero_crossings
=
np
.
where
((
np
.
abs
(
diff
)
<=
thresh
)
&
(
gr
>=
1.0
*
thresh
))[
0
]
if
len
(
zero_crossings
)
>
0
:
if
debug
:
print
(
f
"
{
thresh
}
succeeded:
{
zero_crossings
[
0
]
+
start
}
"
)
return
zero_crossings
[
0
]
+
start
elif
thresh
<
max_thresh
:
if
debug
:
print
(
f
"
{
thresh
}
failed for
{
d_idx
}
, dist is
{
diff
}
"
)
return
by_thresh
(
diff
,
gr
,
start
,
thresh
+
1
,
max_thresh
,
d_idx
,
debug
)
else
:
if
debug
:
print
(
f
"
All thresholds failed
"
)
return
None
def
cytokinesis_from_growth_rate_crossing
(
d_gr
,
m_gr
,
start
,
end
,
d_idx
=
None
,
ofs
=
2
,
lb
=
2
,
ub
=
15
,
debug
=
False
):
"""
Find cytokinesis by growth rate crossing within a range of thresholds.
This method enforces that cytokinesis is not within ofs time points of start or end.
It only uses the mean growth rate values to determine the crossing point.
"""
ostart
=
start
+
ofs
oend
=
end
-
ofs
diff
=
d_gr
[
ostart
:
oend
]
-
m_gr
[
ostart
:
oend
]
cyto
=
by_thresh
(
diff
,
d_gr
[
ostart
:
oend
],
ostart
,
lb
,
ub
,
d_idx
,
debug
)
return
cyto
def
dist
(
x
,
std_x
,
y
,
std_y
):
"""
Distance between two gaussian variables given mean and standard deviation.
"""
return
np
.
sqrt
(
std_x
**
2
+
std_y
**
2
+
(
x
-
y
)
**
2
)
def
cytokinesis_from_growth_rate
(
d_gr
,
d_std
,
m_gr
,
m_std
,
start
,
end
,
d_idx
=
None
,
ofs
=
2
,
lb
=
2
,
ub
=
15
,
debug
=
False
):
"""
Find cytokinesis by growth rate crossing within a range of thresholds, noisy.
This method enforces that cytokinesis is not within ofs time points of start or end.
It uses both the mean and standard deviation of growth rate to determine the crossing point.
"""
ostart
=
start
+
ofs
oend
=
end
-
ofs
diff
=
d_gr
[
ostart
:
oend
]
-
m_gr
[
ostart
:
oend
]
diff
=
dist
(
d_gr
[
ostart
:
oend
],
d_std
[
ostart
:
oend
],
m_gr
[
ostart
:
oend
],
m_std
[
ostart
:
oend
])
cyto
=
by_thresh
(
diff
,
d_gr
[
ostart
:
oend
],
ostart
,
lb
,
ub
,
d_idx
,
debug
)
return
cyto
def
growth_rates_from_volumes
(
times
,
volumes
):
fvals
=
[
'
f
'
,
'
df
'
]
fits
=
{}
for
f
in
fvals
:
fits
[
f
]
=
{}
fits
[
f
][
'
mean
'
]
=
np
.
full
(
volumes
.
shape
,
np
.
nan
)
fits
[
f
][
'
std
'
]
=
np
.
full
(
volumes
.
shape
,
np
.
nan
)
# Use parameter bounds suited to raw and not log volume
hyperpar_bounds
=
{
0
:
(
-
2
,
3
),
1
:
(
-
2
,
0
),
2
:
(
0
,
3
)}
# Loop over all cells and fit Gaussian process
for
i
in
range
(
volumes
.
shape
[
0
]):
iV
=
volumes
[
i
]
i_filt
=
~
np
.
isnan
(
iV
)
if
i_filt
.
sum
()
<
3
:
continue
i_gp
=
gp
.
maternGP
(
hyperpar_bounds
,
times
[
i_filt
],
iV
[
i_filt
])
i_gp
.
findhyperparameters
(
noruns
=
5
)
i_gp
.
predict
(
times
[
i_filt
],
derivs
=
1
)
for
f
in
fvals
:
fits
[
f
][
'
mean
'
][
i
,
i_filt
]
=
getattr
(
i_gp
,
f
)
fits
[
f
][
'
std
'
][
i
,
i_filt
]
=
np
.
sqrt
(
getattr
(
i_gp
,
f
+
'
var
'
))
gr_tuple
=
namedtuple
(
'
gp_out
'
,
'
vol, vol_std, grs, grs_std
'
)
return
gr_tuple
(
*
fits
[
'
f
'
].
values
(),
*
fits
[
'
df
'
].
values
())
def
fit_gr_data
(
filename
,
run_moth
=
True
,
run_dght
=
True
,
split_at_birth
=
False
,
log_volume
=
False
):
data
=
h5py
.
File
(
filename
,
'
a
'
)
if
log_volume
:
hyperpar_bounds
=
{
0
:
(
-
2
,
1
),
1
:
(
-
2
,
2
),
2
:
(
-
4
,
-
2
)}
else
:
hyperpar_bounds
=
{
0
:
(
-
2
,
3
),
1
:
(
-
2
,
0
),
2
:
(
0
,
3
)}
if
run_moth
:
allT
=
data
[
'
M/times
'
][()]
allV
=
data
[
'
M/vol
'
][()]
if
log_volume
:
allV
=
np
.
log
(
allV
)
if
split_at_birth
:
ntps
,
ncells
=
allV
.
shape
assert
ncells
<
2
**
16
-
1
pnum
=
data
[
'
M/posNum
'
][()].
astype
(
int
).
flatten
()
-
1
tnum
=
data
[
'
M/trapNum
'
][()].
astype
(
int
).
flatten
()
-
1
cnum
=
data
[
'
M/cellNum
'
][()].
astype
(
int
).
flatten
()
-
1
mnum
=
data
[
'
M/mothers
'
][()].
flatten
()
dlabs
=
data
[
'
M/daughterLabel
'
][()].
astype
(
int
)
births
=
data
[
'
M/births
'
][()].
astype
(
bool
)
cmap
=
np
.
zeros
((
np
.
max
(
pnum
)
+
1
,
np
.
max
(
tnum
)
+
1
,
np
.
max
(
cnum
)
+
1
),
dtype
=
'
uint16
'
)
cmap
[
pnum
,
tnum
,
cnum
]
=
np
.
arange
(
1
,
ncells
+
1
)
mother_inds
=
np
.
empty
(
ncells
)
mother_inds
[:]
=
np
.
nan
hasm
=
~
np
.
isnan
(
mnum
)
mother_inds
[
hasm
]
=
cmap
[
pnum
[
hasm
],
tnum
[
hasm
],
mnum
[
hasm
].
astype
(
int
)
-
1
]
if
np
.
any
(
mother_inds
[
hasm
]
==
0
):
warn
(
'
Some mothers are missing...
'
)
mother_inds
[
hasm
&
(
mother_inds
==
0
)]
=
np
.
nan
mother_inds
-=
1
# convert to Python indexing
mothers
=
np
.
unique
(
mother_inds
[
~
np
.
isnan
(
mother_inds
)])
fvals
=
[
'
f
'
,
'
df
'
,
'
ddf
'
]
fits
=
{}
for
f
in
fvals
:
fits
[
f
]
=
{}
fits
[
f
][
'
mean
'
]
=
np
.
empty
(
allV
.
shape
)
fits
[
f
][
'
mean
'
][:]
=
np
.
nan
fits
[
f
][
'
std
'
]
=
np
.
empty
(
allV
.
shape
)
fits
[
f
][
'
std
'
][:]
=
np
.
nan
# Loop over all mothers and run smoothing
for
m
in
trange
(
allV
.
shape
[
1
]):
if
split_at_birth
:
# Split time series at each birth event
btps
=
np
.
flatnonzero
(
births
[:,
m
])
daughter_inds
=
cmap
[
pnum
[
m
],
tnum
[
m
],
dlabs
[
btps
,
m
].
astype
(
int
)
-
1
]
if
np
.
any
(
daughter_inds
==
0
):
warn
(
f
'
Some daughters are missing for cell
{
m
+
1
}
'
)
daughter_inds
=
daughter_inds
[
daughter_inds
!=
0
]
daughter_inds
-=
1
assert
set
(
daughter_inds
)
==
set
(
np
.
flatnonzero
(
mother_inds
==
m
))
# Where birth is uncertain, the daughter may appear and
# disappear over its first few time points, with the mother
# dividing and merging in complement. Take care to add all
# mother-like tps to time series of undivided cell, and keep
# only daughter-like tps in time series of divided cell
flicker_mask
=
np
.
zeros
(
ntps
,
dtype
=
'
bool
'
)
for
btp
,
nbtp
,
d
in
zip
(
btps
,
np
.
hstack
((
btps
[
1
:],
ntps
)),
daughter_inds
):
ltp
=
max
(
nbtp
,
np
.
argmax
(
births
[:,
d
]))
flicker_mask
[
btp
:
ltp
]
=
np
.
isnan
(
allV
[
btp
:
ltp
,
d
])
mVs
=
[]
btps
=
np
.
hstack
((
0
,
btps
,
ntps
))
for
s
,
b
,
e
in
zip
(
btps
[:
-
2
],
btps
[
1
:
-
1
],
btps
[
2
:]):
mV
=
allV
[:,
m
].
copy
()
mV
[:
s
]
=
np
.
nan
mV
[
e
:]
=
np
.
nan
mV
[
s
:
b
][
flicker_mask
[
s
:
b
]]
=
np
.
nan
mV
[
b
:
e
][
~
flicker_mask
[
b
:
e
]]
=
np
.
nan
mVs
.
append
(
mV
)
mV
=
allV
[:,
m
].
copy
()
mV
[:
btps
[
-
2
]]
=
np
.
nan
mV
[
btps
[
-
2
]:][
flicker_mask
[
btps
[
-
2
]:]]
=
np
.
nan
mVs
.
append
(
mV
)
else
:
# Treat the mother as one time series
mVs
=
[
allV
[:,
m
]]
for
mV
in
mVs
:
mf
=
~
np
.
isnan
(
mV
)
if
mf
.
sum
()
<
3
:
continue
mT
=
allT
[
mf
,
m
]
mV
=
mV
[
mf
]
mg
=
gp
.
maternGP
(
hyperpar_bounds
,
mT
,
mV
)
mg
.
findhyperparameters
(
noruns
=
5
)
mg
.
predict
(
mT
,
derivs
=
2
)
for
f
in
fvals
:
fits
[
f
][
'
mean
'
][
mf
,
m
]
=
getattr
(
mg
,
f
)
fits
[
f
][
'
std
'
][
mf
,
m
]
=
np
.
sqrt
(
getattr
(
mg
,
f
+
'
var
'
))
# Save predictions
dsprops
=
dict
(
shape
=
allV
.
shape
,
dtype
=
allV
.
dtype
,
chunks
=
True
,
compression
=
'
gzip
'
,
fillvalue
=
np
.
nan
)
data
[
'
M
'
].
require_dataset
(
'
svol
'
,
**
dsprops
)
data
[
'
M/svol
'
][()]
=
fits
[
'
f
'
][
'
mean
'
]
data
[
'
M
'
].
require_dataset
(
'
svol_std
'
,
**
dsprops
)
data
[
'
M/svol_std
'
][()]
=
fits
[
'
f
'
][
'
std
'
]
data
[
'
M
'
].
require_dataset
(
'
grs
'
,
**
dsprops
)
data
[
'
M/grs
'
][()]
=
fits
[
'
df
'
][
'
mean
'
]
data
[
'
M
'
].
require_dataset
(
'
grs_std
'
,
**
dsprops
)
data
[
'
M/grs_std
'
][()]
=
fits
[
'
df
'
][
'
std
'
]
data
[
'
M
'
].
require_dataset
(
'
d2v
'
,
**
dsprops
)
data
[
'
M/d2v
'
][()]
=
fits
[
'
ddf
'
][
'
mean
'
]
data
[
'
M
'
].
require_dataset
(
'
d2v_std
'
,
**
dsprops
)
data
[
'
M/d2v_std
'
][()]
=
fits
[
'
ddf
'
][
'
std
'
]
if
run_dght
:
fvals
=
[
'
f
'
,
'
df
'
,
'
ddf
'
]
for
g
in
tqdm
(
data
[
'
D
'
]):
grp
=
data
[
'
D
'
][
g
]
allT
=
grp
[
'
times
'
]
allV
=
grp
[
'
vol
'
]
if
log_volume
:
allV
=
np
.
log
(
allV
)
fits
=
{}
for
f
in
fvals
:
fits
[
f
]
=
{}
fits
[
f
][
'
mean
'
]
=
np
.
empty
(
allV
.
shape
)
fits
[
f
][
'
mean
'
][:]
=
np
.
nan
fits
[
f
][
'
std
'
]
=
np
.
empty
(
allV
.
shape
)
fits
[
f
][
'
std
'
][:]
=
np
.
nan
# Loop over all daughters and run smoothing
for
d
in
range
(
allV
.
shape
[
1
]):
dV
=
allV
[:,
d
]
df
=
~
np
.
isnan
(
dV
)
if
not
df
.
any
():
continue
dT
=
allT
[
df
,
d
]
dV
=
dV
[
df
]
dg
=
gp
.
maternGP
(
hyperpar_bounds
,
dT
,
dV
)
dg
.
findhyperparameters
(
noruns
=
5
)
dg
.
predict
(
dT
,
derivs
=
2
)
for
f
in
fvals
:
fits
[
f
][
'
mean
'
][
df
,
d
]
=
getattr
(
dg
,
f
)
fits
[
f
][
'
std
'
][
df
,
d
]
=
np
.
sqrt
(
getattr
(
dg
,
f
+
'
var
'
))
# Save predictions
dsprops
=
dict
(
shape
=
allV
.
shape
,
dtype
=
allV
.
dtype
,
chunks
=
True
,
compression
=
'
gzip
'
,
fillvalue
=
np
.
nan
)
grp
.
require_dataset
(
'
svol
'
,
**
dsprops
)
grp
[
'
svol
'
][()]
=
fits
[
'
f
'
][
'
mean
'
]
grp
.
require_dataset
(
'
svol_std
'
,
**
dsprops
)
grp
[
'
svol_std
'
][()]
=
fits
[
'
f
'
][
'
std
'
]
grp
.
require_dataset
(
'
grs
'
,
**
dsprops
)
grp
[
'
grs
'
][()]
=
fits
[
'
df
'
][
'
mean
'
]
grp
.
require_dataset
(
'
grs_std
'
,
**
dsprops
)
grp
[
'
grs_std
'
][()]
=
fits
[
'
df
'
][
'
std
'
]
grp
.
require_dataset
(
'
d2v
'
,
**
dsprops
)
grp
[
'
d2v
'
][()]
=
fits
[
'
ddf
'
][
'
mean
'
]
grp
.
require_dataset
(
'
d2v_std
'
,
**
dsprops
)
grp
[
'
d2v_std
'
][()]
=
fits
[
'
ddf
'
][
'
std
'
]
data
.
close
()
def
main
():
from
optparse
import
OptionParser
usage
=
"
usage: %prog [options] grdata_file
"
parser
=
OptionParser
(
usage
=
usage
)
parser
.
add_option
(
'
-d
'
,
'
--daughters-only
'
,
dest
=
'
run_moth
'
,
help
=
'
only fit daughter data
'
,
action
=
'
store_false
'
,
default
=
True
)
parser
.
add_option
(
'
-m
'
,
'
--mothers-only
'
,
dest
=
'
run_dght
'
,
help
=
'
only fit mother data
'
,
action
=
'
store_false
'
,
default
=
True
)
parser
.
add_option
(
'
-s
'
,
'
--split-at-birth
'
,
dest
=
'
split_at_birth
'
,
help
=
'
split mother traces according to birth times
'
,
action
=
'
store_true
'
,
default
=
False
)
parser
.
add_option
(
'
-l
'
,
'
--log-volume
'
,
dest
=
'
log_volume
'
,
help
=
'
fit gaussian process to log(V)
'
,
action
=
'
store_true
'
,
default
=
False
)
(
options
,
args
)
=
parser
.
parse_args
()
if
len
(
args
)
<
1
:
parser
.
print_help
()
sys
.
exit
()
for
grdata_file
in
args
:
fit_gr_data
(
grdata_file
,
run_moth
=
options
.
run_moth
,
run_dght
=
options
.
run_dght
,
split_at_birth
=
options
.
split_at_birth
,
log_volume
=
options
.
log_volume
)
Loading