Skip to content
GitLab
Explore
Sign in
Primary navigation
Search or go to…
Project
A
aliby-mirror
Manage
Activity
Members
Labels
Plan
Issues
Issue boards
Milestones
Wiki
Code
Merge requests
Repository
Branches
Commits
Tags
Repository graph
Compare revisions
Snippets
Deploy
Releases
Package Registry
Container Registry
Model registry
Operate
Terraform modules
Monitor
Incidents
Analyze
Value stream analytics
Contributor analytics
Repository analytics
Model experiments
Help
Help
Support
GitLab documentation
Compare GitLab plans
Community forum
Contribute to GitLab
Provide feedback
Keyboard shortcuts
?
Snippets
Groups
Projects
Show more breadcrumbs
Swain Lab
aliby
aliby-mirror
Commits
d99e55a3
Commit
d99e55a3
authored
2 years ago
by
Alán Muñoz
Browse files
Options
Downloads
Patches
Plain Diff
tweak(agora): isolate merge as functions
parent
e318491f
No related branches found
No related tags found
No related merge requests found
Changes
3
Hide whitespace changes
Inline
Side-by-side
Showing
3 changed files
src/agora/io/signal.py
+61
-108
61 additions, 108 deletions
src/agora/io/signal.py
src/agora/utils/merge.py
+113
-0
113 additions, 0 deletions
src/agora/utils/merge.py
src/postprocessor/core/processor.py
+1
-5
1 addition, 5 deletions
src/postprocessor/core/processor.py
with
175 additions
and
113 deletions
src/agora/io/signal.py
+
61
−
108
View file @
d99e55a3
import
typing
as
t
from
copy
import
copy
from
functools
import
cached_property
,
lru_cache
from
pathlib
import
PosixPath
import
bottleneck
as
bn
import
h5py
import
numpy
as
np
import
pandas
as
pd
from
utils_find_1st
import
cmp_larger
,
find_1st
from
agora.io.bridge
import
BridgeH5
from
agora.io.decorators
import
_first_arg_str_to_df
from
agora.utils.merge
import
apply_merges
class
Signal
(
BridgeH5
):
...
...
@@ -34,10 +35,6 @@ class Signal(BridgeH5):
def
__getitem__
(
self
,
dsets
:
t
.
Union
[
str
,
t
.
Collection
]):
assert
isinstance
(
dsets
,
(
str
,
t
.
Collection
)
),
"
Incorrect type for dset
"
if
isinstance
(
dsets
,
str
)
and
dsets
.
endswith
(
"
imBackground
"
):
df
=
self
.
get_raw
(
dsets
)
...
...
@@ -52,6 +49,8 @@ class Signal(BridgeH5):
return
[
self
.
add_name
(
self
.
apply_prepost
(
dset
),
dset
)
for
dset
in
dsets
]
else
:
raise
Exception
(
f
"
Invalid type
{
type
(
dsets
)
}
to get datasets
"
)
# return self.cols_in_mins(self.add_name(df, dsets))
return
self
.
add_name
(
df
,
dsets
)
...
...
@@ -74,12 +73,12 @@ class Signal(BridgeH5):
)
return
df
@property
@
cached_
property
def
ntimepoints
(
self
):
with
h5py
.
File
(
self
.
filename
,
"
r
"
)
as
f
:
return
f
[
"
extraction/general/None/area/timepoint
"
][
-
1
]
+
1
@property
@
cached_
property
def
tinterval
(
self
)
->
int
:
tinterval_location
=
"
time_settings/timeinterval
"
with
h5py
.
File
(
self
.
filename
,
"
r
"
)
as
f
:
...
...
@@ -89,6 +88,7 @@ class Signal(BridgeH5):
def
get_retained
(
df
,
cutoff
):
return
df
.
loc
[
bn
.
nansum
(
df
.
notna
(),
axis
=
1
)
>
df
.
shape
[
1
]
*
cutoff
]
@lru_cache
(
30
)
def
retained
(
self
,
signal
,
cutoff
=
0.8
):
df
=
self
[
signal
]
...
...
@@ -98,6 +98,7 @@ class Signal(BridgeH5):
elif
isinstance
(
df
,
list
):
return
[
self
.
get_retained
(
d
,
cutoff
=
cutoff
)
for
d
in
df
]
@lru_cache
(
2
)
def
lineage
(
self
,
lineage_location
:
t
.
Optional
[
str
]
=
None
,
merged
:
bool
=
False
)
->
np
.
ndarray
:
...
...
@@ -127,40 +128,48 @@ class Signal(BridgeH5):
def
apply_prepost
(
self
,
data
:
t
.
Union
[
str
,
pd
.
DataFrame
],
merges
:
np
.
ndarray
=
Non
e
,
picks
:
t
.
Op
tion
al
[
bool
]
=
Non
e
,
merges
:
t
.
Union
[
np
.
ndarray
,
bool
]
=
Tru
e
,
picks
:
t
.
Union
[
t
.
Collec
tion
,
bool
]
=
Tru
e
,
):
"""
Apply modifier operations (picker, merger) to a given dataframe.
Parameters
----------
data : t.Union[str, pd.DataFrame]
DataFrame or url to one.
merges : t.Union[np.ndarray, bool]
(optional) 2-D array with three columns and variable length. The
first column is the trap id, second is mother label and third one is
daughter id.
If it is True it fetches merges from file, if false it skips merging step.
picks : t.Union[np.ndarray, bool]
(optional) 2-D ndarray where first column is traps and second column
is cell labels.
If it is True it fetches picks from file, if false it skips picking step.
Examples
--------
FIXME: Add docs.
"""
Apply modifier operations (picker, merger) to a given dataframe.
"""
if
merges
is
None
:
merges
=
self
.
get_merges
()
if
isinstance
(
merges
,
bool
):
merges
:
np
.
ndarray
=
self
.
get_merges
()
if
merges
else
np
.
array
([])
merged
=
copy
(
data
)
if
merges
.
any
():
# Split in two dfs, one with rows relevant for merging and one
# without them
valid_merges
=
validate_merges
(
merges
,
np
.
array
(
list
(
data
.
index
)))
# TODO use the same info from validate_merges to select both
valid_indices
=
[
tuple
(
x
)
for
x
in
(
np
.
unique
(
valid_merges
.
reshape
(
-
1
,
2
),
axis
=
0
))
]
merged
=
self
.
apply_merge
(
data
.
loc
[
valid_indices
],
valid_merges
,
)
nonmergeable_ids
=
data
.
index
.
difference
(
valid_indices
)
merged
=
apply_merges
(
data
,
merges
)
merged
=
pd
.
concat
(
(
merged
,
data
.
loc
[
nonmergeable_ids
]),
names
=
data
.
index
.
names
if
isinstance
(
picks
,
bool
):
picks
=
(
self
.
get_picks
(
names
=
merged
.
index
.
names
)
if
picks
else
set
(
merged
.
index
)
)
with
h5py
.
File
(
self
.
filename
,
"
r
"
)
as
f
:
if
"
modifiers/picks
"
in
f
and
not
picks
:
picks
=
self
.
get_picks
(
names
=
merged
.
index
.
names
)
if
"
modifiers/picks
"
in
f
and
picks
:
# missing_cells = [i for i in picks if tuple(i) not in
# set(merged.index)]
...
...
@@ -184,7 +193,7 @@ class Signal(BridgeH5):
merged
=
pd
.
DataFrame
([],
index
=
index
)
return
merged
@property
@
cached_
property
def
datasets
(
self
):
if
not
hasattr
(
self
,
"
_siglist
"
):
self
.
_siglist
=
[]
...
...
@@ -195,12 +204,12 @@ class Signal(BridgeH5):
for
sig
in
self
.
siglist
:
print
(
sig
)
@property
@
cached_
property
def
p_siglist
(
self
):
"""
Print signal list
"""
self
.
datasets
@property
@
cached_
property
def
siglist
(
self
):
"""
Return list of signals
"""
try
:
...
...
@@ -215,34 +224,24 @@ class Signal(BridgeH5):
return
self
.
_siglist
def
get_merged
(
self
,
dataset
):
return
self
.
apply_prepost
(
dataset
,
skip_pick
=
True
)
return
self
.
apply_prepost
(
dataset
,
skip_pick
s
=
True
)
@property
@
cached_
property
def
merges
(
self
):
with
h5py
.
File
(
self
.
filename
,
"
r
"
)
as
f
:
dsets
=
f
.
visititems
(
self
.
_if_merges
)
return
dsets
@property
@
cached_
property
def
n_merges
(
self
):
return
len
(
self
.
merges
)
@property
@
cached_
property
def
picks
(
self
):
with
h5py
.
File
(
self
.
filename
,
"
r
"
)
as
f
:
dsets
=
f
.
visititems
(
self
.
_if_picks
)
return
dsets
def
apply_merge
(
self
,
df
,
changes
):
if
len
(
changes
):
for
target
,
source
in
changes
:
df
.
loc
[
tuple
(
target
)]
=
self
.
join_tracks_pair
(
df
.
loc
[
tuple
(
target
)],
df
.
loc
[
tuple
(
source
)]
)
df
.
drop
(
tuple
(
source
),
inplace
=
True
)
return
df
def
get_raw
(
self
,
dataset
:
str
,
in_minutes
:
bool
=
True
):
try
:
if
isinstance
(
dataset
,
str
):
...
...
@@ -266,14 +265,20 @@ class Signal(BridgeH5):
return
merges
# def get_picks(self, levels):
def
get_picks
(
self
,
names
,
path
=
"
modifiers/picks/
"
):
def
get_picks
(
self
,
names
:
t
.
Tuple
[
str
,
...]
=
(
"
trap
"
,
"
cell_label
"
),
path
:
str
=
"
modifiers/picks/
"
,
)
->
t
.
Set
[
t
.
Tuple
[
int
,
str
]]:
"""
Return the relevant picks based on names
"""
with
h5py
.
File
(
self
.
filename
,
"
r
"
)
as
f
:
picks
=
set
()
if
path
in
f
:
return
list
(
zip
(
*
[
f
[
path
+
name
]
for
name
in
names
]))
# return f["modifiers/picks"]
else
:
return
None
picks
=
set
(
zip
(
*
[
f
[
path
+
name
]
for
name
in
names
]))
return
picks
def
dataset_to_df
(
self
,
f
:
h5py
.
File
,
path
:
str
)
->
pd
.
DataFrame
:
"""
...
...
@@ -322,7 +327,7 @@ class Signal(BridgeH5):
# columns=f[path + "/timepoint"][()],
# )
def
get_siglist
(
self
,
name
:
str
,
node
):
def
get_siglist
(
self
,
node
):
fullname
=
node
.
name
if
isinstance
(
node
,
h5py
.
Group
)
and
np
.
all
(
[
isinstance
(
x
,
h5py
.
Dataset
)
for
x
in
node
.
values
()]
...
...
@@ -348,17 +353,6 @@ class Signal(BridgeH5):
if
isinstance
(
obj
,
h5py
.
Group
)
and
name
.
endswith
(
"
picks
"
):
return
obj
[()]
@staticmethod
def
join_tracks_pair
(
target
:
pd
.
Series
,
source
:
pd
.
Series
):
"""
Join two tracks and return the new value of the target.
TODO replace this with arrays only.
"""
tgt_copy
=
copy
(
target
)
end
=
find_1st
(
target
.
values
[::
-
1
],
0
,
cmp_larger
)
tgt_copy
.
iloc
[
-
end
:]
=
source
.
iloc
[
-
end
:].
values
return
tgt_copy
# TODO FUTURE add stages support to fluigent system
@property
def
ntps
(
self
)
->
int
:
...
...
@@ -401,44 +395,3 @@ class Signal(BridgeH5):
if
end
<=
self
.
max_span
]
return
tuple
((
stage
,
ntps
)
for
stage
,
ntps
in
zip
(
self
.
stages
,
spans
))
def
validate_merges
(
merges
:
np
.
ndarray
,
indices
:
np
.
ndarray
)
->
np
.
ndarray
:
"""
Select rows from the first array that are present in both.
We use casting for fast multiindexing
Parameters
----------
merges : np.ndarray
2-D array where columns are (trap, mother, daughter) or 3-D array where
dimensions are (X, (trap,mother), (trap,daughter))
indices : np.ndarray
2-D array where each column is a different level.
Returns
-------
np.ndarray
3-D array with elements in both arrays.
Examples
--------
FIXME: Add docs.
"""
if
merges
.
ndim
<
3
:
# Reshape into 3-D array for casting if neded
merges
=
np
.
stack
((
merges
[:,
[
0
,
1
]],
merges
[:,
[
0
,
2
]]),
axis
=
1
)
# Compare existing merges with available indices
# Swap trap and label axes for the merges array to correctly cast
# valid_ndmerges = merges.swapaxes(1, 2)[..., None] == indices.T[:, None, :]
valid_ndmerges
=
merges
[...,
None
]
==
indices
.
T
[
None
,
...]
# Casting is confusing (but efficient):
# - First we check the dimension across trap and cell id, to ensure both match
# - Then we check the dimension that crosses all indices, to ensure the pair is present there
# - Finally we check the merge tuples to check which cases have both target and source
valid_merges
=
merges
[
valid_ndmerges
.
all
(
axis
=
2
).
any
(
axis
=
2
).
all
(
axis
=
1
)]
# valid_merges = merges[allnan.any(axis=1)]
return
valid_merges
This diff is collapsed.
Click to expand it.
src/agora/utils/merge.py
0 → 100644
+
113
−
0
View file @
d99e55a3
#!/usr/bin/env jupyter
"""
Functions to efficiently merge rows in DataFrames.
"""
import
typing
as
t
from
copy
import
copy
import
numpy
as
np
import
pandas
as
pd
from
utils_find_1st
import
cmp_larger
,
find_1st
def apply_merges(data: pd.DataFrame, merges: np.ndarray) -> pd.DataFrame:
    """
    Merge tracklet rows of a DataFrame according to an array of
    (source, target) single-cell identifier pairs.

    Split data in two, one subset for rows relevant for merging and one
    without them. It uses an array of source tracklets and target tracklets
    to efficiently merge them.

    Parameters
    ----------
    data : pd.DataFrame
        Input DataFrame; its (Multi)Index rows are matched against the
        identifier pairs in `merges`.
    merges : np.ndarray
        3-D ndarray where dimensions are (X,2,2): nmerges, source-target
        pair and single-cell identifiers, respectively. A 2-D array of
        (trap, mother, daughter) columns is also accepted and reshaped
        by `validate_merges`.

    Returns
    -------
    pd.DataFrame
        New DataFrame with merged rows joined into their targets and the
        source rows dropped. Untouched rows come first in the result
        (concatenation order), so row order may differ from the input.

    Examples
    --------
    FIXME: Add docs.
    """
    # valid_merges: boolean mask over merge events that reference rows
    # actually present in `data`; indices: boolean mask over data's rows
    # marking the ones involved in any valid merge.
    valid_merges, indices = validate_merges(
        merges, np.array(list(data.index))
    )

    # Assign non-merged rows: they pass through unchanged.
    merged = data.loc[~indices]

    # Implement the merges and drop source rows.
    if valid_merges.any():
        to_merge = data.loc[indices]
        for target, source in merges[valid_merges]:
            target, source = tuple(target), tuple(source)
            # Overwrite the tail of the target track with the source track.
            # NOTE(review): iteration order matters when merges chain
            # (A<-B followed by B<-C) — assumes `merges` is ordered so that
            # sources are consumed after being written; confirm upstream.
            to_merge.loc[target] = join_tracks_pair(
                to_merge.loc[target].values,
                to_merge.loc[source].values,
            )
            # The source row has been absorbed into the target; remove it.
            to_merge.drop(source, inplace=True)
        merged = pd.concat((merged, to_merge), names=data.index.names)
    return merged
def validate_merges(
    merges: np.ndarray, indices: np.ndarray
) -> t.Tuple[np.ndarray, np.ndarray]:
    """
    Filter merge events down to those whose identifier pairs all exist
    in `indices`, using broadcast comparison for fast multiindexing.

    Parameters
    ----------
    merges : np.ndarray
        2-D array where columns are (trap, mother, daughter) or 3-D array
        where dimensions are (X, (trap,mother), (trap,daughter)).
    indices : np.ndarray
        2-D array where each column is a different level.

    Returns
    -------
    np.ndarray
        1-D boolean array indicating valid merge events.
    np.ndarray
        1-D boolean array indicating indices involved in merging.

    Examples
    --------
    FIXME: Add docs.
    """
    # Normalise the 2-D (trap, mother, daughter) layout into the
    # (X, pair, identifier) 3-D layout used below.
    pairs = (
        np.stack((merges[:, [0, 1]], merges[:, [0, 2]]), axis=1)
        if merges.ndim < 3
        else merges
    )

    # Broadcast pairs against every available index:
    # shape (nmerges, 2 pairs, 2 levels, nindices).
    level_hits = pairs[..., None] == indices.T[None, ...]

    # A pair exists in `indices` when every level matches the same index.
    pair_present = level_hits.all(axis=2).any(axis=2)
    # A merge event is valid only if both its target and source exist.
    event_ok = pair_present.all(axis=1)

    # An index participates in merging when it fully matches any pair of
    # any valid merge event.
    index_used = level_hits[event_ok].all(axis=2).any(axis=(0, 1))
    return event_ok, index_used
def join_tracks_pair(target: np.ndarray, source: np.ndarray) -> np.ndarray:
    """
    Join two tracks and return the new value of the target.

    The trailing zero-padded stretch of `target` (everything after its
    last strictly-positive value) is overwritten with the corresponding
    tail of `source`.

    TODO replace this with arrays only.
    """
    joined = copy(target)
    # Position, counted from the end, of the last value > 0 in the track.
    # NOTE(review): if no positive value exists, find_1st returns -1 and
    # the slice below behaves unexpectedly — confirm callers guarantee a
    # positive entry.
    end = find_1st(joined[::-1], 0, cmp_larger)
    joined[-end:] = source[-end:]
    return joined
This diff is collapsed.
Click to expand it.
src/postprocessor/core/processor.py
+
1
−
5
View file @
d99e55a3
...
...
@@ -298,11 +298,7 @@ class PostProcessor(ProcessABC):
self
.
run_prepost
()
for
i
,
(
process
,
datasets
)
in
tqdm
(
enumerate
(
self
.
targets
[
"
processes
"
])
):
if
i
==
3
:
print
(
"
stop
"
)
for
process
,
datasets
in
tqdm
(
enumerate
(
self
.
targets
[
"
processes
"
])):
if
process
in
self
.
parameters
[
"
param_sets
"
].
get
(
"
processes
"
,
{}
):
# If we assigned parameters
...
...
This diff is collapsed.
Click to expand it.
Preview
0%
Loading
Try again
or
attach a new file
.
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Save comment
Cancel
Please
register
or
sign in
to comment