Dataloader returns cryptic KeyError when loading datasets created by older versions of aliby
Summary
Dataloader returns cryptic KeyError when loading datasets created by older versions of aliby.
Steps to reproduce
Version: 5b1d522c
Code:
```python
from wela.dataloader import dataloader  # import path assumed from the traceback

h5dir = "/home/jupyter-arin/data/"
wdir = "./wela_tsv_data/"
dl = dataloader(
    h5dir=h5dir,
    wdir=wdir,
)
dataname = "19972_2021_05_28_flavin_htb2_glucose_limitation_hard_Delft_01_01"
extra_g2a_dict = {
    "extraction/Flavin_bgsub/np_max/mean": "flavin",
}
dl.load(
    dataname,
    extra_g2a_dict=extra_g2a_dict,
    key_index="flavin",
    bud_fluorescence=False,
)
```
- This error occurred with datasets 19972 and 26643 (both islay), which were generated with versions of aliby (upstream fork) preceding 0.1.36. It did not occur with dataset 1649 (staffa), which was generated with aliby 0.1.61.
- This error did not occur with an earlier version of wela, f6d6eefc (30/08/2023). With this version, dataloader worked as expected, and I was able to use it to write a TSV file.
What is the current bug behaviour?
- Dataloader correctly identifies the dataset name and the signals available, and prints the signal names accordingly.
- `extra_g2a_dict` was defined because the "flavin" key contained `np_max`, from older versions of aliby, rather than `max`, from newer versions of aliby (see the sketch after this list). But no error was raised that was related to this dataset.
- Dataloader raises a KeyError; see the traceback below.
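For context, a minimal sketch of the path difference that `extra_g2a_dict` works around; the `max` path for newer aliby versions is inferred from the description above and has not been checked against a recent dataset:

```python
# Signal paths for the same measurement differ between aliby versions
# (the newer path is an assumption based on this report, not checked).
old_flavin_path = "extraction/Flavin_bgsub/np_max/mean"  # aliby < 0.1.36
new_flavin_path = "extraction/Flavin_bgsub/max/mean"     # newer aliby

# Mapping the old path to the short name "flavin" lets dl.load find the
# signal in datasets written by older aliby versions.
extra_g2a_dict = {old_flavin_path: "flavin"}
```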
Logs/Traceback
```
Loading...
bud data
---------------------------------------------------------------------------
KeyError Traceback (most recent call last)
Cell In[3], line 10
3 #dataname = "1649_2023_04_18_flavin_by4742swain_by4742morgan_tsa1tsa2morgan_lysmedia_02_00"
5 extra_g2a_dict = {
6 "extraction/Flavin_bgsub/np_max/mean": "flavin",
7 #"postprocessing/births/extraction_general_None_volume": "buddings",
8 }
---> 10 dl.load(
11 dataname,
12 extra_g2a_dict=extra_g2a_dict,
13 key_index="flavin",
14 bud_fluorescence=False,
15 )
File ~/git/wela/dataloader.py:245, in dataloader.load(self, dataname, key_index, cutoff, interpolate_list, extra_g2a_dict, pxsize, use_tsv, over_write_dict, hours, bud_fluorescence)
243 key_index = self.get_key_index(grouper, key_index_path, cutoff)
244 # load from h5 files
--> 245 r_df = self.load_h5(grouper, key_index, cutoff, interpolate_list)
246 if pxsize:
247 # convert volumes to micron^3
248 for signal in r_df.columns:
File ~/git/wela/dataloader.py:294, in dataloader.load_h5(self, grouper, key_index, cutoff, interpolate_list)
292 # load and correct buddings and bud_volume
293 print(" bud data")
--> 294 r_df = self.load_bud_data(
295 grouper,
296 cutoff,
297 figs=False,
298 key_index=key_index,
299 interpolate_list=interpolate_list,
300 )
301 # load other signals
302 for i, sigpath in enumerate(self.g2a_dict):
File ~/git/wela/dataloader.py:365, in dataloader.load_bud_data(self, grouper, cutoff, figs, key_index, interpolate_list)
363 bud_interpolate_indices = None
364 # load buddings
--> 365 buddings = grouper.concat_signal(
366 self.a2g_dict["buddings"], cutoff=cutoff
367 )
368 # bud_volume and any other signals; missing signals return None
369 bud_data = [
370 grouper.concat_signal(self.a2g_dict[bud_signal], cutoff=0)
371 for bud_signal in bud_signals
372 ]
File ~/git/alibylite/src/postprocessor/grouper.py:116, in Grouper.concat_signal(self, path, pool, mode, **kwargs)
114 if good_positions:
115 kwargs["mode"] = mode
--> 116 records = self.pool_function(
117 path=path,
118 f=concat_one_signal,
119 pool=pool,
120 positions=good_positions,
121 **kwargs,
122 )
123 # check for errors
124 errors = [
125 position
126 for record, position in zip(records, self.positions.keys())
127 if record is None
128 ]
File ~/git/alibylite/src/postprocessor/grouper.py:184, in Grouper.pool_function(self, path, f, pool, positions, **kwargs)
173 records = p.map(
174 lambda x: f(
175 path=path,
(...)
181 positions.items(),
182 )
183 else:
--> 184 records = [
185 f(
186 path=path,
187 position=position,
188 group=self.positions_groups[name],
189 position_name=name,
190 **kwargs,
191 )
192 for name, position in self.positions.items()
193 ]
194 return records
File ~/git/alibylite/src/postprocessor/grouper.py:185, in <listcomp>(.0)
173 records = p.map(
174 lambda x: f(
175 path=path,
(...)
181 positions.items(),
182 )
183 else:
184 records = [
--> 185 f(
186 path=path,
187 position=position,
188 group=self.positions_groups[name],
189 position_name=name,
190 **kwargs,
191 )
192 for name, position in self.positions.items()
193 ]
194 return records
File ~/git/alibylite/src/postprocessor/grouper.py:287, in concat_one_signal(path, position, group, mode, position_name, **kwargs)
285 position_name = position.stem
286 if mode == "retained":
--> 287 combined = position.retained(path, **kwargs)
288 elif mode == "raw":
289 combined = position.get_raw(path, **kwargs)
File ~/git/alibylite/src/agora/io/signal.py:105, in Signal.retained(self, signal, cutoff)
103 """Get retained cells for a Signal or list of Signals."""
104 if isinstance(signal, str):
--> 105 signal = self.get(signal)
106 if isinstance(signal, pd.DataFrame):
107 return self.get_retained(signal, cutoff)
File ~/git/alibylite/src/agora/io/signal.py:60, in Signal.get(self, dset_name, **kwargs)
58 dsets = self.get_raw(dset_name, **kwargs)
59 if dsets is not None:
---> 60 picked_merged = self.apply_merging_picking(dsets, **kwargs)
61 return self.add_name(picked_merged, dset_name)
62 else:
File ~/git/alibylite/src/agora/io/decorators.py:22, in _first_arg_str_to_raw_df.<locals>.format_input(*args, **kwargs)
20 data = cls.get_raw(data)
21 # replace path in the undecorated function with data
---> 22 return fn(cls, data, *args[2:], **kwargs)
File ~/git/alibylite/src/agora/io/signal.py:179, in Signal.apply_merging_picking(self, data, merges, picks)
177 merges = self.load_merges() if merges else np.array([])
178 if merges.any():
--> 179 merged = apply_merges(data, merges)
180 else:
181 merged = copy(data)
File ~/git/alibylite/src/agora/utils/merge.py:138, in apply_merges(data, merges)
135 # join left track with right track
136 for left_index, right_index in zip(left_indices, right_indices):
137 to_merge.loc[tuple(left_index)] = join_two_tracks(
--> 138 to_merge.loc[tuple(left_index)].values,
139 to_merge.loc[tuple(right_index)].values,
140 )
141 # drop indices for right tracks
142 to_merge.drop(map(tuple, right_indices), inplace=True)
File ~/.conda/envs/alibylite/lib/python3.8/site-packages/pandas/core/indexing.py:925, in _LocationIndexer.__getitem__(self, key)
923 with suppress(KeyError, IndexError):
924 return self.obj._get_value(*key, takeable=self._takeable)
--> 925 return self._getitem_tuple(key)
926 else:
927 # we by definition only have the 0th axis
928 axis = self.axis or 0
File ~/.conda/envs/alibylite/lib/python3.8/site-packages/pandas/core/indexing.py:1100, in _LocIndexer._getitem_tuple(self, tup)
1098 def _getitem_tuple(self, tup: tuple):
1099 with suppress(IndexingError):
-> 1100 return self._getitem_lowerdim(tup)
1102 # no multi-index, so validate all of the indexers
1103 self._has_valid_tuple(tup)
File ~/.conda/envs/alibylite/lib/python3.8/site-packages/pandas/core/indexing.py:838, in _LocationIndexer._getitem_lowerdim(self, tup)
834 for i, key in enumerate(tup):
835 if is_label_like(key):
836 # We don't need to check for tuples here because those are
837 # caught by the _is_nested_tuple_indexer check above.
--> 838 section = self._getitem_axis(key, axis=i)
840 # We should never have a scalar section here, because
841 # _getitem_lowerdim is only called after a check for
842 # is_scalar_access, which that would be.
843 if section.ndim == self.ndim:
844 # we're in the middle of slicing through a MultiIndex
845 # revise the key wrt to `section` by inserting an _NS
File ~/.conda/envs/alibylite/lib/python3.8/site-packages/pandas/core/indexing.py:1164, in _LocIndexer._getitem_axis(self, key, axis)
1162 # fall thru to straight lookup
1163 self._validate_key(key, axis)
-> 1164 return self._get_label(key, axis=axis)
File ~/.conda/envs/alibylite/lib/python3.8/site-packages/pandas/core/indexing.py:1113, in _LocIndexer._get_label(self, label, axis)
1111 def _get_label(self, label, axis: int):
1112 # GH#5667 this will fail if the label is not present in the axis.
-> 1113 return self.obj.xs(label, axis=axis)
File ~/.conda/envs/alibylite/lib/python3.8/site-packages/pandas/core/generic.py:3770, in NDFrame.xs(self, key, axis, level, drop_level)
3768 if isinstance(index, MultiIndex):
3769 try:
-> 3770 loc, new_index = index._get_loc_level(
3771 key, level=0, drop_level=drop_level
3772 )
3773 except TypeError as e:
3774 raise TypeError(f"Expected label or tuple of labels, got {key}") from e
File ~/.conda/envs/alibylite/lib/python3.8/site-packages/pandas/core/indexes/multi.py:3112, in MultiIndex._get_loc_level(self, key, level, drop_level)
3110 return indexer, maybe_mi_droplevels(indexer, ilevels, drop_level)
3111 else:
-> 3112 indexer = self._get_level_indexer(key, level=level)
3113 return indexer, maybe_mi_droplevels(indexer, [level], drop_level)
File ~/.conda/envs/alibylite/lib/python3.8/site-packages/pandas/core/indexes/multi.py:3223, in MultiIndex._get_level_indexer(self, key, level, indexer)
3219 end = level_codes.searchsorted(idx, side="right")
3221 if start == end:
3222 # The label is present in self.levels[level] but unused:
-> 3223 raise KeyError(key)
3224 return slice(start, end)
KeyError: 2
```
Possible fixes
I suspect that the error has something to do with how the full key for "buddings" is passed to grouper, but beyond that I am not sure.
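One way to narrow this down, given where the traceback ends (`apply_merges` indexing `to_merge.loc` with labels taken from the stored merges), is to check whether the merges recorded in a position's h5 file refer to cell indices that are absent from the buddings signal. This is a diagnostic sketch only: the `Signal` constructor call, the h5 file name, the buddings signal path, and the shape of the merges array are all assumptions, not confirmed wela/alibylite API.

```python
from agora.io.signal import Signal  # module path taken from the traceback

# Hypothetical position file and signal path; adjust to the failing dataset.
h5file = "/home/jupyter-arin/data/19972_position01.h5"
buddings_path = "postprocessing/buddings/extraction_general_None_volume"

signal = Signal(h5file)  # constructor signature assumed
raw = signal.get_raw(buddings_path)  # called in the traceback before merging
merges = signal.load_merges()  # called by apply_merging_picking per traceback

# Each merge pairs a left and a right (trap, cell_label) index; assuming the
# array has shape (n, 2, 2), flag any index missing from the raw dataframe.
present = set(raw.index)
missing = [
    tuple(idx)
    for pair in merges
    for idx in pair
    if tuple(idx) not in present
]
print(f"{len(missing)} merge indices not found in the buddings signal")
print(missing[:10])
```

If `missing` is non-empty for the old datasets but empty for dataset 1649, that would support the idea that merges written by pre-0.1.36 aliby reference indices that the current wela/alibylite lookup no longer finds.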