Skip to content

Commit e564f8b

Browse files
committed
Fall back to anon=True for zarr extractors and analyzers in case backend/storage options is not provided
1 parent b1326d2 commit e564f8b

File tree

2 files changed

+57
-15
lines changed

2 files changed

+57
-15
lines changed

src/spikeinterface/core/sortinganalyzer.py

Lines changed: 29 additions & 11 deletions
Original file line numberDiff line numberDiff line change
@@ -65,18 +65,18 @@ def create_sorting_analyzer(
6565
recording : Recording
6666
The recording object
6767
folder : str or Path or None, default: None
68-
The folder where waveforms are cached
68+
The folder where analyzer is cached
6969
format : "memory" | "binary_folder" | "zarr", default: "memory"
70-
The mode to store waveforms. If "folder", waveforms are stored on disk in the specified folder.
70+
The mode to store analyzer. If "folder", the analyzer is stored on disk in the specified folder.
7171
The "folder" argument must be specified in case of mode "folder".
72-
If "memory" is used, the waveforms are stored in RAM. Use this option carefully!
72+
If "memory" is used, the analyzer is stored in RAM. Use this option carefully!
7373
sparse : bool, default: True
7474
If True, then a sparsity mask is computed using the `estimate_sparsity()` function using
7575
a few spikes to get an estimate of dense templates to create a ChannelSparsity object.
7676
Then, the sparsity will be propagated to all ResultExtention that handle sparsity (like waveforms, pca, ...)
7777
You can control `estimate_sparsity()` : all extra arguments are propagated to it (including job_kwargs)
7878
sparsity : ChannelSparsity or None, default: None
79-
The sparsity used to compute waveforms. If this is given, `sparse` is ignored.
79+
The sparsity used to compute extensions. If this is given, `sparse` is ignored.
8080
return_scaled : bool, default: True
8181
All extensions that play with traces will use this global return_scaled : "waveforms", "noise_levels", "templates".
8282
This prevents return_scaled from being different across extensions and giving a wrong snr, for instance.
@@ -98,7 +98,7 @@ def create_sorting_analyzer(
9898
--------
9999
>>> import spikeinterface as si
100100
101-
>>> # Extract dense waveforms and save to disk with binary_folder format.
101+
>>> # Create dense analyzer and save to disk with binary_folder format.
102102
>>> sorting_analyzer = si.create_sorting_analyzer(sorting, recording, format="binary_folder", folder="/path/to_my/result")
103103
104104
>>> # Can be reloaded
@@ -172,22 +172,40 @@ def load_sorting_analyzer(folder, load_extensions=True, format="auto", backend_o
172172
Parameters
173173
----------
174174
folder : str or Path
175-
The folder / zarr folder where the waveform extractor is stored
175+
The folder / zarr folder where the analyzer is stored. If the folder is a remote path stored in the cloud,
176+
the backend_options can be used to specify credentials. If the remote path is not accessible,
177+
and backend_options is not provided, the function will try to load the object in anonymous mode (anon=True),
178+
which enables loading data from open buckets.
176179
load_extensions : bool, default: True
177180
Load all extensions or not.
178181
format : "auto" | "binary_folder" | "zarr"
179182
The format of the folder.
180-
storage_options : dict | None, default: None
181-
The storage options to specify credentials to remote zarr bucket.
182-
For open buckets, it doesn't need to be specified.
183+
backend_options : dict | None, default: None
184+
The backend options for the backend.
185+
The dictionary can contain the following keys:
186+
- storage_options: dict | None (fsspec storage options)
187+
- saving_options: dict | None (additional saving options for creating and saving datasets)
183188
184189
Returns
185190
-------
186191
sorting_analyzer : SortingAnalyzer
187192
The loaded SortingAnalyzer
188193
189194
"""
190-
return SortingAnalyzer.load(folder, load_extensions=load_extensions, format=format, backend_options=backend_options)
195+
if is_path_remote(folder) and backend_options is None:
196+
try:
197+
return SortingAnalyzer.load(
198+
folder, load_extensions=load_extensions, format=format, backend_options=backend_options
199+
)
200+
except Exception as e:
201+
backend_options = dict(storage_options=dict(anon=True))
202+
return SortingAnalyzer.load(
203+
folder, load_extensions=load_extensions, format=format, backend_options=backend_options
204+
)
205+
else:
206+
return SortingAnalyzer.load(
207+
folder, load_extensions=load_extensions, format=format, backend_options=backend_options
208+
)
191209

192210

193211
class SortingAnalyzer:
@@ -2286,7 +2304,7 @@ def delete(self):
22862304

22872305
def reset(self):
22882306
"""
2289-
Reset the waveform extension.
2307+
Reset the extension.
22902308
Delete the sub folder and create a new empty one.
22912309
"""
22922310
self._reset_extension_folder()

src/spikeinterface/core/zarrextractors.py

Lines changed: 28 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -12,6 +12,7 @@
1212
from .core_tools import define_function_from_class, check_json
1313
from .job_tools import split_job_kwargs
1414
from .recording_tools import determine_cast_unsigned
15+
from .core_tools import is_path_remote
1516

1617

1718
class ZarrRecordingExtractor(BaseRecording):
@@ -21,7 +22,11 @@ class ZarrRecordingExtractor(BaseRecording):
2122
Parameters
2223
----------
2324
folder_path : str or Path
24-
Path to the zarr root folder
25+
Path to the zarr root folder. This can be a local path or a remote path (s3:// or gcs://).
26+
If the path is a remote path, the storage_options can be provided to specify credentials.
27+
If the remote path is not accessible and storage_options is not provided,
28+
the function will try to load the object in anonymous mode (anon=True),
29+
which enables loading data from open buckets.
2530
storage_options : dict or None
2631
Storage options for zarr `store`. E.g., if "s3://" or "gcs://" they can provide authentication methods, etc.
2732
@@ -35,7 +40,14 @@ def __init__(self, folder_path: Path | str, storage_options: dict | None = None)
3540

3641
folder_path, folder_path_kwarg = resolve_zarr_path(folder_path)
3742

38-
self._root = zarr.open(str(folder_path), mode="r", storage_options=storage_options)
43+
if is_path_remote(str(folder_path)) and storage_options is None:
44+
try:
45+
self._root = zarr.open(str(folder_path), mode="r", storage_options=storage_options)
46+
except Exception as e:
47+
storage_options = {"anon": True}
48+
self._root = zarr.open(str(folder_path), mode="r", storage_options=storage_options)
49+
else:
50+
self._root = zarr.open(str(folder_path), mode="r", storage_options=storage_options)
3951

4052
sampling_frequency = self._root.attrs.get("sampling_frequency", None)
4153
num_segments = self._root.attrs.get("num_segments", None)
@@ -150,7 +162,11 @@ class ZarrSortingExtractor(BaseSorting):
150162
Parameters
151163
----------
152164
folder_path : str or Path
153-
Path to the zarr root file
165+
Path to the zarr root file. This can be a local path or a remote path (s3:// or gcs://).
166+
If the path is a remote path, the storage_options can be provided to specify credentials.
167+
If the remote path is not accessible and storage_options is not provided,
168+
the function will try to load the object in anonymous mode (anon=True),
169+
which enables loading data from open buckets.
154170
storage_options : dict or None
155171
Storage options for zarr `store`. E.g., if "s3://" or "gcs://" they can provide authentication methods, etc.
156172
zarr_group : str or None, default: None
@@ -165,7 +181,15 @@ def __init__(self, folder_path: Path | str, storage_options: dict | None = None,
165181

166182
folder_path, folder_path_kwarg = resolve_zarr_path(folder_path)
167183

168-
zarr_root = self._root = zarr.open(str(folder_path), mode="r", storage_options=storage_options)
184+
if is_path_remote(str(folder_path)) and storage_options is None:
185+
try:
186+
zarr_root = zarr.open(str(folder_path), mode="r", storage_options=storage_options)
187+
except Exception as e:
188+
storage_options = {"anon": True}
189+
zarr_root = zarr.open(str(folder_path), mode="r", storage_options=storage_options)
190+
else:
191+
zarr_root = zarr.open(str(folder_path), mode="r", storage_options=storage_options)
192+
169193
if zarr_group is None:
170194
self._root = zarr_root
171195
else:

0 commit comments

Comments
 (0)