# Authors: The MNE-Python contributors.
# License: BSD-3-Clause
# Copyright the MNE-Python contributors.
|
|
|
|
import os
|
|
import re
|
|
import time
|
|
from importlib.resources import files
|
|
from os import path as op
|
|
from pathlib import Path
|
|
|
|
from ...utils import _url_to_local_path, logger, verbose
|
|
from ..utils import _do_path_update, _downloader_params, _get_path, _log_time_size
|
|
|
|
# Root URL of the EEGMMI database (eegmmidb, version 1.0.0) on PhysioNet.
# Used as the default ``base_url`` of ``load_data``.
EEGMI_URL = "https://physionet.org/files/eegmmidb/1.0.0/"
|
|
|
|
|
|
@verbose
def data_path(url, path=None, force_update=False, update_path=None, *, verbose=None):
    """Get path to local copy of EEGMMI dataset URL.

    This is a low-level function useful for getting a local copy of a remote EEGBCI
    dataset :footcite:`SchalkEtAl2004`, which is also available at PhysioNet
    :footcite:`GoldbergerEtAl2000`.

    Parameters
    ----------
    url : str
        The dataset to use.
    path : None | path-like
        Location of where to look for the EEGBCI data. If ``None``, the environment
        variable or config parameter ``MNE_DATASETS_EEGBCI_PATH`` is used. If neither
        exists, the ``~/mne_data`` directory is used. If the EEGBCI dataset is not found
        under the given path, the data will be automatically downloaded to the specified
        folder.
    force_update : bool
        Force update of the dataset even if a local copy exists.
    update_path : bool | None
        If ``True``, set ``MNE_DATASETS_EEGBCI_PATH`` in the configuration to the given
        path. If ``None``, the user is prompted.
    %(verbose)s

    Returns
    -------
    path : list of Path
        Local path to the given data file. This path is contained inside a list of
        length one for compatibility.

    Notes
    -----
    The file is downloaded without checksum verification because no hash is known
    for an arbitrary URL.

    For example, one could do:

        >>> from mne.datasets import eegbci
        >>> url = "http://www.physionet.org/physiobank/database/eegmmidb/"
        >>> eegbci.data_path(url, "~/datasets") # doctest:+SKIP

    This would download the given EEGBCI data file to the ``~/datasets`` folder and
    prompt the user to store this path in the config (if it does not already exist).

    References
    ----------
    .. footbibliography::
    """
    import pooch

    key = "MNE_DATASETS_EEGBCI_PATH"
    name = "EEGBCI"
    path = _get_path(path, key, name)
    fname = "MNE-eegbci-data"
    # full local file path that the URL is mapped to below the dataset folder
    destination = _url_to_local_path(url, op.join(path, fname))

    # fetch the file
    downloader = pooch.HTTPDownloader(**_downloader_params())
    if force_update or not op.isfile(destination):
        # drop a stale copy so that a forced update really re-downloads
        if op.isfile(destination):
            os.remove(destination)
        # pooch.retrieve stores the download at ``path / fname``, so the target
        # has to be handed over as (directory, file name) rather than as one path
        target_dir, target_name = op.split(destination)
        os.makedirs(target_dir, exist_ok=True)
        pooch.retrieve(
            url=url,
            known_hash=None,  # required argument; None skips hash verification
            fname=target_name,
            path=target_dir,
            downloader=downloader,
        )

    # offer to update the path
    _do_path_update(path, update_path, key, name)
    return [Path(destination)]
|
|
|
|
|
|
@verbose
def load_data(
    subject,
    runs,
    path=None,
    force_update=False,
    update_path=None,
    base_url=EEGMI_URL,
    verbose=None,
):  # noqa: D301
    """Get paths to local copies of EEGBCI dataset files.

    Files of the EEGBCI dataset :footcite:`SchalkEtAl2004` (also hosted at PhysioNet
    :footcite:`GoldbergerEtAl2000`) that are not yet available locally are downloaded
    first.

    Parameters
    ----------
    subject : int
        The subject to use. Can be in the range of 1-109 (inclusive).
    runs : int | list of int
        The runs to use (see Notes for details).
    path : None | path-like
        Location of where to look for the EEGBCI data. If ``None``, the environment
        variable or config parameter ``MNE_DATASETS_EEGBCI_PATH`` is used. If neither
        exists, the ``~/mne_data`` directory is used. If the EEGBCI dataset is not found
        under the given path, the data will be automatically downloaded to the specified
        folder.
    force_update : bool
        Force update of the dataset even if a local copy exists.
    update_path : bool | None
        If ``True``, set ``MNE_DATASETS_EEGBCI_PATH`` in the configuration to the given
        path. If ``None``, the user is prompted.
    base_url : str
        The URL root for the data.
    %(verbose)s

    Returns
    -------
    paths : list
        List of local data paths of the given type.

    Notes
    -----
    The run numbers correspond to:

    =========  ===================================
    run        task
    =========  ===================================
    1          Baseline, eyes open
    2          Baseline, eyes closed
    3, 7, 11   Motor execution: left vs right hand
    4, 8, 12   Motor imagery: left vs right hand
    5, 9, 13   Motor execution: hands vs feet
    6, 10, 14  Motor imagery: hands vs feet
    =========  ===================================

    For example, one could do::

        >>> from mne.datasets import eegbci
        >>> eegbci.load_data(1, [6, 10, 14], "~/datasets") # doctest:+SKIP

    This would download runs 6, 10, and 14 (hand/foot motor imagery) from subject 1
    in the EEGBCI dataset to "~/datasets" and prompt the user to store this path in the
    config (if it does not already exist).

    References
    ----------
    .. footbibliography::
    """
    import pooch

    started = time.time()

    # a single run number is promoted to a one-element list
    if not hasattr(runs, "__iter__"):
        runs = [runs]

    # get local storage path
    config_key = "MNE_DATASETS_EEGBCI_PATH"
    name = "EEGBCI"
    path = _get_path(path, config_key, name)

    # the trailing URL components are mirrored as local sub-folders
    url_parts = re.match(
        r"(?:https?://.*)(files)/(eegmmidb)/(\d+\.\d+\.\d+)/?", base_url
    )
    if url_parts is None:
        raise ValueError(
            "base_url does not match the expected EEGMI folder "
            "structure. Please notify MNE-Python developers."
        )
    base_path = op.join(path, "MNE-eegbci-data", *url_parts.groups())

    # create the download manager
    fetcher = pooch.create(
        path=base_path,
        base_url=base_url,
        version=None,  # data versioning is decoupled from MNE-Python version
        registry=None,  # registry is loaded from file (below)
        retry_if_failed=2,  # 2 retries = 3 total attempts
    )

    # load the checksum registry
    fetcher.load_registry(files("mne").joinpath("data", "eegbci_checksums.txt"))

    # fetch the file(s)
    local_files = []
    n_bytes = 0  # total size of what was downloaded during this call
    for run in runs:
        rel_name = f"S{subject:03d}/S{subject:03d}R{run:02d}.edf"
        local_file = Path(base_path, rel_name)
        local_files.append(local_file)
        present = local_file.exists()
        if present and not force_update:
            continue  # reuse the local copy
        if present:
            local_file.unlink()  # forced update: discard the old copy first
        if n_bytes == 0:  # log once
            logger.info("Downloading EEGBCI data")
        fetcher.fetch(rel_name)
        n_bytes += local_file.stat().st_size

    # update path in config if desired
    _do_path_update(path, update_path, config_key, name)
    if n_bytes > 0:
        _log_time_size(started, n_bytes)
    return local_files
|
|
|
|
|
|
def standardize(raw):
    """Standardize channel positions and names.

    Leading and trailing dots are removed from every channel name and the name is
    upper-cased, except that a final ``Z`` is written as ``z`` and a leading ``FP``
    as ``Fp``.

    Parameters
    ----------
    raw : instance of Raw
        The raw data to standardize. Operates in-place.
    """

    def _canonical(label):
        # drop the dot padding, then normalize the case
        canon = label.strip(".").upper()
        if canon.endswith("Z"):
            canon = f"{canon[:-1]}z"
        if canon.startswith("FP"):
            canon = f"Fp{canon[2:]}"
        return canon

    raw.rename_channels({label: _canonical(label) for label in raw.ch_names})
|