# Authors: The MNE-Python contributors.
# License: BSD-3-Clause
# Copyright the MNE-Python contributors.

import os
import os.path as op

import numpy as np

from ...utils import _check_pandas_installed, _on_missing, _TempDir, verbose
from ..utils import _downloader_params, _get_path

AGE_SLEEP_RECORDS = op.join(op.dirname(__file__), "age_records.csv")
TEMAZEPAM_SLEEP_RECORDS = op.join(op.dirname(__file__), "temazepam_records.csv")

TEMAZEPAM_RECORDS_URL = (
    "https://physionet.org/physiobank/database/sleep-edfx/ST-subjects.xls"  # noqa: E501
)
TEMAZEPAM_RECORDS_URL_SHA1 = "f52fffe5c18826a2bd4c5d5cb375bb4a9008c885"

AGE_RECORDS_URL = "https://physionet.org/physiobank/database/sleep-edfx/SC-subjects.xls"
AGE_RECORDS_URL_SHA1 = "0ba6650892c5d33a8e2b3f62ce1cc9f30438c54f"

sha1sums_fname = op.join(op.dirname(__file__), "SHA1SUMS")


def _fetch_one(fname, hashsum, path, force_update, base_url):
    """Download one file of the dataset unless a local copy can be reused.

    Parameters
    ----------
    fname : str
        File name; appended to ``base_url`` to build the URL and joined onto
        ``path`` to build the local destination.
    hashsum : str
        Expected SHA-1 hex digest, handed to pooch as ``sha1:<hashsum>``.
    path : str
        Local directory in which the file is stored.
    force_update : bool
        If True, an existing local copy is deleted and downloaded again.
    base_url : str
        URL prefix of the remote file.

    Returns
    -------
    destination : str
        Local path of the file.
    downloaded : bool
        True if this call downloaded the file, False if an existing copy was
        reused.
    """
    import pooch

    # Fetch the file
    url = base_url + "/" + fname
    destination = op.join(path, fname)
    if op.isfile(destination):
        if not force_update:
            return destination, False
        # Forced refresh: drop the stale copy before downloading again.
        os.remove(destination)
    # exist_ok avoids the check-then-create race of isdir() + makedirs().
    os.makedirs(op.dirname(destination), exist_ok=True)
    downloader = pooch.HTTPDownloader(**_downloader_params())
    pooch.retrieve(
        url=url,
        known_hash=f"sha1:{hashsum}",
        path=path,
        downloader=downloader,
        fname=fname,
    )
    return destination, True


@verbose
def _data_path(path=None, verbose=None):
    """Get path to the local storage folder of the Physionet sleep dataset.

    This is a low-level function useful for locating the local copy of a
    remote Polysomnography dataset :footcite:`KempEtAl2000` which is available
    at PhysioNet :footcite:`GoldbergerEtAl2000`. It only resolves the folder
    location; it does not download any data itself.

    Parameters
    ----------
    path : None | str
        Location of where to look for the data storing location.
        If None, the environment variable or config parameter
        ``PHYSIONET_SLEEP_PATH`` is used. If it doesn't exist, the "~/mne_data"
        directory is used.
    %(verbose)s

    Returns
    -------
    path : str
        Path to the ``physionet-sleep-data`` folder inside the resolved data
        storing location.

    References
    ----------
    .. footbibliography::
    """  # noqa: E501
    key = "PHYSIONET_SLEEP_PATH"
    name = "PHYSIONET_SLEEP"
    path = _get_path(path, key, name)
    return op.join(path, "physionet-sleep-data")


def _update_sleep_temazepam_records(fname=TEMAZEPAM_SLEEP_RECORDS):
    """Help function to download Physionet's temazepam dataset records.

    Downloads ``ST-subjects.xls`` into a temporary directory, combines it with
    the ``ST*.edf`` entries of the local ``SHA1SUMS`` file and writes the
    resulting table to ``fname`` as CSV.

    Parameters
    ----------
    fname : str
        Destination CSV file.
    """
    import pooch

    pd = _check_pandas_installed()
    tmp = _TempDir()

    # Download subjects info.
    subjects_fname = op.join(tmp, "ST-subjects.xls")
    downloader = pooch.HTTPDownloader(**_downloader_params())
    pooch.retrieve(
        url=TEMAZEPAM_RECORDS_URL,
        known_hash=f"sha1:{TEMAZEPAM_RECORDS_URL_SHA1}",
        path=tmp,
        downloader=downloader,
        fname=op.basename(subjects_fname),
    )

    # Load and Massage the checksums.
    # Each line is "<sha> <fname>"; split on any run of whitespace so that
    # both one- and two-space separated checksum files are parsed alike.
    sha1_df = pd.read_csv(
        sha1sums_fname,
        sep=r"\s+",
        header=None,
        names=["sha", "fname"],
        engine="python",
    )
    select_temazepam_records = sha1_df.fname.str.startswith(
        "ST"
    ) & sha1_df.fname.str.endswith("edf")
    # Explicit copy: a column is added below, which must not be a chained
    # assignment on a filtered view of the full table.
    sha1_df = sha1_df[select_temazepam_records].copy()
    sha1_df["id"] = [name[:6] for name in sha1_df.fname]

    # Load and massage the data.
    data = pd.read_excel(subjects_fname, header=[0, 1])
    data = data.set_index(("Subject - age - sex", "Nr"))
    data.index.name = "subject"
    data.columns.names = [None, None]
    data = (
        data.set_index(
            [("Subject - age - sex", "Age"), ("Subject - age - sex", "M1/F2")],
            append=True,
        )
        .stack(level=0)
        .reset_index()
    )

    data = data.rename(
        columns={
            ("Subject - age - sex", "Age"): "age",
            ("Subject - age - sex", "M1/F2"): "sex",
            "level_3": "drug",
        }
    )
    data["id"] = [f"ST7{s:02d}{n:1d}" for s, n in zip(data.subject, data["night nr"])]

    data = pd.merge(sha1_df, data, how="outer", on="id")
    # File names look like "<id>-<record type>.edf"; keep the middle part.
    data["record type"] = (
        data.fname.str.split("-", expand=True)[1]
        .str.split(".", expand=True)[0]
        .astype("category")
    )

    data = data.set_index(
        ["id", "subject", "age", "sex", "drug", "lights off", "night nr", "record type"]
    ).unstack()
    data.columns = [l1 + "_" + l2 for l1, l2 in data.columns]
    data = data.reset_index().drop(columns=["id"])

    data["sex"] = data.sex.astype("category").cat.rename_categories(
        {1: "male", 2: "female"}
    )

    data["drug"] = data["drug"].str.split(expand=True)[0]
    data["subject_orig"] = data["subject"]
    data["subject"] = data.index // 2  # to make sure index is from 0 to 21

    # Save the data.
    data.to_csv(fname, index=False)


def _update_sleep_age_records(fname=AGE_SLEEP_RECORDS):
    """Help function to download Physionet's age dataset records.

    Downloads ``SC-subjects.xls`` into a temporary directory, joins it with
    the ``SC*.edf`` entries of the local ``SHA1SUMS`` file and writes the
    resulting table to ``fname`` as CSV.

    Parameters
    ----------
    fname : str
        Destination CSV file.
    """
    import pooch

    pd = _check_pandas_installed()
    tmp = _TempDir()

    # Download subjects info.
    subjects_fname = op.join(tmp, "SC-subjects.xls")
    downloader = pooch.HTTPDownloader(**_downloader_params())
    pooch.retrieve(
        url=AGE_RECORDS_URL,
        known_hash=f"sha1:{AGE_RECORDS_URL_SHA1}",
        path=tmp,
        downloader=downloader,
        fname=op.basename(subjects_fname),
    )

    # Load and Massage the checksums.
    # Each line is "<sha> <fname>"; split on any run of whitespace so that
    # both one- and two-space separated checksum files are parsed alike.
    sha1_df = pd.read_csv(
        sha1sums_fname,
        sep=r"\s+",
        header=None,
        names=["sha", "fname"],
        engine="python",
    )
    select_age_records = sha1_df.fname.str.startswith(
        "SC"
    ) & sha1_df.fname.str.endswith("edf")
    # Explicit copy: a column is added below, which must not be a chained
    # assignment on a filtered view of the full table.
    sha1_df = sha1_df[select_age_records].copy()
    sha1_df["id"] = [name[:6] for name in sha1_df.fname]

    # Load and massage the data.
    data = pd.read_excel(subjects_fname)
    data = data.rename(
        index=str, columns={"sex (F=1)": "sex", "LightsOff": "lights off"}
    )
    data["sex"] = data.sex.astype("category").cat.rename_categories(
        {1: "female", 2: "male"}
    )

    data["id"] = [f"SC4{s:02d}{n:1d}" for s, n in zip(data.subject, data.night)]

    data = data.set_index("id").join(sha1_df.set_index("id")).dropna()

    # File names look like "<id>-<record type>.edf"; keep the middle part.
    data["record type"] = (
        data.fname.str.split("-", expand=True)[1]
        .str.split(".", expand=True)[0]
        .astype("category")
    )

    data = data.reset_index().drop(columns=["id"])
    data = data[
        ["subject", "night", "record type", "age", "sex", "lights off", "sha", "fname"]
    ]

    # Save the data.
    data.to_csv(fname, index=False)


def _check_subjects(subjects, n_subjects, missing=None, on_missing="raise"):
    """Check whether subjects are available.

    Valid subjects are ``0 .. n_subjects - 1`` minus those listed in
    ``missing``. If any requested subject is not valid, a message listing the
    unknown subjects is passed to ``_on_missing`` together with ``on_missing``.

    Parameters
    ----------
    subjects : list
        Subject numbers to be checked.
    n_subjects : int
        Number of subjects available.
    missing : list | None
        Subject numbers that are missing.
    on_missing : 'raise' | 'warn' | 'ignore'
        What to do if one or several subjects are not available. Valid keys
        are 'raise' | 'warn' | 'ignore'. Default is 'raise'. If on_missing
        is 'warn' it will proceed but warn, if 'ignore' it will proceed
        silently.
    """
    valid_subjects = np.arange(n_subjects)
    if missing is not None:
        valid_subjects = np.setdiff1d(valid_subjects, missing)
    unknown_subjects = np.setdiff1d(subjects, valid_subjects)
    if unknown_subjects.size > 0:
        subjects_list = ", ".join([str(s) for s in unknown_subjects])
        msg = (
            f"This dataset contains subjects 0 to {n_subjects - 1} with "
            f"missing subjects {missing}. Unknown subjects: "
            f"{subjects_list}."
        )
        _on_missing(on_missing, msg)