# Authors: The MNE-Python contributors.
# License: BSD-3-Clause
# Copyright the MNE-Python contributors.

"""EGI NetStation Load Function."""

import datetime
import math
import os.path as op
import re
from collections import OrderedDict
from pathlib import Path

import numpy as np

from ..._fiff.constants import FIFF
from ..._fiff.meas_info import _empty_info, _ensure_meas_date_none_or_dt, create_info
from ..._fiff.proj import setup_proj
from ..._fiff.utils import _create_chs, _mult_cal_one
from ...annotations import Annotations
from ...channels.montage import make_dig_montage
from ...evoked import EvokedArray
from ...utils import _check_fname, _check_option, _soft_import, logger, verbose, warn
from ..base import BaseRaw
from .events import _combine_triggers, _read_events, _triage_include_exclude
from .general import (
    _block_r,
    _extract,
    _get_blocks,
    _get_ep_info,
    _get_gains,
    _get_signalfname,
)

REFERENCE_NAMES = ("VREF", "Vertex Reference")


def _read_mff_header(filepath):
    """Read mff header."""
    _soft_import("defusedxml", "reading EGI MFF data")
    from defusedxml.minidom import parse

    all_files = _get_signalfname(filepath)
    eeg_file = all_files["EEG"]["signal"]
    eeg_info_file = all_files["EEG"]["info"]

    info_filepath = op.join(filepath, "info.xml")  # add with filepath
    tags = ["mffVersion", "recordTime"]
    version_and_date = _extract(tags, filepath=info_filepath)
    version = ""
    if len(version_and_date["mffVersion"]):
        version = version_and_date["mffVersion"][0]

    fname = op.join(filepath, eeg_file)
    signal_blocks = _get_blocks(fname)
    epochs = _get_ep_info(filepath)
    summaryinfo = dict(eeg_fname=eeg_file, info_fname=eeg_info_file)
    summaryinfo.update(signal_blocks)
    # sanity check and update relevant values
    record_time = version_and_date["recordTime"][0]
    # e.g.,
    # 2018-07-30T10:47:01.021673-04:00
    # 2017-09-20T09:55:44.072000000+01:00
    g = re.match(
        r"\d{4}-\d{2}-\d{2}T\d{2}:\d{2}:\d{2}\.(\d{6}(?:\d{3})?)[+-]\d{2}:\d{2}",
        record_time,
    )
    if g is None:
        raise RuntimeError(f"Could not parse recordTime {repr(record_time)}")
    frac = g.groups()[0]
    assert len(frac) in (6, 9) and all(f.isnumeric() for f in frac)  # regex
    div = 1000 if len(frac) == 6 else 1000000
    for key in ("last_samps", "first_samps"):
        # convert from times in µs to samples
        for ei, e in enumerate(epochs[key]):
            if e % div != 0:
                raise RuntimeError(f"Could not parse epoch time {e}")
            epochs[key][ei] = e // div
        epochs[key] = np.array(epochs[key], np.uint64)
        # I guess they refer to times in milliseconds?
        # What we really need to do here is:
        # epochs[key] *= signal_blocks['sfreq']
        # epochs[key] //= 1000
        # But that multiplication risks an overflow, so let's only multiply
        # by what we need to (e.g., a sample rate of 500 means we can multiply
        # by 1 and divide by 2 rather than multiplying by 500 and dividing by
        # 1000)
        numerator = int(signal_blocks["sfreq"])
        denominator = 1000
        this_gcd = math.gcd(numerator, denominator)
        numerator = numerator // this_gcd
        denominator = denominator // this_gcd
        with np.errstate(over="raise"):
            epochs[key] *= numerator
            epochs[key] //= denominator
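        # Worked example: with sfreq=500 the reduced factors are 1 and 2, so
        # an epoch time of 4000 ms becomes 4000 * 1 // 2 = 2000 samples
        # (4 s at 500 Hz) without ever forming the large product 4000 * 500.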
        # Should be safe to cast to int now, which makes things later not
        # upbroadcast to float
        epochs[key] = epochs[key].astype(np.int64)
    n_samps_block = signal_blocks["samples_block"].sum()
    n_samps_epochs = (epochs["last_samps"] - epochs["first_samps"]).sum()
    bad = (
        n_samps_epochs != n_samps_block
        or not (epochs["first_samps"] < epochs["last_samps"]).all()
        or not (epochs["first_samps"][1:] >= epochs["last_samps"][:-1]).all()
    )
    if bad:
        raise RuntimeError(
            "EGI epoch first/last samps could not be parsed:\n"
            f'{list(epochs["first_samps"])}\n{list(epochs["last_samps"])}'
        )
    summaryinfo.update(epochs)

    # index which samples in raw are actually readable from disk (i.e., not
    # in a skip)
    disk_samps = np.full(epochs["last_samps"][-1], -1)
    offset = 0
    for first, last in zip(epochs["first_samps"], epochs["last_samps"]):
        n_this = last - first
        disk_samps[first:last] = np.arange(offset, offset + n_this)
        offset += n_this
    summaryinfo["disk_samps"] = disk_samps

    # Add the sensor info.
    sensor_layout_file = op.join(filepath, "sensorLayout.xml")
    sensor_layout_obj = parse(sensor_layout_file)
    summaryinfo["device"] = sensor_layout_obj.getElementsByTagName("name")[
        0
    ].firstChild.data
    sensors = sensor_layout_obj.getElementsByTagName("sensor")
    chan_type = list()
    chan_unit = list()
    n_chans = 0
    numbers = list()  # used for identification
    for sensor in sensors:
        sensortype = int(sensor.getElementsByTagName("type")[0].firstChild.data)
        if sensortype in [0, 1]:
            sn = sensor.getElementsByTagName("number")[0].firstChild.data
            sn = sn.encode()
            numbers.append(sn)
            chan_type.append("eeg")
            chan_unit.append("uV")
            n_chans = n_chans + 1
    if n_chans != summaryinfo["n_channels"]:
        raise RuntimeError(
            "Number of defined channels (%d) did not match the "
            "expected channels (%d)" % (n_chans, summaryinfo["n_channels"])
        )

    # Check presence of PNS data
    pns_names = []
    if "PNS" in all_files:
        pns_fpath = op.join(filepath, all_files["PNS"]["signal"])
        pns_blocks = _get_blocks(pns_fpath)
        pns_samples = pns_blocks["samples_block"]
        signal_samples = signal_blocks["samples_block"]
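        # The last PNS block may be one sample shorter than the last EEG
        # block (the np.arange(2) below tolerates an off-by-one); this is
        # the EGI PSG bug that gets zero-filled and annotated at read time.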
        same_blocks = np.array_equal(
            pns_samples[:-1], signal_samples[:-1]
        ) and pns_samples[-1] in (signal_samples[-1] - np.arange(2))
        if not same_blocks:
            raise RuntimeError(
                "PNS and signals samples did not match:\n"
                f"{list(pns_samples)}\nvs\n{list(signal_samples)}"
            )

        pns_file = op.join(filepath, "pnsSet.xml")
        pns_obj = parse(pns_file)
        sensors = pns_obj.getElementsByTagName("sensor")
        pns_types = []
        pns_units = []
        for sensor in sensors:
            # sensor number:
            # sensor.getElementsByTagName('number')[0].firstChild.data
            name = sensor.getElementsByTagName("name")[0].firstChild.data
            unit_elem = sensor.getElementsByTagName("unit")[0].firstChild
            unit = ""
            if unit_elem is not None:
                unit = unit_elem.data

            if name == "ECG":
                ch_type = "ecg"
            elif "EMG" in name:
                ch_type = "emg"
            else:
                ch_type = "bio"
            pns_types.append(ch_type)
            pns_units.append(unit)
            pns_names.append(name)

        summaryinfo.update(
            pns_types=pns_types,
            pns_units=pns_units,
            pns_fname=all_files["PNS"]["signal"],
            pns_sample_blocks=pns_blocks,
        )
    summaryinfo.update(
        pns_names=pns_names,
        version=version,
        date=version_and_date["recordTime"][0],
        chan_type=chan_type,
        chan_unit=chan_unit,
        numbers=numbers,
    )
    return summaryinfo


def _read_header(input_fname):
    """Obtain the headers from the MFF file package.

    Parameters
    ----------
    input_fname : path-like
        Path for the file.

    Returns
    -------
    info : dict
        Main headers set.
    """
    input_fname = str(input_fname)  # cast to str any Paths
    mff_hdr = _read_mff_header(input_fname)
    with open(input_fname + "/signal1.bin", "rb") as fid:
        version = np.fromfile(fid, np.int32, 1)[0]

    # The datetime.strptime %f directive (microseconds) only accepts up to
    # 6 digits. If there are more than six sub-second digits in the provided
    # timestamp string (e.g., because of trailing zeros, as in
    # test_egi_pns.mff), slice the first 26 characters (date, time, and six
    # fractional digits) and the last 6 characters (the timezone), piece
    # them together, and assign back to mff_hdr['date'].
    if len(mff_hdr["date"]) > 32:
        dt, tz = [mff_hdr["date"][:26], mff_hdr["date"][-6:]]
        mff_hdr["date"] = dt + tz
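        # e.g., "2017-09-20T09:55:44.072000000+01:00" (35 characters)
        # becomes "2017-09-20T09:55:44.072000+01:00", which %f can parse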
    time_n = datetime.datetime.strptime(mff_hdr["date"], "%Y-%m-%dT%H:%M:%S.%f%z")

    info = dict(
        version=version,
        meas_dt_local=time_n,
        utc_offset=time_n.strftime("%z"),
        gain=0,
        bits=0,
        value_range=0,
    )
    info.update(
        n_categories=0,
        n_segments=1,
        n_events=0,
        event_codes=[],
        category_names=[],
        category_lengths=[],
        pre_baseline=0,
    )
    info.update(mff_hdr)
    return info


def _get_eeg_calibration_info(filepath, egi_info):
    """Calculate calibration info for EEG channels."""
    gains = _get_gains(op.join(filepath, egi_info["info_fname"]))
    if egi_info["value_range"] != 0 and egi_info["bits"] != 0:
        cals = [egi_info["value_range"] / 2 ** egi_info["bits"]] * len(
            egi_info["chan_type"]
        )
    else:
        cal_scales = {"uV": 1e-6, "V": 1}
        cals = [cal_scales[t] for t in egi_info["chan_unit"]]
    if "gcal" in gains:
        cals *= gains["gcal"]
    return cals


def _read_locs(filepath, egi_info, channel_naming):
    """Read channel locations."""
    _soft_import("defusedxml", "reading EGI MFF data")
    from defusedxml.minidom import parse

    fname = op.join(filepath, "coordinates.xml")
    if not op.exists(fname):
        logger.warning("File coordinates.xml not found, not setting channel locations")
        ch_names = [channel_naming % (i + 1) for i in range(egi_info["n_channels"])]
        return ch_names, None
    dig_ident_map = {
        "Left periauricular point": "lpa",
        "Right periauricular point": "rpa",
        "Nasion": "nasion",
    }
    numbers = np.array(egi_info["numbers"])
    coordinates = parse(fname)
    sensors = coordinates.getElementsByTagName("sensor")
    ch_pos = OrderedDict()
    hsp = list()
    nlr = dict()
    ch_names = list()
    for sensor in sensors:
        name_element = sensor.getElementsByTagName("name")[0].firstChild
        num_element = sensor.getElementsByTagName("number")[0].firstChild
        name = (
            channel_naming % int(num_element.data)
            if name_element is None
            else name_element.data
        )
        nr = num_element.data.encode()
        coords = [
            float(sensor.getElementsByTagName(coord)[0].firstChild.data)
            for coord in "xyz"
        ]
        loc = np.array(coords) / 100  # cm -> m
        # create dig entry
        if name in dig_ident_map:
            nlr[dig_ident_map[name]] = loc
        else:
            # id_ is the index of the channel in egi_info['numbers']
            id_ = np.flatnonzero(numbers == nr)
            # if it's not in egi_info['numbers'], it's a headshape point
            if len(id_) == 0:
                hsp.append(loc)
            # not HSP, must be a data or reference channel
            else:
                ch_names.append(name)
                ch_pos[name] = loc
    mon = make_dig_montage(ch_pos=ch_pos, hsp=hsp, **nlr)
    return ch_names, mon


def _add_pns_channel_info(chs, egi_info, ch_names):
    """Add info for PNS channels to channel info dict."""
    for i_ch, ch_name in enumerate(egi_info["pns_names"]):
        idx = ch_names.index(ch_name)
        ch_type = egi_info["pns_types"][i_ch]
        type_to_kind_map = {"ecg": FIFF.FIFFV_ECG_CH, "emg": FIFF.FIFFV_EMG_CH}
        ch_kind = type_to_kind_map.get(ch_type, FIFF.FIFFV_BIO_CH)
        ch_unit = FIFF.FIFF_UNIT_V
        ch_cal = 1e-6
        if egi_info["pns_units"][i_ch] != "uV":
            ch_unit = FIFF.FIFF_UNIT_NONE
            ch_cal = 1.0
        chs[idx].update(
            cal=ch_cal, kind=ch_kind, coil_type=FIFF.FIFFV_COIL_NONE, unit=ch_unit
        )
    return chs


@verbose
def _read_raw_egi_mff(
    input_fname,
    eog=None,
    misc=None,
    include=None,
    exclude=None,
    preload=False,
    channel_naming="E%d",
    *,
    events_as_annotations=False,
    verbose=None,
):
    """Read EGI mff binary as raw object."""
    return RawMff(
        input_fname,
        eog,
        misc,
        include,
        exclude,
        preload,
        channel_naming,
        events_as_annotations=events_as_annotations,
        verbose=verbose,
    )


class RawMff(BaseRaw):
    """RawMff class."""

    _extra_attributes = ("event_id",)

    @verbose
    def __init__(
        self,
        input_fname,
        eog=None,
        misc=None,
        include=None,
        exclude=None,
        preload=False,
        channel_naming="E%d",
        *,
        events_as_annotations=True,
        verbose=None,
    ):
        """Init the RawMff class."""
        input_fname = str(
            _check_fname(
                input_fname,
                "read",
                True,
                "input_fname",
                need_dir=True,
            )
        )
        logger.info(f"Reading EGI MFF Header from {input_fname}...")
        egi_info = _read_header(input_fname)
        if eog is None:
            eog = []
        if misc is None:
            misc = np.where(np.array(egi_info["chan_type"]) != "eeg")[0].tolist()

        logger.info("    Reading events ...")
        egi_events, egi_info, mff_events = _read_events(input_fname, egi_info)
        cals = _get_eeg_calibration_info(input_fname, egi_info)
        logger.info("    Assembling measurement info ...")
        event_codes = egi_info["event_codes"]
        include = _triage_include_exclude(include, exclude, egi_events, egi_info)

        if egi_info["n_events"] > 0 and not events_as_annotations:
            logger.info('    Synthesizing trigger channel "STI 014" ...')
            if all(ch.startswith("D") for ch in include):
                # support the DIN format DIN1, DIN2, ..., DIN9, DI10, DI11,
                # ... DI99, D100, D101, ..., D255 that we get when sending
                # 0-255 triggers on a parallel port.
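                # e.g., "DIN7" -> 7 and "D128" -> 128 once the leading
                # letters are stripped below.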
                events_ids = list()
                for ch in include:
                    while not ch[0].isnumeric():
                        ch = ch[1:]
                    events_ids.append(int(ch))
            else:
                events_ids = np.arange(len(include)) + 1
            egi_info["new_trigger"] = _combine_triggers(
                egi_events[[c in include for c in event_codes]], remapping=events_ids
            )
            self.event_id = dict(
                zip([e for e in event_codes if e in include], events_ids)
            )
            if egi_info["new_trigger"] is not None:
                egi_events = np.vstack([egi_events, egi_info["new_trigger"]])
        else:
            self.event_id = None
            egi_info["new_trigger"] = None
        assert egi_events.shape[1] == egi_info["last_samps"][-1]

        meas_dt_utc = egi_info["meas_dt_local"].astimezone(datetime.timezone.utc)
        info = _empty_info(egi_info["sfreq"])
        info["meas_date"] = _ensure_meas_date_none_or_dt(meas_dt_utc)
        info["utc_offset"] = egi_info["utc_offset"]
        info["device_info"] = dict(type=egi_info["device"])

        # read in the montage, if it exists
        ch_names, mon = _read_locs(input_fname, egi_info, channel_naming)
        # Second: Stim
        ch_names.extend(list(egi_info["event_codes"]))
        n_extra = len(event_codes) + len(misc) + len(eog) + len(egi_info["pns_names"])
        if egi_info["new_trigger"] is not None:
            ch_names.append("STI 014")  # channel for combined events
            n_extra += 1
        # Third: PNS
        ch_names.extend(egi_info["pns_names"])
        cals = np.concatenate([cals, np.ones(n_extra)])
        assert len(cals) == len(ch_names), (len(cals), len(ch_names))

        # Actually create channels as EEG, then update stim and PNS
        ch_coil = FIFF.FIFFV_COIL_EEG
        ch_kind = FIFF.FIFFV_EEG_CH
        chs = _create_chs(ch_names, cals, ch_coil, ch_kind, eog, (), (), misc)
        sti_ch_idx = [
            i
            for i, name in enumerate(ch_names)
            if name.startswith("STI") or name in event_codes
        ]
        for idx in sti_ch_idx:
            chs[idx].update(
                {
                    "unit_mul": FIFF.FIFF_UNITM_NONE,
                    "cal": cals[idx],
                    "kind": FIFF.FIFFV_STIM_CH,
                    "coil_type": FIFF.FIFFV_COIL_NONE,
                    "unit": FIFF.FIFF_UNIT_NONE,
                }
            )
        chs = _add_pns_channel_info(chs, egi_info, ch_names)
        info["chs"] = chs
        info._unlocked = False
        info._update_redundant()
        if mon is not None:
            info.set_montage(mon, on_missing="ignore")
            ref_idx = np.flatnonzero(np.isin(mon.ch_names, REFERENCE_NAMES))
            if len(ref_idx):
                ref_idx = ref_idx.item()
                ref_coords = info["chs"][int(ref_idx)]["loc"][:3]
                for chan in info["chs"]:
                    if chan["kind"] == FIFF.FIFFV_EEG_CH:
                        chan["loc"][3:6] = ref_coords

        file_bin = op.join(input_fname, egi_info["eeg_fname"])
        egi_info["egi_events"] = egi_events

        # Check how many channels to read are from EEG
        keys = ("eeg", "sti", "pns")
        idx = dict()
        idx["eeg"] = np.where([ch["kind"] == FIFF.FIFFV_EEG_CH for ch in chs])[0]
        idx["sti"] = np.where([ch["kind"] == FIFF.FIFFV_STIM_CH for ch in chs])[0]
        idx["pns"] = np.where(
            [
                ch["kind"] in (FIFF.FIFFV_ECG_CH, FIFF.FIFFV_EMG_CH, FIFF.FIFFV_BIO_CH)
                for ch in chs
            ]
        )[0]
        # By construction this should always be true, but check anyway
        if not np.array_equal(
            np.concatenate([idx[key] for key in keys]), np.arange(len(chs))
        ):
            raise ValueError(
                "Currently interlacing EEG and PNS channels is not supported"
            )
        egi_info["kind_bounds"] = [0]
        for key in keys:
            egi_info["kind_bounds"].append(len(idx[key]))
        egi_info["kind_bounds"] = np.cumsum(egi_info["kind_bounds"])
        assert egi_info["kind_bounds"][0] == 0
        assert egi_info["kind_bounds"][-1] == info["nchan"]

        first_samps = [0]
        last_samps = [egi_info["last_samps"][-1] - 1]

        annot = dict(onset=list(), duration=list(), description=list())

        if len(idx["pns"]):
            # PNS Data is present and should be read:
            egi_info["pns_filepath"] = op.join(input_fname, egi_info["pns_fname"])
            # Check for PNS bug immediately
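            # (the last PNS sample can be missing on disk; it is zero-filled
            # in _read_segment_file and annotated as BAD_EGI_PSG below)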
            pns_samples = np.sum(egi_info["pns_sample_blocks"]["samples_block"])
            eeg_samples = np.sum(egi_info["samples_block"])
            if pns_samples == eeg_samples - 1:
                warn("This file has the EGI PSG sample bug")
                annot["onset"].append(last_samps[-1] / egi_info["sfreq"])
                annot["duration"].append(1 / egi_info["sfreq"])
                annot["description"].append("BAD_EGI_PSG")
            elif pns_samples != eeg_samples:
                raise RuntimeError(
                    "PNS samples (%d) did not match EEG samples (%d)"
                    % (pns_samples, eeg_samples)
                )

        self._filenames = [file_bin]
        self._raw_extras = [egi_info]

        super().__init__(
            info,
            preload=preload,
            orig_format="single",
            filenames=[file_bin],
            first_samps=first_samps,
            last_samps=last_samps,
            raw_extras=[egi_info],
            verbose=verbose,
        )

        # Annotate acquisition skips
        for first, prev_last in zip(
            egi_info["first_samps"][1:], egi_info["last_samps"][:-1]
        ):
            gap = first - prev_last
            assert gap >= 0
            if gap:
                annot["onset"].append((prev_last - 0.5) / egi_info["sfreq"])
                annot["duration"].append(gap / egi_info["sfreq"])
                annot["description"].append("BAD_ACQ_SKIP")
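        # e.g., epochs with first_samps=[0, 1000] and last_samps=[500, 1500]
        # leave a 500-sample gap on disk, annotated above as BAD_ACQ_SKIP
        # with onset (500 - 0.5) / sfreq and duration 500 / sfreq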
        # create events from annotations
        if events_as_annotations:
            for code, samples in mff_events.items():
                if code not in include:
                    continue
                annot["onset"].extend(np.array(samples) / egi_info["sfreq"])
                annot["duration"].extend([0.0] * len(samples))
                annot["description"].extend([code] * len(samples))

        if len(annot["onset"]):
            self.set_annotations(Annotations(**annot))

    def _read_segment_file(self, data, idx, fi, start, stop, cals, mult):
        """Read a chunk of data."""
        logger.debug(f"Reading MFF {start:6d} ... {stop:6d} ...")
        dtype = "<f4"  # data are stored as little-endian single precision

        egi_info = self._raw_extras[fi]
        one = np.zeros((egi_info["kind_bounds"][-1], stop - start))

        # info about the binary file structure
        n_channels = egi_info["n_channels"]
        samples_block = egi_info["samples_block"]

        # Check how many channels to read are from each type
        bounds = egi_info["kind_bounds"]
        if isinstance(idx, slice):
            idx = np.arange(idx.start, idx.stop)
        eeg_out = np.where(idx < bounds[1])[0]
        eeg_one = idx[eeg_out, np.newaxis]
        eeg_in = idx[eeg_out]
        stim_out = np.where((idx >= bounds[1]) & (idx < bounds[2]))[0]
        stim_one = idx[stim_out]
        stim_in = idx[stim_out] - bounds[1]
        pns_out = np.where((idx >= bounds[2]) & (idx < bounds[3]))[0]
        pns_in = idx[pns_out] - bounds[2]
        pns_one = idx[pns_out, np.newaxis]
        del eeg_out, stim_out, pns_out
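        # e.g., with bounds [0, 256, 259, 261] (256 EEG, 3 stim, 2 PNS
        # channels), a requested idx of 257 maps to row 1 of the stim
        # (events) array.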
        # take into account events (already extended to correct size)
        one[stim_one, :] = egi_info["egi_events"][stim_in, start:stop]

        # Convert start and stop to limits in terms of the data
        # actually on disk, plus an indexer (disk_use_idx) that populates
        # the potentially larger `data` with it, taking skips into account
        disk_samps = egi_info["disk_samps"][start:stop]
        disk_use_idx = np.where(disk_samps > -1)[0]
        # short circuit in case we don't need any samples
        if not len(disk_use_idx):
            _mult_cal_one(data, one, idx, cals, mult)
            return

        start = disk_samps[disk_use_idx[0]]
        stop = disk_samps[disk_use_idx[-1]] + 1
        assert len(disk_use_idx) == stop - start

        # Get starting/stopping block/samples
        block_samples_offset = np.cumsum(samples_block)
        offset_blocks = np.sum(block_samples_offset <= start)
        offset_samples = start - (
            block_samples_offset[offset_blocks - 1] if offset_blocks > 0 else 0
        )

        # TODO: Refactor this reading with the PNS reading in a single function
        # (DRY)
        samples_to_read = stop - start
        with open(self._filenames[fi], "rb", buffering=0) as fid:
            # Go to starting block
            current_block = 0
            current_block_info = None
            current_data_sample = 0
            while current_block < offset_blocks:
                this_block_info = _block_r(fid)
                if this_block_info is not None:
                    current_block_info = this_block_info
                fid.seek(current_block_info["block_size"], 1)
                current_block += 1

            # Start reading samples
            while samples_to_read > 0:
                logger.debug(f"    Reading from block {current_block}")
                this_block_info = _block_r(fid)
                current_block += 1
                if this_block_info is not None:
                    current_block_info = this_block_info

                to_read = current_block_info["nsamples"] * current_block_info["nc"]
                block_data = np.fromfile(fid, dtype, to_read)
                block_data = block_data.reshape(n_channels, -1, order="C")

                # Compute indexes
                samples_read = block_data.shape[1]
                logger.debug(f"    Read {samples_read} samples")
                logger.debug(f"    Offset {offset_samples} samples")
                if offset_samples > 0:
                    # First block read, skip to the offset:
                    block_data = block_data[:, offset_samples:]
                    samples_read = samples_read - offset_samples
                    offset_samples = 0
                if samples_to_read < samples_read:
                    # Last block to read, skip the last samples
                    block_data = block_data[:, :samples_to_read]
                    samples_read = samples_to_read
                logger.debug(f"    Keep {samples_read} samples")

                s_start = current_data_sample
                s_end = s_start + samples_read

                one[eeg_one, disk_use_idx[s_start:s_end]] = block_data[eeg_in]
                samples_to_read = samples_to_read - samples_read
                current_data_sample = current_data_sample + samples_read

        if len(pns_one) > 0:
            # PNS Data is present and should be read:
            pns_filepath = egi_info["pns_filepath"]
            pns_info = egi_info["pns_sample_blocks"]
            n_channels = pns_info["n_channels"]
            samples_block = pns_info["samples_block"]

            # Get starting/stopping block/samples
            block_samples_offset = np.cumsum(samples_block)
            offset_blocks = np.sum(block_samples_offset < start)
            offset_samples = start - (
                block_samples_offset[offset_blocks - 1] if offset_blocks > 0 else 0
            )

            samples_to_read = stop - start
            with open(pns_filepath, "rb", buffering=0) as fid:
                # Check file size
                fid.seek(0, 2)
                file_size = fid.tell()
                fid.seek(0)
                # Go to starting block
                current_block = 0
                current_block_info = None
                current_data_sample = 0
                while current_block < offset_blocks:
                    this_block_info = _block_r(fid)
                    if this_block_info is not None:
                        current_block_info = this_block_info
                    fid.seek(current_block_info["block_size"], 1)
                    current_block += 1

                # Start reading samples
                while samples_to_read > 0:
                    if samples_to_read == 1 and fid.tell() == file_size:
                        # We are in the presence of the EEG bug
                        # fill with zeros and break the loop
                        one[pns_one, -1] = 0
                        break

                    this_block_info = _block_r(fid)
                    if this_block_info is not None:
                        current_block_info = this_block_info

                    to_read = current_block_info["nsamples"] * current_block_info["nc"]
                    block_data = np.fromfile(fid, dtype, to_read)
                    block_data = block_data.reshape(n_channels, -1, order="C")

                    # Compute indexes
                    samples_read = block_data.shape[1]
                    if offset_samples > 0:
                        # First block read, skip to the offset:
                        block_data = block_data[:, offset_samples:]
                        samples_read = samples_read - offset_samples
                        offset_samples = 0

                    if samples_to_read < samples_read:
                        # Last block to read, skip the last samples
                        block_data = block_data[:, :samples_to_read]
                        samples_read = samples_to_read

                    s_start = current_data_sample
                    s_end = s_start + samples_read

                    one[pns_one, disk_use_idx[s_start:s_end]] = block_data[pns_in]
                    samples_to_read = samples_to_read - samples_read
                    current_data_sample = current_data_sample + samples_read

        # do the calibration
        _mult_cal_one(data, one, idx, cals, mult)


@verbose
def read_evokeds_mff(
    fname, condition=None, channel_naming="E%d", baseline=None, verbose=None
):
    """Read averaged MFF file as EvokedArray or list of EvokedArray.

    Parameters
    ----------
    fname : path-like
        File path to averaged MFF file. Should end in ``.mff``.
    condition : int or str | list of int or str | None
        The index (indices) or category (categories) from which to read in
        data. Averaged MFF files can contain separate averages for different
        categories. These can be indexed by the block number or the category
        name. If ``condition`` is a list or None, a list of EvokedArray
        objects is returned.
    channel_naming : str
        Channel naming convention for EEG channels. Defaults to 'E%%d'
        (resulting in channel names 'E1', 'E2', 'E3'...).
    baseline : None (default) or tuple of length 2
        The time interval to apply baseline correction. If None do not apply
        it. If baseline is (a, b) the interval is between "a (s)" and
        "b (s)". If a is None the beginning of the data is used and if b is
        None then b is set to the end of the interval. If baseline is equal
        to (None, None) all the time interval is used. Correction is applied
        by computing mean of the baseline period and subtracting it from the
        data. The baseline (a, b) includes both endpoints, i.e. all
        timepoints t such that a <= t <= b.
    %(verbose)s

    Returns
    -------
    evoked : EvokedArray or list of EvokedArray
        The evoked dataset(s); one EvokedArray if condition is int or str,
        or list of EvokedArray if condition is None or list.

    Raises
    ------
    ValueError
        If ``fname`` has file extension other than '.mff'.
    ValueError
        If the MFF file specified by ``fname`` is not averaged.
    ValueError
        If no categories.xml file in MFF directory specified by ``fname``.

    See Also
    --------
    Evoked, EvokedArray, create_info

    Notes
    -----
    .. versionadded:: 0.22
    """
    mffpy = _import_mffpy()
    # Confirm `fname` is a path to an MFF file
    fname = Path(fname)  # should be replaced with _check_fname
    if not fname.suffix == ".mff":
        raise ValueError('fname must be an MFF file with extension ".mff".')
    # Confirm the input MFF is averaged
    mff = mffpy.Reader(fname)
    try:
        flavor = mff.mff_flavor
    except AttributeError:  # < 6.3
        flavor = mff.flavor
    if flavor not in ("averaged", "segmented"):  # old, new names
        raise ValueError(
            f"{fname} is a {flavor} MFF file. "
            "fname must be the path to an averaged MFF file."
        )
    # Check for categories.xml file
    if "categories.xml" not in mff.directory.listdir():
        raise ValueError(
            "categories.xml not found in MFF directory. "
            f"{fname} may not be an averaged MFF file."
        )
    return_list = True
    if condition is None:
        categories = mff.categories.categories
        condition = list(categories.keys())
    elif not isinstance(condition, list):
        condition = [condition]
        return_list = False
    logger.info(f"Reading {len(condition)} evoked datasets from {fname} ...")
    output = [
        _read_evoked_mff(
            fname, c, channel_naming=channel_naming, verbose=verbose
        ).apply_baseline(baseline)
        for c in condition
    ]
    return output if return_list else output[0]
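

# A minimal usage sketch for the reader above (hypothetical file path):
#
#     evokeds = read_evokeds_mff("sub-01_ave.mff")  # all conditions
#     target = read_evokeds_mff(
#         "sub-01_ave.mff", condition="Target", baseline=(None, 0)
#     )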


def _read_evoked_mff(fname, condition, channel_naming="E%d", verbose=None):
    """Read evoked data from MFF file."""
    import mffpy

    egi_info = _read_header(fname)
    mff = mffpy.Reader(fname)
    categories = mff.categories.categories

    if isinstance(condition, str):
        # Condition is interpreted as category name
        category = _check_option(
            "condition", condition, categories, extra="provided as category name"
        )
        epoch = mff.epochs[category]
    elif isinstance(condition, int):
        # Condition is interpreted as epoch index
        try:
            epoch = mff.epochs[condition]
        except IndexError:
            raise ValueError(
                f'"condition" parameter ({condition}), provided '
                "as epoch index, is out of range for available "
                f"epochs ({len(mff.epochs)})."
            )
        category = epoch.name
    else:
        raise TypeError('"condition" parameter must be either int or str.')

    # Read in signals from the target epoch
    data = mff.get_physical_samples_from_epoch(epoch)
    eeg_data, t0 = data["EEG"]
    if "PNSData" in data:
        pns_data, t0 = data["PNSData"]
        all_data = np.vstack((eeg_data, pns_data))
        ch_types = egi_info["chan_type"] + egi_info["pns_types"]
    else:
        all_data = eeg_data
        ch_types = egi_info["chan_type"]
    all_data *= 1e-6  # convert to volts

    # Load metadata into info object
    # Exclude info['meas_date'] because record time info in
    # averaged MFF is the time of the averaging, not true record time.
    ch_names, mon = _read_locs(fname, egi_info, channel_naming)
    ch_names.extend(egi_info["pns_names"])
    info = create_info(ch_names, mff.sampling_rates["EEG"], ch_types)
    with info._unlock():
        info["device_info"] = dict(type=egi_info["device"])
        info["nchan"] = sum(mff.num_channels.values())

    # Add individual channel info
    # Get calibration info for EEG channels
    cals = _get_eeg_calibration_info(fname, egi_info)
    # Initialize calibration for PNS channels, will be updated later
    cals = np.concatenate([cals, np.repeat(1, len(egi_info["pns_names"]))])
    ch_coil = FIFF.FIFFV_COIL_EEG
    ch_kind = FIFF.FIFFV_EEG_CH
    chs = _create_chs(ch_names, cals, ch_coil, ch_kind, (), (), (), ())
    # Update PNS channel info
    chs = _add_pns_channel_info(chs, egi_info, ch_names)
    with info._unlock():
        info["chs"] = chs
    if mon is not None:
        info.set_montage(mon, on_missing="ignore")

    # Add bad channels to info
    info["description"] = category
    try:
        channel_status = categories[category][0]["channelStatus"]
    except KeyError:
        warn(
            f"Channel status data not found for condition {category}. "
            "No channels will be marked as bad.",
            category=UserWarning,
        )
        channel_status = None
    bads = []
    if channel_status:
        for entry in channel_status:
            if entry["exclusion"] == "badChannels":
                if entry["signalBin"] == 1:
                    # Add bad EEG channels
                    for ch in entry["channels"]:
                        bads.append(ch_names[ch - 1])
                elif entry["signalBin"] == 2:
                    # Add bad PNS channels
                    for ch in entry["channels"]:
                        bads.append(egi_info["pns_names"][ch - 1])
    info["bads"] = bads

    # Add EEG reference to info
    try:
        fp = mff.directory.filepointer("history")
    except (ValueError, FileNotFoundError):  # old (<=0.6.3) vs new mffpy
        pass
    else:
        with fp:
            history = mffpy.XML.from_file(fp)
        for entry in history.entries:
            if entry["method"] == "Montage Operations Tool":
                if "Average Reference" in entry["settings"]:
                    # Average reference has been applied
                    _, info = setup_proj(info)

    # Get nave from categories.xml
    try:
        nave = categories[category][0]["keys"]["#seg"]["data"]
    except KeyError:
        warn(
            f"Number of averaged epochs not found for condition {category}. "
            "nave will default to 1.",
            category=UserWarning,
        )
        nave = 1

    # Let tmin default to 0
    return EvokedArray(
        all_data, info, tmin=0.0, comment=category, nave=nave, verbose=verbose
    )


def _import_mffpy(why="read averaged .mff files"):
    """Import and return module mffpy."""
    try:
        import mffpy
    except ImportError as exp:
        msg = f"mffpy is required to {why}, got:\n{exp}"
        raise ImportError(msg)
    return mffpy