Tools for pulse-transit
dist/client/mne/stats/regression.py (vendored, new file, 432 lines)
@@ -0,0 +1,432 @@
# Authors: The MNE-Python contributors.
# License: BSD-3-Clause
# Copyright the MNE-Python contributors.

from collections import namedtuple
from inspect import isgenerator

import numpy as np
from scipy import linalg, sparse, stats

from .._fiff.pick import _picks_to_idx, pick_info, pick_types
from ..epochs import BaseEpochs
from ..evoked import Evoked, EvokedArray
from ..source_estimate import SourceEstimate
from ..utils import _reject_data_segments, fill_doc, logger, warn


def linear_regression(inst, design_matrix, names=None):
    """Fit Ordinary Least Squares (OLS) regression.

    Parameters
    ----------
    inst : instance of Epochs | iterable of SourceEstimate
        The data to be regressed. Contains all the trials, sensors, and time
        points for the regression. For Source Estimates, accepts either a list
        or a generator object.
    design_matrix : ndarray, shape (n_observations, n_regressors)
        The regressors to be used. Must be a 2d array with as many rows as
        the first dimension of the data. The first column of this matrix will
        typically consist of ones (intercept column).
    names : array-like | None
        Optional parameter to name the regressors (i.e., the columns in the
        design matrix). If provided, the length must correspond to the number
        of columns present in the design matrix (including the intercept, if
        present). Otherwise, the default names are ``'x0'``, ``'x1'``,
        ``'x2'``, …, ``'x(n-1)'`` for ``n`` regressors.

    Returns
    -------
    results : dict of namedtuple
        For each regressor (key), a namedtuple is provided with the
        following attributes:

        - ``beta`` : regression coefficients
        - ``stderr`` : standard error of regression coefficients
        - ``t_val`` : t statistics (``beta`` / ``stderr``)
        - ``p_val`` : two-sided p-value of t statistic under the t
          distribution
        - ``mlog10_p_val`` : -log₁₀-transformed p-value.

        The tuple members are numpy arrays. The shape of each numpy array is
        the shape of the data minus the first dimension; e.g., if the shape of
        the original data was ``(n_observations, n_channels, n_timepoints)``,
        then the shape of each of the arrays will be
        ``(n_channels, n_timepoints)``.
    """
    if names is None:
        names = [f"x{i}" for i in range(design_matrix.shape[1])]

    if isinstance(inst, BaseEpochs):
        picks = pick_types(
            inst.info,
            meg=True,
            eeg=True,
            ref_meg=True,
            stim=False,
            eog=False,
            ecg=False,
            emg=False,
            exclude=["bads"],
        )
        if [inst.ch_names[p] for p in picks] != inst.ch_names:
            warn("Fitting linear model to non-data or bad channels. Check picking")
        msg = "Fitting linear model to epochs"
        data = inst.get_data(copy=False)
        out = EvokedArray(np.zeros(data.shape[1:]), inst.info, inst.tmin)
    elif isgenerator(inst):
        msg = "Fitting linear model to source estimates (generator input)"
        out = next(inst)
        data = np.array([out.data] + [i.data for i in inst])
    elif isinstance(inst, list) and isinstance(inst[0], SourceEstimate):
        msg = "Fitting linear model to source estimates (list input)"
        out = inst[0]
        data = np.array([i.data for i in inst])
    else:
        raise ValueError("Input must be epochs or iterable of source estimates")
    logger.info(msg + f", ({np.prod(data.shape[1:])} targets, {len(names)} regressors)")
    lm_params = _fit_lm(data, design_matrix, names)
    lm = namedtuple("lm", "beta stderr t_val p_val mlog10_p_val")
    lm_fits = {}
    for name in names:
        parameters = [p[name] for p in lm_params]
        for ii, value in enumerate(parameters):
            out_ = out.copy()
            if not isinstance(out_, (SourceEstimate, Evoked)):
                raise RuntimeError("Invalid container.")
            out_._data[:] = value
            parameters[ii] = out_
        lm_fits[name] = lm(*parameters)
    logger.info("Done")
    return lm_fits


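# A minimal usage sketch, assuming ``epochs`` is an mne.Epochs instance and
# ``rts`` holds one scalar per epoch (e.g., reaction times); the function and
# regressor names here are illustrative, not part of the upstream API.
def _example_linear_regression(epochs, rts):
    """Regress single-trial data on an intercept plus a trial covariate."""
    design = np.column_stack([np.ones(len(rts)), rts])  # (n_epochs, 2)
    res = linear_regression(epochs, design, names=["intercept", "rt"])
    # each namedtuple field is an Evoked, so the usual plotting/stats apply
    return res["rt"].t_val

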
def _fit_lm(data, design_matrix, names):
    """Aux function."""
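    # Closed-form OLS: beta = (X'X)^{-1} X'y (via lstsq below); the standard
    # error of coefficient j is sqrt(sigma^2 * [(X'X)^{-1}]_{jj}), where
    # sigma^2 is the residual sum of squares over df = n_rows - n_predictors.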
    n_samples = len(data)
    n_features = np.prod(data.shape[1:])
    if design_matrix.ndim != 2:
        raise ValueError("Design matrix must be a 2d array")
    n_rows, n_predictors = design_matrix.shape

    if n_samples != n_rows:
        raise ValueError(
            "Number of rows in design matrix must be equal to number of observations"
        )
    if n_predictors != len(names):
        raise ValueError(
            "Number of regressor names must be equal to "
            "number of columns in design matrix"
        )

    y = np.reshape(data, (n_samples, n_features))
    betas, resid_sum_squares, _, _ = linalg.lstsq(a=design_matrix, b=y)

    df = n_rows - n_predictors
    sqrt_noise_var = np.sqrt(resid_sum_squares / df).reshape(data.shape[1:])
    design_invcov = linalg.inv(np.dot(design_matrix.T, design_matrix))
    unscaled_stderrs = np.sqrt(np.diag(design_invcov))
    tiny = np.finfo(np.float64).tiny
    beta, stderr, t_val, p_val, mlog10_p_val = (dict() for _ in range(5))
    for x, unscaled_stderr, predictor in zip(betas, unscaled_stderrs, names):
        beta[predictor] = x.reshape(data.shape[1:])
        stderr[predictor] = sqrt_noise_var * unscaled_stderr
        p_val[predictor] = np.empty_like(stderr[predictor])
        t_val[predictor] = np.empty_like(stderr[predictor])

        stderr_pos = stderr[predictor] > 0
        beta_pos = beta[predictor] > 0
        t_val[predictor][stderr_pos] = (
            beta[predictor][stderr_pos] / stderr[predictor][stderr_pos]
        )
        cdf = stats.t.cdf(np.abs(t_val[predictor][stderr_pos]), df)
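        # two-sided p-value: 2 * P(T > |t|) under t(df), floored at ``tiny``
        # so the -log10 transform below stays finite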
        p_val[predictor][stderr_pos] = np.clip((1.0 - cdf) * 2.0, tiny, 1.0)
        # degenerate cases
        mask = ~stderr_pos & beta_pos
        t_val[predictor][mask] = np.inf * np.sign(beta[predictor][mask])
        p_val[predictor][mask] = tiny
        # could do NaN here, but hopefully this is safe enough
        mask = ~stderr_pos & ~beta_pos
        t_val[predictor][mask] = 0
        p_val[predictor][mask] = 1.0
        mlog10_p_val[predictor] = -np.log10(p_val[predictor])

    return beta, stderr, t_val, p_val, mlog10_p_val


@fill_doc
def linear_regression_raw(
    raw,
    events,
    event_id=None,
    tmin=-0.1,
    tmax=1,
    covariates=None,
    reject=None,
    flat=None,
    tstep=1.0,
    decim=1,
    picks=None,
    solver="cholesky",
):
    """Estimate regression-based evoked potentials/fields by linear modeling.

    This models the full M/EEG time course, including correction for
    overlapping potentials and allowing for continuous/scalar predictors.
    Internally, this constructs a predictor matrix X of size
    n_samples * (n_conds * window length), solving the linear system
    ``Y = bX`` and returning ``b`` as evoked-like time series split by
    condition. See :footcite:`SmithKutas2015`.

    Parameters
    ----------
    raw : instance of Raw
        A raw object. Note: be very careful about data that is not
        downsampled, as the resulting matrices can be enormous and easily
        overload your computer. Typically, a 100 Hz sampling rate is
        appropriate; alternatively, use the ``decim`` keyword (see below).
    events : ndarray of int, shape (n_events, 3)
        An array where the first column corresponds to samples in raw
        and the last to integer codes in event_id.
    event_id : dict | None
        As in Epochs; a dictionary where the values may be integers or
        iterables of integers, corresponding to the 3rd column of
        events, and the keys are condition names.
        If None, uses all events in the events array.
    tmin : float | dict
        If float, gives the lower limit (in seconds) for the time window for
        which all event types' effects are estimated. If a dict, can be used to
        specify time windows for specific event types: keys correspond to keys
        in event_id and/or covariates; for missing values, the default (-.1) is
        used.
    tmax : float | dict
        If float, gives the upper limit (in seconds) for the time window for
        which all event types' effects are estimated. If a dict, can be used to
        specify time windows for specific event types: keys correspond to keys
        in event_id and/or covariates; for missing values, the default (1.) is
        used.
    covariates : dict-like | None
        If dict-like (e.g., a pandas DataFrame), values have to be array-like
        and of the same length as the rows in ``events``. Keys correspond
        to additional event types/conditions to be estimated and are matched
        with the time points given by the first column of ``events``. If
        None, only binary events (from event_id) are used.
    reject : None | dict
        For cleaning raw data before the regression is performed: set up
        rejection parameters based on peak-to-peak amplitude in continuously
        selected subepochs. If None, no rejection is done.
        If dict, keys are types ('grad' | 'mag' | 'eeg' | 'eog' | 'ecg')
        and values are the maximal peak-to-peak values to select rejected
        epochs, e.g.::

            reject = dict(grad=4000e-13,  # T / m (gradiometers)
                          mag=4e-12,  # T (magnetometers)
                          eeg=40e-6,  # V (EEG channels)
                          eog=250e-6,  # V (EOG channels)
                          )

    flat : None | dict
        For cleaning raw data before the regression is performed: set up
        rejection parameters based on flatness of the signal. If None, no
        rejection is done. If a dict, keys are ('grad' | 'mag' |
        'eeg' | 'eog' | 'ecg') and values are minimal peak-to-peak values to
        select rejected epochs.
    tstep : float
        Length of windows for peak-to-peak detection for raw data cleaning.
    decim : int
        Decimate by choosing only a subsample of data points. Highly
        recommended for data recorded at high sampling frequencies, as
        otherwise huge intermediate matrices have to be created and inverted.
    %(picks_good_data)s
    solver : str | callable
        Either a function which takes as its inputs the sparse predictor
        matrix X and the observation matrix Y, and returns the coefficient
        matrix b; or a string.
        X is of shape (n_times, n_predictors * time_window_length).
        y is of shape (n_times, n_channels).
        If str, must be ``'cholesky'``, in which case the solver used is
        ``linalg.solve(dot(X.T, X), dot(X.T, y))``.

    Returns
    -------
    evokeds : dict
        A dict where the keys correspond to conditions and the values are
        Evoked objects with the ER[F/P]s. These can be used exactly like any
        other Evoked object, including e.g. plotting or statistics.

    References
    ----------
    .. footbibliography::
    """
    if isinstance(solver, str):
        if solver not in {"cholesky"}:
            raise ValueError(f"No such solver: {solver}")
        if solver == "cholesky":

            def solver(X, y):
                a = (X.T * X).toarray()  # dot product of sparse matrices
                return linalg.solve(
                    a, X.T * y, assume_a="pos", overwrite_a=True, overwrite_b=True
                ).T

    elif callable(solver):
        pass
    else:
        raise TypeError("The solver must be a str or a callable.")

    # build data
    data, info, events = _prepare_rerp_data(raw, events, picks=picks, decim=decim)

    if event_id is None:
        event_id = {str(v): v for v in set(events[:, 2])}

    # build predictors
    X, conds, cond_length, tmin_s, tmax_s = _prepare_rerp_preds(
        n_samples=data.shape[1],
        sfreq=info["sfreq"],
        events=events,
        event_id=event_id,
        tmin=tmin,
        tmax=tmax,
        covariates=covariates,
    )

    # remove "empty" and contaminated data points
    X, data = _clean_rerp_input(X, data, reject, flat, decim, info, tstep)

    # solve linear system
    coefs = solver(X, data.T)
    if coefs.shape[0] != data.shape[0]:
        raise ValueError(
            "solver output has unexpected shape. Supply a "
            "function that returns coefficients in the form "
            "(n_targets, n_features), where targets == channels."
        )

    # construct Evoked objects to be returned from output
    evokeds = _make_evokeds(coefs, conds, cond_length, tmin_s, tmax_s, info)

    return evokeds


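# A minimal usage sketch, assuming ``raw`` holds a continuous recording and
# ``events`` uses codes 1 and 2; the condition names, windows, and the custom
# least-squares solver below are illustrative, not upstream defaults.
def _example_linear_regression_raw(raw, events):
    """Estimate overlap-corrected evoked responses with a custom solver."""

    def dense_lstsq(X, y):
        # any callable with this contract works: X sparse (n_times, n_preds),
        # y (n_times, n_channels) -> coefficients (n_channels, n_preds);
        # densifying X is only reasonable for short recordings
        return linalg.lstsq(X.toarray(), y)[0].T

    evokeds = linear_regression_raw(
        raw,
        events,
        event_id={"standard": 1, "deviant": 2},
        tmin=-0.1,
        tmax=0.5,
        decim=8,
        solver=dense_lstsq,
    )
    return evokeds["deviant"]  # an ordinary Evoked

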
def _prepare_rerp_data(raw, events, picks=None, decim=1):
    """Prepare events and data, primarily for `linear_regression_raw`."""
    picks = _picks_to_idx(raw.info, picks)
    info = pick_info(raw.info, picks)
    decim = int(decim)
    with info._unlock():
        info["sfreq"] /= decim
    data, times = raw[:]
    data = data[picks, ::decim]
    if len(set(events[:, 0])) < len(events[:, 0]):
        raise ValueError(
            "`events` contains duplicate time points. Make "
            "sure all entries in the first column of `events` "
            "are unique."
        )

    events = events.copy()
    events[:, 0] -= raw.first_samp
    events[:, 0] //= decim
    if len(set(events[:, 0])) < len(events[:, 0]):
        raise ValueError(
            "After decimating, `events` contains duplicate time "
            "points. This means some events are too closely "
            "spaced for the requested decimation factor. Choose "
            "different events, drop close events, or choose a "
            "different decimation factor."
        )

    return data, info, events


def _prepare_rerp_preds(
    n_samples, sfreq, events, event_id=None, tmin=-0.1, tmax=1, covariates=None
):
    """Build predictor matrix and metadata (e.g. condition time windows)."""
    conds = list(event_id)
    if covariates is not None:
        conds += list(covariates)

    # time windows (per event type) are converted to sample points from times
    # int(round()) to be safe and match Epochs constructor behavior
    if isinstance(tmin, (float, int)):
        tmin_s = {cond: int(round(tmin * sfreq)) for cond in conds}
    else:
        tmin_s = {cond: int(round(tmin.get(cond, -0.1) * sfreq)) for cond in conds}
    if isinstance(tmax, (float, int)):
        tmax_s = {cond: int(round(tmax * sfreq) + 1) for cond in conds}
    else:
        tmax_s = {cond: int(round(tmax.get(cond, 1.0) * sfreq)) + 1 for cond in conds}

    # Construct predictor matrix
    # We do this by creating one array per event type, shape (samples, lags)
    # (where lags depends on tmin/tmax and can be different for different
    # event types). Columns correspond to predictors, predictors correspond to
    # time lags. Thus, each array is mostly sparse, with one diagonal of 1s
    # per event (for binary predictors).

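    # A tiny concrete case: one event at sample s with tmin_ = 0 and
    # n_lags = 3 yields diagonal offset -s, i.e. X[s, 0] = X[s + 1, 1] =
    # X[s + 2, 2] = 1, so lag k turns on exactly k samples after the event.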
    cond_length = dict()
    xs = []
    for cond in conds:
        tmin_, tmax_ = tmin_s[cond], tmax_s[cond]
        n_lags = int(tmax_ - tmin_)  # width of matrix
        if cond in event_id:  # for binary predictors
            ids = (
                [event_id[cond]] if isinstance(event_id[cond], int) else event_id[cond]
            )
            onsets = -(events[np.isin(events[:, 2], ids), 0] + tmin_)
            values = np.ones((len(onsets), n_lags))

        else:  # for predictors from covariates, e.g. continuous ones
            covs = covariates[cond]
            if len(covs) != len(events):
                error = (
                    f"Condition {cond} from ``covariates`` is not the same length as "
                    "``events``"
                )
                raise ValueError(error)
            onsets = -(events[np.where(covs != 0), 0] + tmin_)[0]
            v = np.asarray(covs)[np.nonzero(covs)].astype(float)
            values = np.ones((len(onsets), n_lags)) * v[:, np.newaxis]

        cond_length[cond] = len(onsets)
        xs.append(sparse.dia_matrix((values, onsets), shape=(n_samples, n_lags)))

    return sparse.hstack(xs), conds, cond_length, tmin_s, tmax_s


def _clean_rerp_input(X, data, reject, flat, decim, info, tstep):
    """Remove empty and contaminated points from data & predictor matrices."""
    # find only those positions where at least one predictor isn't 0
    has_val = np.unique(X.nonzero()[0])

    # reject positions based on extreme steps in the data
    if reject is not None:
        _, inds = _reject_data_segments(
            data, reject, flat, decim=None, info=info, tstep=tstep
        )
        for t0, t1 in inds:
            has_val = np.setdiff1d(has_val, range(t0, t1))

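    # hstack returns a COO matrix, which does not support row indexing;
    # convert to CSR so the retained sample rows can be sliced out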
    return X.tocsr()[has_val], data[:, has_val]


def _make_evokeds(coefs, conds, cond_length, tmin_s, tmax_s, info):
    """Create a dictionary of Evoked objects.

    These will be created from a coefs matrix and condition durations.
    """
    evokeds = dict()
    cumul = 0
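    # ``coefs`` holds one contiguous column block per condition, each
    # tmax_ - tmin_ lags wide; walk through the blocks in condition order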
    for cond in conds:
        tmin_, tmax_ = tmin_s[cond], tmax_s[cond]
        evokeds[cond] = EvokedArray(
            coefs[:, cumul : cumul + tmax_ - tmin_],
            info=info,
            comment=cond,
            tmin=tmin_ / float(info["sfreq"]),
            nave=cond_length[cond],
            kind="average",
        )  # nave and kind are technically incorrect
        cumul += tmax_ - tmin_
    return evokeds