# Authors: The MNE-Python contributors. # License: BSD-3-Clause # Copyright the MNE-Python contributors. import logging import numpy as np from scipy.sparse import issparse from ..fixes import _get_check_scoring from ..parallel import parallel_func from ..utils import ProgressBar, _parse_verbose, array_split_idx, fill_doc, verbose from .base import BaseEstimator, _check_estimator from .mixin import TransformerMixin @fill_doc class SlidingEstimator(BaseEstimator, TransformerMixin): """Search Light. Fit, predict and score a series of models to each subset of the dataset along the last dimension. Each entry in the last dimension is referred to as a task. Parameters ---------- %(base_estimator)s %(scoring)s %(n_jobs)s %(position)s %(allow_2d)s %(verbose)s Attributes ---------- estimators_ : array-like, shape (n_tasks,) List of fitted scikit-learn estimators (one per task). """ @verbose def __init__( self, base_estimator, scoring=None, n_jobs=None, *, position=0, allow_2d=False, verbose=None, ): _check_estimator(base_estimator) self.base_estimator = base_estimator self.n_jobs = n_jobs self.scoring = scoring self.position = position self.allow_2d = allow_2d self.verbose = verbose def _more_tags(self): return {"no_validation": True, "requires_fit": False} @property def _estimator_type(self): return getattr(self.base_estimator, "_estimator_type", None) def __repr__(self): # noqa: D105 repr_str = "<" + super().__repr__() if hasattr(self, "estimators_"): repr_str = repr_str[:-1] repr_str += f", fitted with {len(self.estimators_)} estimators" return repr_str + ">" def fit(self, X, y, **fit_params): """Fit a series of independent estimators to the dataset. Parameters ---------- X : array, shape (n_samples, nd_features, n_tasks) The training input samples. For each data slice, a clone estimator is fitted independently. The feature dimension can be multidimensional e.g. X.shape = (n_samples, n_features_1, n_features_2, n_tasks). y : array, shape (n_samples,) | (n_samples, n_targets) The target values. **fit_params : dict of string -> object Parameters to pass to the fit method of the estimator. Returns ------- self : object Return self. """ X = self._check_Xy(X, y) parallel, p_func, n_jobs = parallel_func( _sl_fit, self.n_jobs, max_jobs=X.shape[-1], verbose=False ) self.estimators_ = list() self.fit_params_ = fit_params # For fitting, the parallelization is across estimators. context = _create_progressbar_context(self, X, "Fitting") with context as pb: estimators = parallel( p_func(self.base_estimator, split, y, pb.subset(pb_idx), **fit_params) for pb_idx, split in array_split_idx(X, n_jobs, axis=-1) ) # Each parallel job can have a different number of training estimators # We can't directly concatenate them because of sklearn's Bagging API # (see scikit-learn #9720) self.estimators_ = np.empty(X.shape[-1], dtype=object) idx = 0 for job_estimators in estimators: for est in job_estimators: self.estimators_[idx] = est idx += 1 return self def fit_transform(self, X, y, **fit_params): """Fit and transform a series of independent estimators to the dataset. Parameters ---------- X : array, shape (n_samples, nd_features, n_tasks) The training input samples. For each task, a clone estimator is fitted independently. The feature dimension can be multidimensional, e.g.:: X.shape = (n_samples, n_features_1, n_features_2, n_estimators) y : array, shape (n_samples,) | (n_samples, n_targets) The target values. **fit_params : dict of string -> object Parameters to pass to the fit method of the estimator. Returns ------- y_pred : array, shape (n_samples, n_tasks) | (n_samples, n_tasks, n_targets) The predicted values for each estimator. """ # noqa: E501 return self.fit(X, y, **fit_params).transform(X) def _transform(self, X, method): """Aux. function to make parallel predictions/transformation.""" X = self._check_Xy(X) method = _check_method(self.base_estimator, method) if X.shape[-1] != len(self.estimators_): raise ValueError("The number of estimators does not match X.shape[-1]") # For predictions/transforms the parallelization is across the data and # not across the estimators to avoid memory load. parallel, p_func, n_jobs = parallel_func( _sl_transform, self.n_jobs, max_jobs=X.shape[-1], verbose=False ) X_splits = np.array_split(X, n_jobs, axis=-1) idx, est_splits = zip(*array_split_idx(self.estimators_, n_jobs)) context = _create_progressbar_context(self, X, "Transforming") with context as pb: y_pred = parallel( p_func(est, x, method, pb.subset(pb_idx)) for pb_idx, est, x in zip(idx, est_splits, X_splits) ) y_pred = np.concatenate(y_pred, axis=1) return y_pred def transform(self, X): """Transform each data slice/task with a series of independent estimators. The number of tasks in X should match the number of tasks/estimators given at fit time. Parameters ---------- X : array, shape (n_samples, nd_features, n_tasks) The input samples. For each data slice/task, the corresponding estimator makes a transformation of the data, e.g. ``[estimators[ii].transform(X[..., ii]) for ii in range(n_estimators)]``. The feature dimension can be multidimensional e.g. X.shape = (n_samples, n_features_1, n_features_2, n_tasks). Returns ------- Xt : array, shape (n_samples, n_estimators) The transformed values generated by each estimator. """ # noqa: E501 return self._transform(X, "transform").astype(X.dtype) def predict(self, X): """Predict each data slice/task with a series of independent estimators. The number of tasks in X should match the number of tasks/estimators given at fit time. Parameters ---------- X : array, shape (n_samples, nd_features, n_tasks) The input samples. For each data slice, the corresponding estimator makes the sample predictions, e.g.: ``[estimators[ii].predict(X[..., ii]) for ii in range(n_estimators)]``. The feature dimension can be multidimensional e.g. X.shape = (n_samples, n_features_1, n_features_2, n_tasks). Returns ------- y_pred : array, shape (n_samples, n_estimators) | (n_samples, n_tasks, n_targets) Predicted values for each estimator/data slice. """ # noqa: E501 return self._transform(X, "predict") def predict_proba(self, X): """Predict each data slice with a series of independent estimators. The number of tasks in X should match the number of tasks/estimators given at fit time. Parameters ---------- X : array, shape (n_samples, nd_features, n_tasks) The input samples. For each data slice, the corresponding estimator makes the sample probabilistic predictions, e.g.: ``[estimators[ii].predict_proba(X[..., ii]) for ii in range(n_estimators)]``. The feature dimension can be multidimensional e.g. X.shape = (n_samples, n_features_1, n_features_2, n_tasks). Returns ------- y_pred : array, shape (n_samples, n_tasks, n_classes) Predicted probabilities for each estimator/data slice/task. """ # noqa: E501 return self._transform(X, "predict_proba") def decision_function(self, X): """Estimate distances of each data slice to the hyperplanes. Parameters ---------- X : array, shape (n_samples, nd_features, n_tasks) The input samples. For each data slice, the corresponding estimator outputs the distance to the hyperplane, e.g.: ``[estimators[ii].decision_function(X[..., ii]) for ii in range(n_estimators)]``. The feature dimension can be multidimensional e.g. X.shape = (n_samples, n_features_1, n_features_2, n_estimators). Returns ------- y_pred : array, shape (n_samples, n_estimators, n_classes * (n_classes-1) // 2) Predicted distances for each estimator/data slice. Notes ----- This requires base_estimator to have a ``decision_function`` method. """ # noqa: E501 return self._transform(X, "decision_function") def _check_Xy(self, X, y=None): """Aux. function to check input data.""" # Once we require sklearn 1.1+ we should do something like: # from sklearn.utils import check_array # X = check_array(X, ensure_2d=False, input_name="X") # y = check_array(y, dtype=None, ensure_2d=False, input_name="y") if issparse(X): raise TypeError("X should be a dense array, got sparse instead.") X = np.asarray(X) if y is not None: y = np.asarray(y) if len(X) != len(y) or len(y) < 1: raise ValueError("X and y must have the same length.") if X.ndim < 3: err = None if not self.allow_2d: err = 3 elif X.ndim < 2: err = 2 if err: raise ValueError(f"X must have at least {err} dimensions.") X = X[..., np.newaxis] return X def score(self, X, y): """Score each estimator on each task. The number of tasks in X should match the number of tasks/estimators given at fit time, i.e. we need ``X.shape[-1] == len(self.estimators_)``. Parameters ---------- X : array, shape (n_samples, nd_features, n_tasks) The input samples. For each data slice, the corresponding estimator scores the prediction, e.g.: ``[estimators[ii].score(X[..., ii], y) for ii in range(n_estimators)]``. The feature dimension can be multidimensional e.g. X.shape = (n_samples, n_features_1, n_features_2, n_tasks). y : array, shape (n_samples,) | (n_samples, n_targets) The target values. Returns ------- score : array, shape (n_samples, n_estimators) Score for each estimator/task. """ # noqa: E501 check_scoring = _get_check_scoring() X = self._check_Xy(X, y) if X.shape[-1] != len(self.estimators_): raise ValueError("The number of estimators does not match X.shape[-1]") scoring = check_scoring(self.base_estimator, self.scoring) y = _fix_auc(scoring, y) # For predictions/transforms the parallelization is across the data and # not across the estimators to avoid memory load. parallel, p_func, n_jobs = parallel_func( _sl_score, self.n_jobs, max_jobs=X.shape[-1], verbose=False ) X_splits = np.array_split(X, n_jobs, axis=-1) est_splits = np.array_split(self.estimators_, n_jobs) score = parallel( p_func(est, scoring, x, y) for (est, x) in zip(est_splits, X_splits) ) score = np.concatenate(score, axis=0) return score @property def classes_(self): if not hasattr(self.estimators_[0], "classes_"): raise AttributeError( "classes_ attribute available only if base_estimator has it, and " f"estimator {self.estimators_[0]} does not" ) return self.estimators_[0].classes_ @fill_doc def _sl_fit(estimator, X, y, pb, **fit_params): """Aux. function to fit SlidingEstimator in parallel. Fit a clone estimator to each slice of data. Parameters ---------- %(base_estimator)s X : array, shape (n_samples, nd_features, n_estimators) The target data. The feature dimension can be multidimensional e.g. X.shape = (n_samples, n_features_1, n_features_2, n_estimators) y : array, shape (n_sample, ) The target values. pb : instance of ProgressBar The progress bar to update. fit_params : dict | None Parameters to pass to the fit method of the estimator. Returns ------- estimators_ : list of estimators The fitted estimators. """ from sklearn.base import clone estimators_ = list() for ii in range(X.shape[-1]): est = clone(estimator) est.fit(X[..., ii], y, **fit_params) estimators_.append(est) pb.update(ii + 1) return estimators_ def _sl_transform(estimators, X, method, pb): """Aux. function to transform SlidingEstimator in parallel. Applies transform/predict/decision_function etc for each slice of data. Parameters ---------- estimators : list of estimators The fitted estimators. X : array, shape (n_samples, nd_features, n_estimators) The target data. The feature dimension can be multidimensional e.g. X.shape = (n_samples, n_features_1, n_features_2, n_estimators) method : str The estimator method to use (e.g. 'predict', 'transform'). pb : instance of ProgressBar The progress bar to update. Returns ------- y_pred : array, shape (n_samples, n_estimators, n_classes * (n_classes-1) // 2) The transformations for each slice of data. """ # noqa: E501 for ii, est in enumerate(estimators): transform = getattr(est, method) _y_pred = transform(X[..., ii]) # Initialize array of predictions on the first transform iteration if ii == 0: y_pred = _sl_init_pred(_y_pred, X) y_pred[:, ii, ...] = _y_pred pb.update(ii + 1) return y_pred def _sl_init_pred(y_pred, X): """Aux. function to SlidingEstimator to initialize y_pred.""" n_sample, n_tasks = X.shape[0], X.shape[-1] y_pred = np.zeros((n_sample, n_tasks) + y_pred.shape[1:], y_pred.dtype) return y_pred def _sl_score(estimators, scoring, X, y): """Aux. function to score SlidingEstimator in parallel. Predict and score each slice of data. Parameters ---------- estimators : list, shape (n_tasks,) The fitted estimators. X : array, shape (n_samples, nd_features, n_tasks) The target data. The feature dimension can be multidimensional e.g. X.shape = (n_samples, n_features_1, n_features_2, n_tasks) scoring : callable, str or None If scoring is None (default), the predictions are internally generated by estimator.score(). Else, we must first get the predictions to pass them to ad-hoc scorer. y : array, shape (n_samples,) | (n_samples, n_targets) The target values. Returns ------- score : array, shape (n_tasks,) The score for each task / slice of data. """ n_tasks = X.shape[-1] score = np.zeros(n_tasks) for ii, est in enumerate(estimators): score[ii] = scoring(est, X[..., ii], y) return score def _check_method(estimator, method): """Check that an estimator has the method attribute. If method == 'transform' and estimator does not have 'transform', use 'predict' instead. """ if method == "transform" and not hasattr(estimator, "transform"): method = "predict" if not hasattr(estimator, method): ValueError(f"base_estimator does not have `{method}` method.") return method @fill_doc class GeneralizingEstimator(SlidingEstimator): """Generalization Light. Fit a search-light along the last dimension and use them to apply a systematic cross-tasks generalization. Parameters ---------- %(base_estimator)s %(scoring)s %(n_jobs)s %(position)s %(allow_2d)s %(verbose)s """ def __repr__(self): # noqa: D105 repr_str = super().__repr__() if hasattr(self, "estimators_"): repr_str = repr_str[:-1] repr_str += f", fitted with {len(self.estimators_)} estimators>" return repr_str def _transform(self, X, method): """Aux. function to make parallel predictions/transformation.""" X = self._check_Xy(X) method = _check_method(self.base_estimator, method) parallel, p_func, n_jobs = parallel_func( _gl_transform, self.n_jobs, max_jobs=X.shape[-1], verbose=False ) context = _create_progressbar_context(self, X, "Transforming") with context as pb: y_pred = parallel( p_func(self.estimators_, x_split, method, pb.subset(pb_idx)) for pb_idx, x_split in array_split_idx( X, n_jobs, axis=-1, n_per_split=len(self.estimators_) ) ) y_pred = np.concatenate(y_pred, axis=2) return y_pred def transform(self, X): """Transform each data slice with all possible estimators. Parameters ---------- X : array, shape (n_samples, nd_features, n_slices) The input samples. For estimator the corresponding data slice is used to make a transformation. The feature dimension can be multidimensional e.g. X.shape = (n_samples, n_features_1, n_features_2, n_estimators). Returns ------- Xt : array, shape (n_samples, n_estimators, n_slices) The transformed values generated by each estimator. """ return self._transform(X, "transform") def predict(self, X): """Predict each data slice with all possible estimators. Parameters ---------- X : array, shape (n_samples, nd_features, n_slices) The training input samples. For each data slice, a fitted estimator predicts each slice of the data independently. The feature dimension can be multidimensional e.g. X.shape = (n_samples, n_features_1, n_features_2, n_estimators). Returns ------- y_pred : array, shape (n_samples, n_estimators, n_slices) | (n_samples, n_estimators, n_slices, n_targets) The predicted values for each estimator. """ # noqa: E501 return self._transform(X, "predict") def predict_proba(self, X): """Estimate probabilistic estimates of each data slice with all possible estimators. Parameters ---------- X : array, shape (n_samples, nd_features, n_slices) The training input samples. For each data slice, a fitted estimator predicts a slice of the data. The feature dimension can be multidimensional e.g. ``X.shape = (n_samples, n_features_1, n_features_2, n_estimators)``. Returns ------- y_pred : array, shape (n_samples, n_estimators, n_slices, n_classes) The predicted values for each estimator. Notes ----- This requires ``base_estimator`` to have a ``predict_proba`` method. """ # noqa: E501 return self._transform(X, "predict_proba") def decision_function(self, X): """Estimate distances of each data slice to all hyperplanes. Parameters ---------- X : array, shape (n_samples, nd_features, n_slices) The training input samples. Each estimator outputs the distance to its hyperplane, e.g.: ``[estimators[ii].decision_function(X[..., ii]) for ii in range(n_estimators)]``. The feature dimension can be multidimensional e.g. ``X.shape = (n_samples, n_features_1, n_features_2, n_estimators)``. Returns ------- y_pred : array, shape (n_samples, n_estimators, n_slices, n_classes * (n_classes-1) // 2) The predicted values for each estimator. Notes ----- This requires ``base_estimator`` to have a ``decision_function`` method. """ # noqa: E501 return self._transform(X, "decision_function") def score(self, X, y): """Score each of the estimators on the tested dimensions. Parameters ---------- X : array, shape (n_samples, nd_features, n_slices) The input samples. For each data slice, the corresponding estimator scores the prediction, e.g.: ``[estimators[ii].score(X[..., ii], y) for ii in range(n_slices)]``. The feature dimension can be multidimensional e.g. ``X.shape = (n_samples, n_features_1, n_features_2, n_estimators)``. y : array, shape (n_samples,) | (n_samples, n_targets) The target values. Returns ------- score : array, shape (n_samples, n_estimators, n_slices) Score for each estimator / data slice couple. """ # noqa: E501 check_scoring = _get_check_scoring() X = self._check_Xy(X, y) # For predictions/transforms the parallelization is across the data and # not across the estimators to avoid memory load. parallel, p_func, n_jobs = parallel_func( _gl_score, self.n_jobs, max_jobs=X.shape[-1], verbose=False ) scoring = check_scoring(self.base_estimator, self.scoring) y = _fix_auc(scoring, y) context = _create_progressbar_context(self, X, "Scoring") with context as pb: score = parallel( p_func(self.estimators_, scoring, x, y, pb.subset(pb_idx)) for pb_idx, x in array_split_idx( X, n_jobs, axis=-1, n_per_split=len(self.estimators_) ) ) score = np.concatenate(score, axis=1) return score def _gl_transform(estimators, X, method, pb): """Transform the dataset. This will apply each estimator to all slices of the data. Parameters ---------- X : array, shape (n_samples, nd_features, n_slices) The training input samples. For each data slice, a clone estimator is fitted independently. The feature dimension can be multidimensional e.g. X.shape = (n_samples, n_features_1, n_features_2, n_estimators) method : str The method to call for each estimator. pb : instance of ProgressBar The progress bar to update. Returns ------- Xt : array, shape (n_samples, n_slices) The transformed values generated by each estimator. """ n_sample, n_iter = X.shape[0], X.shape[-1] for ii, est in enumerate(estimators): # stack generalized data for faster prediction X_stack = X.transpose(np.r_[0, X.ndim - 1, range(1, X.ndim - 1)]) X_stack = X_stack.reshape(np.r_[n_sample * n_iter, X_stack.shape[2:]]) transform = getattr(est, method) _y_pred = transform(X_stack) # unstack generalizations if _y_pred.ndim == 2: _y_pred = np.reshape(_y_pred, [n_sample, n_iter, _y_pred.shape[1]]) else: shape = np.r_[n_sample, n_iter, _y_pred.shape[1:]].astype(int) _y_pred = np.reshape(_y_pred, shape) # Initialize array of predictions on the first transform iteration if ii == 0: y_pred = _gl_init_pred(_y_pred, X, len(estimators)) y_pred[:, ii, ...] = _y_pred pb.update((ii + 1) * n_iter) return y_pred def _gl_init_pred(y_pred, X, n_train): """Aux. function to GeneralizingEstimator to initialize y_pred.""" n_sample, n_iter = X.shape[0], X.shape[-1] if y_pred.ndim == 3: y_pred = np.zeros((n_sample, n_train, n_iter, y_pred.shape[-1]), y_pred.dtype) else: y_pred = np.zeros((n_sample, n_train, n_iter), y_pred.dtype) return y_pred def _gl_score(estimators, scoring, X, y, pb): """Score GeneralizingEstimator in parallel. Predict and score each slice of data. Parameters ---------- estimators : list of estimators The fitted estimators. scoring : callable, string or None If scoring is None (default), the predictions are internally generated by estimator.score(). Else, we must first get the predictions to pass them to ad-hoc scorer. X : array, shape (n_samples, nd_features, n_slices) The target data. The feature dimension can be multidimensional e.g. X.shape = (n_samples, n_features_1, n_features_2, n_estimators) y : array, shape (n_samples,) | (n_samples, n_targets) The target values. pb : instance of ProgressBar The progress bar to update. Returns ------- score : array, shape (n_estimators, n_slices) The score for each slice of data. """ # FIXME: The level parallelization may be a bit high, and might be memory # consuming. Perhaps need to lower it down to the loop across X slices. score_shape = [len(estimators), X.shape[-1]] for jj in range(X.shape[-1]): for ii, est in enumerate(estimators): _score = scoring(est, X[..., jj], y) # Initialize array of predictions on the first score iteration if (ii == 0) and (jj == 0): dtype = type(_score) score = np.zeros(score_shape, dtype) score[ii, jj, ...] = _score pb.update(jj * len(estimators) + ii + 1) return score def _fix_auc(scoring, y): from sklearn.preprocessing import LabelEncoder # This fixes sklearn's inability to compute roc_auc when y not in [0, 1] # scikit-learn/scikit-learn#6874 if scoring is not None: score_func = getattr(scoring, "_score_func", None) kwargs = getattr(scoring, "_kwargs", {}) if ( getattr(score_func, "__name__", "") == "roc_auc_score" and kwargs.get("multi_class", "raise") == "raise" ): if np.ndim(y) != 1 or len(set(y)) != 2: raise ValueError( "roc_auc scoring can only be computed for two-class problems." ) y = LabelEncoder().fit_transform(y) return y def _create_progressbar_context(inst, X, message): """Create a progress bar taking into account ``inst.verbose``.""" multiply = len(inst.estimators_) if isinstance(inst, GeneralizingEstimator) else 1 n_steps = X.shape[-1] * max(1, multiply) mesg = f"{message} {inst.__class__.__name__}" which_tqdm = "off" if not _check_verbose(inst.verbose) else None context = ProgressBar( n_steps, mesg=mesg, position=inst.position, which_tqdm=which_tqdm ) return context def _check_verbose(verbose): """Check if verbose is above or equal 'INFO' level.""" logging_level = _parse_verbose(verbose) bool_verbose = logging_level <= logging.INFO return bool_verbose