"""T-test with permutations.""" # Authors: The MNE-Python contributors. # License: BSD-3-Clause # Copyright the MNE-Python contributors. from math import sqrt import numpy as np from ..parallel import parallel_func from ..utils import check_random_state, logger, verbose def _max_stat(X, X2, perms, dof_scaling): """Aux function for permutation_t_test (for parallel comp).""" n_samples = len(X) mus = np.dot(perms, X) / float(n_samples) stds = np.sqrt(X2[None, :] - mus * mus) * dof_scaling # std with splitting max_abs = np.max(np.abs(mus) / (stds / sqrt(n_samples)), axis=1) # t-max return max_abs @verbose def permutation_t_test( X, n_permutations=10000, tail=0, n_jobs=None, seed=None, verbose=None ): """One sample/paired sample permutation test based on a t-statistic. This function can perform the test on one variable or simultaneously on multiple variables. When applying the test to multiple variables, the "tmax" method is used for adjusting the p-values of each variable for multiple comparisons. Like Bonferroni correction, this method adjusts p-values in a way that controls the family-wise error rate. However, the permutation method will be more powerful than Bonferroni correction when different variables in the test are correlated (see :footcite:`NicholsHolmes2002`). Parameters ---------- X : array, shape (n_samples, n_tests) Samples (observations) by number of tests (variables). n_permutations : int | 'all' Number of permutations. If n_permutations is 'all' all possible permutations are tested. It's the exact test, that can be untractable when the number of samples is big (e.g. > 20). If n_permutations >= 2**n_samples then the exact test is performed. tail : -1 or 0 or 1 (default = 0) If tail is 1, the alternative hypothesis is that the mean of the data is greater than 0 (upper tailed test). If tail is 0, the alternative hypothesis is that the mean of the data is different than 0 (two tailed test). If tail is -1, the alternative hypothesis is that the mean of the data is less than 0 (lower tailed test). %(n_jobs)s %(seed)s %(verbose)s Returns ------- T_obs : array of shape [n_tests] T-statistic observed for all variables. p_values : array of shape [n_tests] P-values for all the tests (a.k.a. variables). H0 : array of shape [n_permutations] T-statistic obtained by permutations and t-max trick for multiple comparison. Notes ----- If ``n_permutations >= 2 ** (n_samples - (tail == 0))``, ``n_permutations`` and ``seed`` will be ignored since an exact test (full permutation test) will be performed. References ---------- .. footbibliography:: """ from .cluster_level import _get_1samp_orders n_samples, n_tests = X.shape X2 = np.mean(X**2, axis=0) # precompute moments mu0 = np.mean(X, axis=0) dof_scaling = sqrt(n_samples / (n_samples - 1.0)) std0 = np.sqrt(X2 - mu0**2) * dof_scaling # get std with var splitting T_obs = np.mean(X, axis=0) / (std0 / sqrt(n_samples)) rng = check_random_state(seed) orders, _, extra = _get_1samp_orders(n_samples, n_permutations, tail, rng) perms = 2 * np.array(orders) - 1 # from 0, 1 -> 1, -1 logger.info(f"Permuting {len(orders)} times{extra}...") parallel, my_max_stat, n_jobs = parallel_func(_max_stat, n_jobs) max_abs = np.concatenate( parallel( my_max_stat(X, X2, p, dof_scaling) for p in np.array_split(perms, n_jobs) ) ) max_abs = np.concatenate((max_abs, [np.abs(T_obs).max()])) H0 = np.sort(max_abs) if tail == 0: p_values = (H0 >= np.abs(T_obs[:, np.newaxis])).mean(-1) elif tail == 1: p_values = (H0 >= T_obs[:, np.newaxis]).mean(-1) elif tail == -1: p_values = (-H0 <= T_obs[:, np.newaxis]).mean(-1) return T_obs, p_values, H0 def bootstrap_confidence_interval( arr, ci=0.95, n_bootstraps=2000, stat_fun="mean", random_state=None ): """Get confidence intervals from non-parametric bootstrap. Parameters ---------- arr : ndarray, shape (n_samples, ...) The input data on which to calculate the confidence interval. ci : float Level of the confidence interval between 0 and 1. n_bootstraps : int Number of bootstraps. stat_fun : str | callable Can be "mean", "median", or a callable operating along ``axis=0``. random_state : int | float | array_like | None The seed at which to initialize the bootstrap. Returns ------- cis : ndarray, shape (2, ...) Containing the lower boundary of the CI at ``cis[0, ...]`` and the upper boundary of the CI at ``cis[1, ...]``. """ if stat_fun == "mean": def stat_fun(x): return x.mean(axis=0) elif stat_fun == "median": def stat_fun(x): return np.median(x, axis=0) elif not callable(stat_fun): raise ValueError("stat_fun must be 'mean', 'median' or callable.") n_trials = arr.shape[0] indices = np.arange(n_trials, dtype=int) # BCA would be cool to have too rng = check_random_state(random_state) boot_indices = rng.choice(indices, replace=True, size=(n_bootstraps, len(indices))) stat = np.array([stat_fun(arr[inds]) for inds in boot_indices]) ci = (((1 - ci) / 2) * 100, (1 - ((1 - ci) / 2)) * 100) ci_low, ci_up = np.percentile(stat, ci, axis=0) return np.array([ci_low, ci_up]) def _ci(arr, ci=0.95, method="bootstrap", n_bootstraps=2000, random_state=None): """Calculate confidence interval. Aux function for plot_compare_evokeds.""" if method == "bootstrap": return bootstrap_confidence_interval( arr, ci=ci, n_bootstraps=n_bootstraps, random_state=random_state ) else: from .parametric import _parametric_ci return _parametric_ci(arr, ci=ci)