Source code for esmtools.testing

import numpy as np
import xarray as xr
from scipy.stats import ttest_ind_from_stats as tti_from_stats
from statsmodels.stats.multitest import multipletests as statsmodels_multipletests

from .checks import is_xarray
from .constants import MULTIPLE_TESTS

__all__ = ['ttest_ind_from_stats', 'multipletests']


[docs]@is_xarray(0) def multipletests(p, alpha=0.05, method=None, **multipletests_kwargs): """Apply statsmodels.stats.multitest.multipletests for multi-dimensional xr.objects. Args: p (xr.object): uncorrected p-values. alpha (optional float): FWER, family-wise error rate. Defaults to 0.05. method (str): Method used for testing and adjustment of pvalues. Can be either the full name or initial letters. Available methods are: - bonferroni : one-step correction - sidak : one-step correction - holm-sidak : step down method using Sidak adjustments - holm : step-down method using Bonferroni adjustments - simes-hochberg : step-up method (independent) - hommel : closed method based on Simes tests (non-negative) - fdr_bh : Benjamini/Hochberg (non-negative) - fdr_by : Benjamini/Yekutieli (negative) - fdr_tsbh : two stage fdr correction (non-negative) - fdr_tsbky : two stage fdr correction (non-negative) **multipletests_kwargs (optional dict): is_sorted, returnsorted see statsmodels.stats.multitest.multitest Returns: reject (xr.object): true for hypothesis that can be rejected for given alpha pvals_corrected (xr.object): p-values corrected for multiple tests Example: >>> from esmtools.testing import multipletests >>> reject, xpvals_corrected = multipletests(p, method='fdr_bh') """ if method is None: raise ValueError( f"Please indicate a method using the 'method=...' keyword. " f'Select from {MULTIPLE_TESTS}' ) elif method not in MULTIPLE_TESTS: raise ValueError( f"Your method '{method}' is not in the accepted methods: {MULTIPLE_TESTS}" ) # stack all to 1d array p_stacked = p.stack(s=p.dims) # mask only where not nan: # https://github.com/statsmodels/statsmodels/issues/2899 mask = np.isfinite(p_stacked) pvals_corrected = xr.full_like(p_stacked, np.nan) reject = xr.full_like(p_stacked, np.nan) # apply test where mask reject[mask], pvals_corrected[mask], *_ = statsmodels_multipletests( p_stacked[mask], alpha=alpha, method=method, **multipletests_kwargs ) return reject.unstack('s'), pvals_corrected.unstack('s')
[docs]def ttest_ind_from_stats(mean1, std1, nobs1, mean2, std2, nobs2, equal_var=True): """Parallelize scipy.stats.ttest_ind_from_stats and make dask-compatible. Args: mean1, mean2 (array_like): The means of samples 1 and 2. std1, std2 (array_like): The standard deviations of samples 1 and 2. nobs1, nobs2 (array_like): The number of observations for samples 1 and 2. equal_var (bool, optional): If True (default), perform a standard independent 2 sample test that assumes equal population variances. If False, perform Welch's t-test, which does not assume equal population variance. Returns: statistic (float or array): The calculated t-statistics. pvalue (float or array): The two-tailed p-value. """ return xr.apply_ufunc( tti_from_stats, mean1, std1, nobs1, mean2, std2, nobs2, equal_var, input_core_dims=[[], [], [], [], [], [], []], output_core_dims=[[], []], vectorize=True, dask='parallelized', )