Skip to content

Commit

Permalink
Merge pull request #66 from EricHUANG970823/new-branch
Browse files Browse the repository at this point in the history
Dunnett's test
  • Loading branch information
maximtrp authored Feb 17, 2024
2 parents 18bc6a7 + 7aee929 commit 31aabc5
Show file tree
Hide file tree
Showing 2 changed files with 81 additions and 4 deletions.
6 changes: 3 additions & 3 deletions scikit_posthocs/__init__.py
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
__version__ = '0.8.1'
__version__ = '0.8.2'

from scikit_posthocs._global import global_simes_test, global_f_test
from scikit_posthocs._omnibus import test_osrt, test_durbin, test_mackwolfe
Expand All @@ -10,8 +10,8 @@
posthoc_nemenyi_friedman, posthoc_npm_test, posthoc_quade,
posthoc_scheffe, posthoc_siegel_friedman, posthoc_tamhane,
posthoc_ttest, posthoc_tukey, posthoc_tukey_hsd,
posthoc_vanwaerden, posthoc_wilcoxon, __convert_to_df,
__convert_to_block_df,
posthoc_vanwaerden, posthoc_wilcoxon, posthoc_dunnett,
__convert_to_df, __convert_to_block_df,
)

from scikit_posthocs._plotting import (
Expand Down
79 changes: 78 additions & 1 deletion scikit_posthocs/_posthocs.py
Original file line number Diff line number Diff line change
Expand Up @@ -5,7 +5,7 @@
from statsmodels.sandbox.stats.multicomp import multipletests
from statsmodels.stats.multicomp import pairwise_tukeyhsd
from statsmodels.stats.libqsturng import psturng
from pandas import DataFrame
from pandas import DataFrame, Series, MultiIndex


def __convert_to_df(
Expand Down Expand Up @@ -1561,6 +1561,83 @@ def compare_stats(i, j):
return DataFrame(vs, index=groups, columns=groups)


def posthoc_dunnett(a: Union[list, np.ndarray, DataFrame],
                    val_col: str = None,
                    group_col: str = None,
                    control: str = None,
                    sort: bool = False,
                    to_matrix: bool = True) -> Union[Series, DataFrame]:
    """
    Dunnett's test [1, 2, 3] for multiple comparisons against a control group,
    used after parametric ANOVA. The control group is specified by the
    `control` parameter.

    Parameters
    ----------
    a : array_like or pandas DataFrame object
        An array, any object exposing the array interface or a pandas
        DataFrame. Array must be two-dimensional.

    val_col : str, optional
        Name of a DataFrame column that contains dependent variable values (test
        or response variable). Values should have a non-nominal scale. Must be
        specified if `a` is a pandas DataFrame object.

    group_col : str, optional
        Name of a DataFrame column that contains independent variable values
        (grouping or predictor variable). Values should have a nominal scale
        (categorical). Must be specified if `a` is a pandas DataFrame object.

    control : str, optional
        Label of the control group within the `group_col` column. It must
        match one of the group labels present in the data; the default
        ``None`` does not select any group.

    sort : bool, optional
        Specifies whether to sort DataFrame by group_col or not. Recommended
        unless you sort your data manually.

    to_matrix : bool, optional
        Specifies whether to return a DataFrame or a Series. If True, a
        DataFrame is returned with some NaN values since it's not pairwise
        comparison. Default is True.

    Returns
    -------
    result : pandas.Series or pandas.DataFrame
        P values. If `to_matrix` is False, a Series indexed by
        ``(control, treatment)`` pairs. Otherwise a square, symmetric
        DataFrame over all group labels in which only the control row and
        column are filled (the control-vs-control cell is 1.0) and all
        remaining cells are NaN.

    Raises
    ------
    KeyError
        If `control` is not one of the group labels found in the data.

    References
    ----------
    .. [1] Charles W. Dunnett (1955). “A Multiple Comparison Procedure for Comparing Several Treatments with a Control.”
    .. [2] https://en.wikipedia.org/wiki/Dunnett%27s_test
    .. [3] https://docs.scipy.org/doc/scipy/reference/generated/scipy.stats.dunnett.html#id1
    """
    x, _val_col, _group_col = __convert_to_df(a, val_col, group_col)
    x = x.sort_values(by=[_group_col], ascending=True) if sort else x
    x = x.set_index(_group_col)[_val_col]

    # One list of non-missing observations per group label.
    x_embedded = x.groupby(_group_col).agg(lambda y: y.dropna().tolist())

    # Same exception type that `.loc` would raise, but with a message that
    # tells the caller what went wrong and which labels are valid.
    if control not in x_embedded.index:
        raise KeyError(
            f"control group {control!r} not found among groups "
            f"{x_embedded.index.tolist()}; pass one of these labels as `control`"
        )

    control_data = x_embedded.loc[control]
    treatment_data = x_embedded.drop(control)
    treatment_labels = treatment_data.index.tolist()

    # p-values are returned in the order the treatment samples are passed,
    # i.e. the order of `treatment_labels`.
    pvals = ss.dunnett(*treatment_data, control=control_data).pvalue

    multi_index = MultiIndex.from_product([[control], treatment_labels])
    dunnett_sr = Series(pvals, index=multi_index)

    if not to_matrix:
        return dunnett_sr

    levels = x.index.unique().tolist()
    # Float dtype (instead of the object dtype an empty DataFrame would get)
    # keeps the p-value matrix numeric for downstream comparisons and plots.
    result_df = DataFrame(np.nan, index=levels, columns=levels, dtype=float)

    # Only comparisons against the control exist; mirror them so the matrix
    # is symmetric. Distinct loop names avoid shadowing the parameter `a`.
    for treatment, pval in zip(treatment_labels, pvals):
        result_df.loc[control, treatment] = pval
        result_df.loc[treatment, control] = pval
    result_df.loc[control, control] = 1.0
    return result_df


def posthoc_ttest(
a: Union[list, np.ndarray, DataFrame],
val_col: str = None,
Expand Down

0 comments on commit 31aabc5

Please sign in to comment.