Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Fix STHDQ bugs #3

Merged
merged 1 commit into from
Nov 26, 2023
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
4 changes: 2 additions & 2 deletions .github/workflows/package.yml
Original file line number Diff line number Diff line change
Expand Up @@ -57,8 +57,8 @@ jobs:

- name: Run ruff
run: |
poetry run ruff check .
poetry run ruff format . --check
poetry run ruff check --output-format=github .
poetry run ruff format . --diff

- name: Run mypy
run: |
Expand Down
2 changes: 1 addition & 1 deletion CITATION
Original file line number Diff line number Diff line change
Expand Up @@ -18,5 +18,5 @@ repository-code: 'https://github.com/glevv/obscure_stats'
repository-artifact: 'https://pypi.org/project/obscure_stats'
abstract: Collection of less-known statistical measures
license: MIT
version: 0.1.2
version: 0.1.3
date-released: '2023-10-21'
17 changes: 16 additions & 1 deletion pyproject.toml
Original file line number Diff line number Diff line change
@@ -1,9 +1,24 @@
[tool.poetry]
name = "obscure_stats"
version = "0.1.2"
version = "0.1.3"
description = "Collection of less known statistical functions"
authors = ["Gleb Levitski"]
readme = "README.md"
classifiers = [
"Development Status :: 5 - Production/Stable",
"Intended Audience :: Science/Research",
"Intended Audience :: Developers",
"License :: OSI Approved :: MIT License",
"Programming Language :: Python",
"Programming Language :: Python :: 3",
"Programming Language :: Python :: 3.9",
"Programming Language :: Python :: 3.10",
"Programming Language :: Python :: 3.11",
"Programming Language :: Python :: 3.12",
"Topic :: Software Development :: Libraries",
"Topic :: Scientific/Engineering",
"Operating System :: OS Independent",
]

[tool.poetry.dependencies]
python = ">=3.9,<3.13"
Expand Down
18 changes: 18 additions & 0 deletions src/obscure_stats/association/association.py
Original file line number Diff line number Diff line change
Expand Up @@ -175,6 +175,10 @@ def concordance_rate(
Holmes, P. (2001).
Correlation: From Picture to Formula.
Teaching Statistics. 23 (3): 67-71.

See Also
--------
Quadrant count ratio.
"""
if _check_arrays(x, y):
return np.nan
Expand Down Expand Up @@ -222,6 +226,10 @@ def symmetric_chatterjeexi(x: np.ndarray, y: np.ndarray) -> float:
Chatterjee, S. (2021).
A new coefficient of correlation.
Journal of the American Statistical Association, 116(536), 2009-2022.

See Also
--------
obscure_stats.association.chatterjeexi - Chatterjee Xi coefficient.
"""
return max(chatterjeexi(x, y), chatterjeexi(y, x))

Expand Down Expand Up @@ -252,6 +260,11 @@ def zhangi(x: np.ndarray, y: np.ndarray) -> float:
Notes
-----
This measure is asymmetric: (x, y) != (y, x).

See Also
--------
scipy.stats.spearmanr - Spearman R coefficient.
obscure_stats.association.chatterjeexi - Chatterjee Xi coefficient.
"""
if _check_arrays(x, y):
return np.nan
Expand Down Expand Up @@ -286,6 +299,11 @@ def tanimoto_similarity(x: np.ndarray, y: np.ndarray) -> float:
Rogers, D. J.; Tanimoto, T. T. (1960).
A Computer Program for Classifying Plants.
Science. 132 (3434): 1115-8.

See Also
--------
Jaccard similarity
Cosine similarity
"""
if _check_arrays(x, y):
return np.nan
Expand Down
10 changes: 9 additions & 1 deletion src/obscure_stats/central_tendency/central_tendency.py
Original file line number Diff line number Diff line change
Expand Up @@ -196,6 +196,10 @@ def standard_trimmed_harrell_davis_quantile(x: np.ndarray, q: float = 0.5) -> fl
Trimmed Harrell-Davis quantile estimator based on
the highest density interval of the given width.
Communications in Statistics - Simulation and Computation, pp. 1-11.

See Also
--------
scipy.stats.mstats.hdquantiles - Harrell-Davis quantile estimates.
"""
if len(x) <= 1:
return x[0]
Expand All @@ -207,7 +211,7 @@ def standard_trimmed_harrell_davis_quantile(x: np.ndarray, q: float = 0.5) -> fl
n_calculated = 1 / n**0.5 # heuristic suggested by the author
a = (n + 1) * q
b = (n + 1) * (1.0 - q)
hdi = (0.5 - n_calculated * q, 0.5 + n_calculated * (1.0 - q))
hdi = (max(0, q - n_calculated * 0.5), min(1, q + n_calculated * 0.5))
hdi_cdf = stats.beta.cdf(hdi, a, b)
i_start = int(math.floor(hdi[0] * n))
i_end = int(math.ceil(hdi[1] * n))
Expand Down Expand Up @@ -241,6 +245,10 @@ def half_sample_mode(x: np.ndarray) -> float:
On a fast, robust estimator of the mode:
Comparisons to other robust estimators with applications.
Computational Statistics & Data Analysis, 50(12), 3500-3530.

See Also
--------
scipy.stats.mode - Mode estimator.
"""
y = np.asarray(x)
y = y[np.isfinite(y)]
Expand Down
2 changes: 0 additions & 2 deletions src/obscure_stats/skewness/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -9,7 +9,6 @@
hossain_adnan_skew,
kelly_skew,
medeen_skew,
pearson_halfmode_skew,
pearson_median_skew,
pearson_mode_skew,
wauc_skew_gamma,
Expand All @@ -26,6 +25,5 @@
"medeen_skew",
"pearson_median_skew",
"pearson_mode_skew",
"pearson_halfmode_skew",
"bickel_mode_skew",
]
33 changes: 4 additions & 29 deletions src/obscure_stats/skewness/skewness.py
Original file line number Diff line number Diff line change
Expand Up @@ -35,36 +35,11 @@ def pearson_mode_skew(x: np.ndarray) -> float:
return (mean - mode) / std


def pearson_halfmode_skew(x: np.ndarray) -> float:
"""Calculate Pearson's mode skew coefficient with half sample mode.

This measure should be a more stable version of the regular
Pearson's mode skew coefficient.

Parameters
----------
x : array_like
Input array.

Returns
-------
phmods : float or array_like.
The value of Pearson's half mode skew coefficient.

References
----------
Pearson, E. S.; Hartley, H. O. (1966).
Biometrika Tables for Statisticians, vols. I and II.
Cambridge University Press, Cambridge.
"""
mean = np.nanmean(x)
mode = half_sample_mode(x)
std = np.nanstd(x)
return (mean - mode) / std


def bickel_mode_skew(x: np.ndarray) -> float:
"""Calculate Robust mode skew with half sample mode.
"""Calculate Robust Mode skew with half sample mode.

This measure should be more stable than Pearson mode
skewness coefficient.

Parameters
----------
Expand Down
6 changes: 6 additions & 0 deletions tests/test_central_tendency.py
Original file line number Diff line number Diff line change
Expand Up @@ -51,6 +51,12 @@ def test_thdm(thdme_test_data: np.ndarray) -> None:
if result != pytest.approx(0.6268, rel=1e-4):
msg = "Results from the test and paper do not match."
raise ValueError(msg)
if result > standard_trimmed_harrell_davis_quantile(thdme_test_data, q=0.99):
msg = "Results from the test and paper do not match."
raise ValueError(msg)
if result < standard_trimmed_harrell_davis_quantile(thdme_test_data, q=0.01):
msg = "Results from the test and paper do not match."
raise ValueError(msg)


def test_edge_cases(x_array_float: np.ndarray) -> None:
Expand Down
4 changes: 0 additions & 4 deletions tests/test_skewness.py
Original file line number Diff line number Diff line change
Expand Up @@ -13,7 +13,6 @@
hossain_adnan_skew,
kelly_skew,
medeen_skew,
pearson_halfmode_skew,
pearson_median_skew,
pearson_mode_skew,
wauc_skew_gamma,
Expand All @@ -33,7 +32,6 @@
medeen_skew,
pearson_median_skew,
pearson_mode_skew,
pearson_halfmode_skew,
bickel_mode_skew,
],
)
Expand Down Expand Up @@ -64,7 +62,6 @@ def test_mock_aggregation_functions(
medeen_skew,
pearson_median_skew,
pearson_mode_skew,
pearson_halfmode_skew,
bickel_mode_skew,
],
)
Expand Down Expand Up @@ -101,7 +98,6 @@ def test_rank_skew(rank_skewness_test_data: np.ndarray) -> None:
medeen_skew,
pearson_median_skew,
pearson_mode_skew,
pearson_halfmode_skew,
bickel_mode_skew,
],
)
Expand Down