glevv · glevv · Nov 26, 2023 · Nov 26, 2023
@@ -57,8 +57,8 @@ jobs:
 
       - name: Run ruff
         run: |
-          poetry run ruff check .
-          poetry run ruff format . --check
+          poetry run ruff check --output-format=github .
+          poetry run ruff format . --diff
 
       - name: Run mypy
         run: |

@@ -18,5 +18,5 @@ repository-code: 'https://github.com/glevv/obscure_stats'
 repository-artifact: 'https://pypi.org/project/obscure_stats'
 abstract: Collection of less-known statistical measures
 license: MIT
-version: 0.1.2
+version: 0.1.3
 date-released: '2023-10-21'
@@ -1,9 +1,24 @@
 [tool.poetry]
 name = "obscure_stats"
-version = "0.1.2"
+version = "0.1.3"
 description = "Collection of less known statistical functions"
 authors = ["Gleb Levitski"]
 readme = "README.md"
+classifiers = [
+    "Development Status :: 5 - Production/Stable",
+    "Intended Audience :: Science/Research",
+    "Intended Audience :: Developers",
+    "License :: OSI Approved :: MIT License",
+    "Programming Language :: Python",
+    "Programming Language :: Python :: 3",
+    "Programming Language :: Python :: 3.9",
+    "Programming Language :: Python :: 3.10",
+    "Programming Language :: Python :: 3.11",
+    "Programming Language :: Python :: 3.12",
+    "Topic :: Software Development :: Libraries",
+    "Topic :: Scientific/Engineering",
+    "Operating System :: OS Independent",
+]
 
 [tool.poetry.dependencies]
 python = ">=3.9,<3.13"

@@ -175,6 +175,10 @@ def concordance_rate(
     Holmes, P. (2001).
     Correlation: From Picture to Formula.
     Teaching Statistics. 23 (3): 67-71.
+
+    See Also
+    --------
+    Quadrant count ratio.
     """
     if _check_arrays(x, y):
         return np.nan
@@ -222,6 +226,10 @@ def symmetric_chatterjeexi(x: np.ndarray, y: np.ndarray) -> float:
     Chatterjee, S. (2021).
     A new coefficient of correlation.
     Journal of the American Statistical Association, 116(536), 2009-2022.
+
+    See Also
+    --------
+    obscure_stats.association.chatterjeexi - Chatterjee Xi coefficient.
     """
     return max(chatterjeexi(x, y), chatterjeexi(y, x))
 
@@ -252,6 +260,11 @@ def zhangi(x: np.ndarray, y: np.ndarray) -> float:
     Notes
     -----
     This measure is assymetric: (x, y) != (y, x).
+
+    See Also
+    --------
+    scipy.stats.spearmanr - Spearman R coefficient.
+    obscure_stats.association.chatterjeexi - Chatterjee Xi coefficient.
     """
     if _check_arrays(x, y):
         return np.nan
@@ -286,6 +299,11 @@ def tanimoto_similarity(x: np.ndarray, y: np.ndarray) -> float:
     Rogers, D. J.; Tanimoto, T. T. (1960).
     A Computer Program for Classifying Plants.
     Science. 132 (3434): 1115-8.
+
+    See Also
+    --------
+    Jaccard similarity
+    Cosine similarity
     """
     if _check_arrays(x, y):
         return np.nan

@@ -196,6 +196,10 @@ def standard_trimmed_harrell_davis_quantile(x: np.ndarray, q: float = 0.5) -> fl
     Trimmed Harrell-Davis quantile estimator based on
     the highest density interval of the given width.
     Communications in Statistics - Simulation and Computation, pp. 1-11.
+
+    See Also
+    --------
+    scipy.stats.mstats.hdquantiles - Harrell-Davis quantile estimates.
     """
     if len(x) <= 1:
         return x[0]
@@ -207,7 +211,7 @@ def standard_trimmed_harrell_davis_quantile(x: np.ndarray, q: float = 0.5) -> fl
     n_calculated = 1 / n**0.5  # heuristic suggested by the author
     a = (n + 1) * q
     b = (n + 1) * (1.0 - q)
-    hdi = (0.5 - n_calculated * q, 0.5 + n_calculated * (1.0 - q))
+    hdi = (max(0, q - n_calculated * 0.5), min(1, q + n_calculated * 0.5))
     hdi_cdf = stats.beta.cdf(hdi, a, b)
     i_start = int(math.floor(hdi[0] * n))
     i_end = int(math.ceil(hdi[1] * n))
@@ -241,6 +245,10 @@ def half_sample_mode(x: np.ndarray) -> float:
     On a fast, robust estimator of the mode:
     Comparisons to other robust estimators with applications.
     Computational Statistics & Data Analysis, 50(12), 3500-3530.
+
+    See Also
+    --------
+    scipy.stats.mode - Mode estimator.
     """
     y = np.asarray(x)
     y = y[np.isfinite(y)]

@@ -9,7 +9,6 @@
     hossain_adnan_skew,
     kelly_skew,
     medeen_skew,
-    pearson_halfmode_skew,
     pearson_median_skew,
     pearson_mode_skew,
     wauc_skew_gamma,
@@ -26,6 +25,5 @@
     "medeen_skew",
     "pearson_median_skew",
     "pearson_mode_skew",
-    "pearson_halfmode_skew",
     "bickel_mode_skew",
 ]
@@ -35,36 +35,11 @@ def pearson_mode_skew(x: np.ndarray) -> float:
     return (mean - mode) / std
 
 
-def pearson_halfmode_skew(x: np.ndarray) -> float:
-    """Calculate Pearson's mode skew coefficient with half sample mode.
-
-    This measure should be more stable version than regular
-    Pearson's mode skew coefficien.
-
-    Parameters
-    ----------
-    x : array_like
-        Input array.
-
-    Returns
-    -------
-    phmods : float or array_like.
-        The value of Pearson's half mode skew coefficient.
-
-    References
-    ----------
-    Pearson, E. S.; Hartley, H. O. (1966).
-    Biometrika Tables for Statisticians, vols. I and II.
-    Cambridge University Press, Cambridge.
-    """
-    mean = np.nanmean(x)
-    mode = half_sample_mode(x)
-    std = np.nanstd(x)
-    return (mean - mode) / std
-
-
 def bickel_mode_skew(x: np.ndarray) -> float:
-    """Calculate Robust mode skew with half sample mode.
+    """Calculate Robust Mode skew with half sample mode.
+
+    This measure should be more stable than the Pearson mode
+    skewness coefficient.
 
     Parameters
     ----------

@@ -51,6 +51,12 @@ def test_thdm(thdme_test_data: np.ndarray) -> None:
     if result != pytest.approx(0.6268, rel=1e-4):
         msg = "Results from the test and paper do not match."
         raise ValueError(msg)
+    if result > standard_trimmed_harrell_davis_quantile(thdme_test_data, q=0.99):
+        msg = "Results from the test and paper do not match."
+        raise ValueError(msg)
+    if result < standard_trimmed_harrell_davis_quantile(thdme_test_data, q=0.01):
+        msg = "Results from the test and paper do not match."
+        raise ValueError(msg)
 
 
 def test_edge_cases(x_array_float: np.ndarray) -> None:

@@ -13,7 +13,6 @@
     hossain_adnan_skew,
     kelly_skew,
     medeen_skew,
-    pearson_halfmode_skew,
     pearson_median_skew,
     pearson_mode_skew,
     wauc_skew_gamma,
@@ -33,7 +32,6 @@
         medeen_skew,
         pearson_median_skew,
         pearson_mode_skew,
-        pearson_halfmode_skew,
         bickel_mode_skew,
     ],
 )
@@ -64,7 +62,6 @@ def test_mock_aggregation_functions(
         medeen_skew,
         pearson_median_skew,
         pearson_mode_skew,
-        pearson_halfmode_skew,
         bickel_mode_skew,
     ],
 )
@@ -101,7 +98,6 @@ def test_rank_skew(rank_skewness_test_data: np.ndarray) -> None:
         medeen_skew,
         pearson_median_skew,
         pearson_mode_skew,
-        pearson_halfmode_skew,
         bickel_mode_skew,
     ],
 )