Merge pull request #4 from PharmCat/dev

v0.1.3
PharmCat · May 12, 2023 · 5205f4f · 5205f4f · PharmCat · May 12, 2023
2 parents 1c249dc + 9427407
commit 5205f4f
Show file tree

Hide file tree

Showing 7 changed files with 255 additions and 101 deletions.
diff --git a/Project.toml b/Project.toml
@@ -1,7 +1,7 @@
 name = "MetidaFreq"
 uuid = "bd16ee1e-1b2f-4f89-b253-604a522f8c5f"
 authors = ["PharmCat <[email protected]>"]
-version = "0.1.2"
+version = "0.1.3"
 
 [deps]
 

diff --git a/docs/src/api.md b/docs/src/api.md
@@ -30,6 +30,11 @@ MetidaFreq.colorder
 MetidaFreq.colreduce
 ```
 
+### MetidaFreq.dropzeros!
+```@docs
+MetidaFreq.dropzeros!
+```
+
 ### Base.permutedims
 ```@docs
 MetidaFreq.permutedims

diff --git a/src/MetidaFreq.jl b/src/MetidaFreq.jl
@@ -13,7 +13,7 @@ import HypothesisTests, MetidaBase
 import StatsBase: confint
 import MetidaBase: AbstractData, AbstractIdData, DataSet, Proportion, PrettyTables, metida_table_, getid, map, getdata
 import HypothesisTests: ChisqTest, MultinomialLRTest, FisherExactTest
-import Base: ht_keyindex, size, show, permutedims
+import Base: ht_keyindex, size, show, permutedims, getindex
 
 export contab, propci, diffci, orci, rrci, confint, colorder, sumrows, addcol, colreduce
 

diff --git a/src/confint.jl b/src/confint.jl
@@ -1,4 +1,95 @@
+#=
+Brown L.D., Cai T.T. and Dasgupta A. (2001) Interval estimation for a binomial proportion. Statistical Science, 16(2), pp. 101-133.
+Witting H. (1985) Mathematische Statistik I. Stuttgart: Teubner.
+Pratt J. W. (1968) A normal approximation for binomial, F, Beta, and other common, related tail probabilities. Journal of the American Statistical Association, 63, 1457-1483.
+Wilcox, R. R. (2005) Introduction to robust estimation and hypothesis testing. Elsevier Academic Press.
+=#
 
+"""
+    propci(x::Int, n::Int; level = 0.95, method = :default)
+
+`method`:
+
+- `:wilson` | `:default` - Wilson's confidence interval (CI) for a single proportion (wilson score) (Wilson, 1927);
+- `:wilsoncc` - Wilson's CI with continuity correction (CC);
+- `:cp` - Clopper-Pearson exact CI (Clopper&Pearson, 1934);
+- `:blaker` - Blaker exact CI for discrete distributions (Blaker, 2000);
+- `:soc` - SOC: Second-Order corrected CI;
+- `:arc` - Arcsine CI;
+- `:wald` - Wald CI without CC;
+- `:waldcc` - Wald CI with CC (1/(2n));
+- `:ac` - Agresti-Coull;
+- `:jeffrey` - Jeffreys interval (Brown et al., 2001).
+
+Reference:
+
+* Wilson, E.B. (1927) Probable inference, the law of succession, and statistical inference. J. Amer. Stat. Assoc. 22, 209–212;
+* Clopper, C. and Pearson, E.S. (1934) The use of confidence or fiducial limits illustrated in the case of the binomial. Biometrika 26, 404–413;
+* Agresti A. and Coull B.A. (1998) Approximate is better than "exact" for interval estimation of binomial proportions. American Statistician, 52, pp. 119-126.
+* Newcombe, R. G. (1998) Two-sided confidence intervals for the single proportion: comparison of seven methods, Statistics in Medicine, 17:857-872 https://pubmed.ncbi.nlm.nih.gov/16206245/
+* Blaker, H. (2000). Confidence curves and improved exact confidence intervals for discrete distributions, Canadian Journal of Statistics 28 (4), 783–798;
+* Pires, Ana & Amado, Conceição. (2008). Interval Estimators for a Binomial Proportion: Comparison of Twenty Methods. REVSTAT. 6. 10.57805/revstat.v6i2.63. 
+
+"""
+function propci(x::Int, n::Int; level = 0.95, method = :default)
+    #=
+    TODO: modified wilson, modified jeffreys, witting, pratt, midp, lik
+    =#
+    if  x > n throw(ArgumentError("x > n")) end
+    alpha    = 1 - level
+    if method == :wilson || method == :default
+        fx = ci_prop_wilson
+    elseif method==:wilsoncc
+        fx = ci_prop_wilson_cc
+    elseif method==:cp
+        fx = ci_prop_cp
+    elseif method==:blaker
+        fx = ci_prop_blaker
+    elseif method==:soc
+        fx = ci_prop_soc
+    elseif method==:arc
+        fx = ci_prop_arc
+    elseif method==:wald
+        fx = ci_prop_wald
+    elseif method==:waldcc
+        fx = ci_prop_wald_cc
+    elseif method==:ac
+        fx = ci_prop_ac
+    elseif method==:jeffrey
+        fx = ci_prop_jeffrey
+    else
+        throw(ArgumentError("unknown ci method=$(method), possible is: :wilson, :wilsoncc, :cp, :blaker, :soc, :arc, :wald, :waldcc, :ac, :jeffrey"))
+    end
+    ci = fx(x, n, alpha)
+    (max(ci[1], 0), min(ci[2], 1))
+end
+"""
+    propci(contab::ConTab; level = 0.95, method = :default)
+"""
+function propci(contab::ConTab; level = 0.95, method = :default)
+    if  size(contab.tab, 2) != 2 throw(ArgumentError("CI only for N X 2 tables.")) end
+    if size(contab, 1) > 1
+        v = Vector{Tuple{Float64, Float64}}(undef, size(contab, 1))
+        for i = 1:size(contab, 1)
+            x = contab.tab[i, 1]
+            n = x + contab.tab[i, 2]
+            #println(x, " : ", n)
+            v[i] = propci(x, n; level = level, method = method)
+        end
+        return v
+    else
+        x = contab.tab[1, 1]
+        n = x + contab.tab[1, 2]
+        return propci(x, n; level = level, method = method)
+    end
+end
+
+"""
+    propci(prop::Proportion; level = 0.95, method = :default)
+"""
+function propci(prop::Proportion; level = 0.95, method = :default)
+    propci(prop.x, prop.n; level = level, method = method)
+end
 
 """
     diffci(x1, n1, x2, n2; level = 0.95, method = :default)
@@ -185,85 +276,6 @@ function rrci(contab::ConTab; level = 0.95, method = :default)
     rrci(x1, n1, x2, n2; level = level, method = method)
 end
 
-"""
-    propci(x::Int, n::Int; level = 0.95, method = :default)
-
-`method`:
-
-- `:wilson` | `:default` - Wilson's confidence interval (CI) for a single proportion (wilson score) (Wilson, 1927);
-- `:wilsoncc` - Wilson's CI with continuity correction (CC);
-- `:cp` - Clopper-Pearson exact CI (Clopper&Pearson, 1934);
-- `:blaker` - Blaker exact CI for discrete distributions (Blaker, 2000);
-- `:soc` - SOC: Second-Order corrected CI;
-- `:arc` - Arcsine CI;
-- `:wald` - Wald CI without CC;
-- `:waldcc` - Wald CI with CC;
-- `:ac` - Agresti-Coull;
-- `:jeffrey` - Jeffreys interval.
-
-Reference:
-
-* Wilson, E.B. (1927) Probable inference, the law of succession, and statistical inference J. Amer.Stat. Assoc 22, 209–212;
-* Clopper, C. and Pearson, E.S. (1934) The use of confidence or fiducial limits illustrated in the caseof the binomial.Biometrika26, 404–413;
-* Blaker, H. (2000). Confidence curves and improved exact confidence intervals for discrete distributions, Canadian Journal of Statistics 28 (4), 783–798;
-
-"""
-function propci(x::Int, n::Int; level = 0.95, method = :default)
-    if  x > n throw(ArgumentError("x > n")) end
-    alpha    = 1 - level
-    if method == :wilson || method == :default
-        fx = ci_prop_wilson
-    elseif method==:wilsoncc
-        fx = ci_prop_wilson_cc
-    elseif method==:cp
-        fx = ci_prop_cp
-    elseif method==:blaker
-        fx = ci_prop_blaker
-    elseif method==:soc
-        fx = ci_prop_soc
-    elseif method==:arc
-        fx = ci_prop_arc
-    elseif method==:wald
-        fx = ci_prop_wald
-    elseif method==:waldcc
-        fx = ci_prop_wald_cc
-    elseif method==:ac
-        fx = ci_prop_ac
-    elseif method==:jeffrey
-        fx = ci_prop_jeffrey
-    else
-        throw(ArgumentError("unknown ci method=$(method), possible is: :wilson, :wilsoncc, :cp, :blaker, :soc, :arc, :wald, :waldcc, :ac, :jeffrey"))
-    end
-    fx(x, n, alpha)
-end
-"""
-    propci(contab::ConTab; level = 0.95, method = :default)
-"""
-function propci(contab::ConTab; level = 0.95, method = :default)
-    if  size(contab.tab, 2) != 2 throw(ArgumentError("CI only for N X 2 tables.")) end
-    if size(contab, 1) > 1
-        v = Vector{Tuple{Float64, Float64}}(undef, size(contab, 1))
-        for i = 1:size(contab, 1)
-            x = contab.tab[i, 1]
-            n = x + contab.tab[i, 2]
-            #println(x, " : ", n)
-            v[i] = propci(x, n; level = level, method = method)
-        end
-        return v
-    else
-        x = contab.tab[1, 1]
-        n = x + contab.tab[1, 2]
-        return propci(x, n; level = level, method = method)
-    end
-end
-
-"""
-    propci(prop::Proportion; level = 0.95, method = :default)
-"""
-function propci(prop::Proportion; level = 0.95, method = :default)
-    propci(prop.x, prop.n; level = level, method = method)
-end
-
 """
     mpropci(contab::ConTab; level = 0.95, method = :default)
 
@@ -410,7 +422,8 @@ function ci_diff_fm(x1, n1, x2, n2, alpha; atol::Float64 = 1E-8)
     uci = find_zero(fmnd, ucis)
     return  lci, uci
 end
-# Wald
+# Wald 
+# Pires, Ana & Amado, Conceição. (2008). Interval Estimators for a Binomial Proportion: Comparison of Twenty Methods. REVSTAT. 6. 10.57805/revstat.v6i2.63. 
 function ci_diff_wald(x1, n1, x2, n2, alpha)
     p1       = x1 / n1
     p2       = x2 / n2
@@ -420,6 +433,7 @@ function ci_diff_wald(x1, n1, x2, n2, alpha)
     return est - z * se, est + z * se
 end
 # Wald continuity correction
+# Pires, Ana & Amado, Conceição. (2008). Interval Estimators for a Binomial Proportion: Comparison of Twenty Methods. REVSTAT. 6. 10.57805/revstat.v6i2.63. 
 function ci_diff_wald_cc(x1, n1, x2, n2, alpha)
     p1       = x1 / n1
     p2       = x2 / n2
@@ -698,11 +712,14 @@ end
 # Wilson CC
 # Newcombe, R. G. (1998). "Two-sided confidence intervals for the single proportion: comparison of seven methods". Statistics in Medicine. 17 (8): 857–872. doi:10.1002/(SICI)1097-0258(19980430)17:8<857::AID-SIM777>3.0.CO;2-E. PMID 959561
 function ci_prop_wilson_cc(x, n, alpha)
-    z = abs(quantile(Normal(), 1 - alpha / 2))
+    q = abs(quantile(Normal(), 1 - alpha / 2))
     p = x / n
-    l = (2*n*p+z*z-1-z*sqrt(z*z-2-1/n+4*p*(n*(1-p)+1)))/2/(n+z*z)
-    u = (2*n*p+z*z+1+z*sqrt(z*z+2-1/n+4*p*(n*(1-p)-1)))/2/(n+z*z)
-    return min(p, l), max(p, u)
+    q² = q * q
+    a = 2x + q²
+    b = 2 * (n + q²)
+    l = (a - 1 - q * sqrt(q² - 2 - 1 / n + 4x * (1 + (1 - x) / n))) / b
+    u = (a + 1 + q * sqrt(q² + 2 - 1 / n + 4x * (1 - (1 + x) / n))) / b
+    return (max(min(p, l), 0), min(max(p, u), 1))
 end
 #Clopper-Pearson exatct CI
 #Clopper, C. and Pearson, E.S. (1934) The use of confidence or fiducial limits illustrated in the caseof the binomial.Biometrika26, 404–413.
@@ -769,6 +786,7 @@ function ci_prop_wald(x, n, alpha)
     return p-b, p+b
 end
 # Wald CI CC
+
 function ci_prop_wald_cc(x::Int, n::Int, alpha::Real)
     p=x/n
     b = quantile(Normal(), 1-alpha/2)*sqrt(p*(1-p)/n)
@@ -807,3 +825,5 @@ function ci_prop_goodman(v, alpha::T2) where T2
     end
     ci
 end
+
+## Wenzel D, Zapf A. Difference of two dependent sensitivities and specificities: Comparison of various approaches. Biom J. 2013 Sep;55(5):705-18. doi: 10.1002/bimj.201200186. Epub 2013 Jul 5. PMID: 23828661.
diff --git a/src/contab.jl b/src/contab.jl
@@ -56,6 +56,13 @@ function contab(ct::ConTab, rr, cr)
     contab(ct.tab[rr, cr]; rownames = ct.rown[rr], colnames = ct.coln[cr], id = copy(ct.id))
 end
 
+"""
+    Base.getindex(ct::ConTab, args...) = getindex(ct.tab, args...)
+
+Return values of the `ConTab` table by index.
+"""
+Base.getindex(ct::ConTab, args...) = getindex(ct.tab, args...)
+
 """
     Base.permutedims(ct::ConTab)
 
@@ -75,6 +82,8 @@ function sumrows_(f::Function, ct::ConTab)
 end
 """
     sumrows(f::Function, contab::ConTab; coln = "Val")
+
+Apply function `f` to each element of a row, sum the results and make a new column.
 """
 function sumrows(f::Function, ct::ConTab; coln = "Val")
     mx = sumrows_(f, ct)
@@ -85,13 +94,17 @@ function sumrows(f::Function, ct::ConTab; coln = "Val")
 end
 """
     sumrows(contab::ConTab; coln = "Val")
+
+Apply the `identity` function to each element of a row, sum the results and make a new column.
 """
 function sumrows(ct::ConTab; coln = "Val")
     sumrows(identity, ct; coln = coln)
 end
 
 """
     addcol(ct::ConTab, col::Vector{Int}; coln = "Val")
+
+Add column.
 """
 function addcol(ct::ConTab, col::Vector{Int}; coln = "Val")
     contab(hcat(ct.tab, col);
@@ -102,6 +115,8 @@ end
 
 """
     addcol(f::Function, ct::ConTab; coln = "Val")
+
+Apply function to row and make new column.
 """
 function addcol(f::Function, ct::ConTab; coln = "Val")
     n   = size(ct, 1)
@@ -117,6 +132,8 @@ end
 
 """
     addcol(f::Function, ct::ConTab, col::Vector{Int}; coln = "Val")
+
+Example function: `(x, y) -> sum(x) + y`, where `x` is a row and `y` is the value of the `col` item.
 """
 function addcol(f::Function, ct::ConTab, col::Vector{Int}; coln = "Val")
     n   = size(ct, 1)
@@ -131,6 +148,8 @@ function addcol(f::Function, ct::ConTab, col::Vector{Int}; coln = "Val")
 end
 """
     colreduce(f::Function, data::DataSet{<:ConTab}; coln = nothing)
+
+Sum rows for each table, then make a new table in which the sums are placed in each column.
 """
 function colreduce(f::Function, data::DataSet{<:ConTab}; coln = nothing)
     fst = data.ds[1].rown
@@ -359,7 +378,10 @@ end
 function Base.show(io::IO, contab::ConTab)
     println(io, "  Contingency table:")
     tab  = hcat(contab.tab, sum(contab.tab, dims = 2))
-    coln = push!(copy(contab.coln), "Total")
+    coln = Vector{String}(undef, length(contab.coln) + 1)
+    copyto!(view(coln, 1:length(contab.coln)), contab.coln)
+    coln[end] = "Total"
+    #coln = push!(copy(contab.coln), "Total")
     PrettyTables.pretty_table(io, tab; header = coln, row_labels = contab.rown, tf = PrettyTables.tf_compact)
     if !isnothing(contab.id) && length(contab.id) > 0
         print(io, "  ID: ")

diff --git a/src/metaprop.jl b/src/metaprop.jl
@@ -342,17 +342,6 @@ function Base.show(io::IO, mp::MetaProp)
     println(io, "  Metric vector: $(mp.y)")
     print(io,   "  Metric variance: $(mp.var)")
 end
-struct MetaPropResult{Symbol}
-    data::MetaProp
-    wts::Vector{Float64}
-    est::Float64
-    var::Float64
-    chisq::Float64
-    hetq::Float64
-    heti::Float64
-    hettau::Float64
-end
-
 
 function weights(mpr::MetaPropResult)
     mpr.wts ./ (sum(mpr.wts) / 100)