From 5ae8a0507654515e6d81d01c9eb0dd3a34d68620 Mon Sep 17 00:00:00 2001 From: Esteban Zapata Rojas <2417465+estebanz01@users.noreply.github.com> Date: Sun, 30 Oct 2022 21:28:26 -0400 Subject: [PATCH] Issue 78: Solve Division by Zero when performing a chi squared good fitness test. (#79) * test: Write tests for the ZeroDivisionError ocurred on chi squared. * fix: Implement zero division bugfix in lower gamma function calculation. This change implements a fix, discovered in #78 , where the incomplete gamma function raises a `ZeroDivisionError` when X is too small. This was caused by the liberal use of the `round` function when calculating the iterations needed to perform the simpson's rule. Now, with this change, if the X is too small, it rounds it to the first decimal, then rounds it again after multiplying it by 10_000. Now, if the second round still give us zero, then we default to 100_000 iterations instead, as we must assume it's a pretty small value that needs to be accounted for. * fix: Update p-value in certain tests due to the increased iterations. --- lib/math.rb | 5 ++++- spec/statistics/bigdecimal_spec.rb | 2 +- spec/statistics/math_spec.rb | 20 +++++++++++++++++++ .../statistical_test/chi_squared_test_spec.rb | 20 ++++++++++++++++++- 4 files changed, 44 insertions(+), 3 deletions(-) diff --git a/lib/math.rb b/lib/math.rb index a86d1e3..bf1bf63 100644 --- a/lib/math.rb +++ b/lib/math.rb @@ -46,7 +46,10 @@ def self.simpson_rule(a, b, n, &block) def self.lower_incomplete_gamma_function(s, x) # The greater the iterations, the better. That's why we are iterating 10_000 * x times - self.simpson_rule(0, x.to_r, (10_000 * x.round).round) do |t| + iterator = (10_000 * x.round(1)).round + iterator = 100_000 if iterator.zero? + + self.simpson_rule(0, x.to_r, iterator) do |t| (t ** (s - 1)) * Math.exp(-t) end end diff --git a/spec/statistics/bigdecimal_spec.rb b/spec/statistics/bigdecimal_spec.rb index 832ed87..12f71ba 100644 --- a/spec/statistics/bigdecimal_spec.rb +++ b/spec/statistics/bigdecimal_spec.rb @@ -96,7 +96,7 @@ expected = BigDecimal(100, 1) result = StatisticalTest::ChiSquaredTest.goodness_of_fit(0.05, expected, observed_counts) - expect(result[:p_value]).to eq -6.509459637982218e-12 + expect(result[:p_value]).to eq -6.5354388567584465e-12 expect(result[:null]).to be false expect(result[:alternative]).to be true end diff --git a/spec/statistics/math_spec.rb b/spec/statistics/math_spec.rb index e310fb4..21f3250 100644 --- a/spec/statistics/math_spec.rb +++ b/spec/statistics/math_spec.rb @@ -85,6 +85,26 @@ ).to eq results[index] end end + + # The following context is based on the numbers reported in https://github.com/estebanz01/ruby-statistics/issues/78 + # which give us a minimum test case scenario where the integral being solved with simpson's rule + # uses zero iterations, raising errors. + context 'When X for the lower incomplete gamma function is rounded to zero' do + let(:s_parameter) { 4.5 } + let(:x) { (52/53).to_r } + + it 'does not try to perform a division by zero' do + expect do + described_class.lower_incomplete_gamma_function(s_parameter, x) + end.not_to raise_error + end + + it "tries to solve the function using simpson's rule with at least 100_000 iterations" do + expect(described_class).to receive(:simpson_rule).with(0, x, 100_000) + + described_class.lower_incomplete_gamma_function(s_parameter, x) + end + end end describe '.beta_function' do diff --git a/spec/statistics/statistical_test/chi_squared_test_spec.rb b/spec/statistics/statistical_test/chi_squared_test_spec.rb index d428d9b..0ebde99 100644 --- a/spec/statistics/statistical_test/chi_squared_test_spec.rb +++ b/spec/statistics/statistical_test/chi_squared_test_spec.rb @@ -48,7 +48,7 @@ expected = 100 # this is equal to [100, 100, 100, 100, 100, 100] result = described_class.goodness_of_fit(0.05, expected, observed_counts) - expect(result[:p_value]).to eq -6.509237593377293e-12 + expect(result[:p_value]).to eq -6.5358829459682966e-12 expect(result[:null]).to be false expect(result[:alternative]).to be true end @@ -63,5 +63,23 @@ expect(result[:null]).to be true expect(result[:alternative]).to be false end + + # The following test is based on the numbers reported in https://github.com/estebanz01/ruby-statistics/issues/78 + # which give us a minimum test case scenario where the integral being solved with simpson's rule + # uses zero iterations, raising errors. + it 'performs a goodness of fit test with values that generates small chi statistics' do + observed_counts = [481, 483, 482, 488, 478, 471, 477, 479, 475, 462] + expected = 477 + + result = {} + + expect do + result = described_class.goodness_of_fit(0.01, expected, observed_counts) + end.not_to raise_error + + expect(result[:p_value].round(4)).to eq(0.9995) + expect(result[:null]).to be true + expect(result[:alternative]).to be false + end end end