From 5ae8a0507654515e6d81d01c9eb0dd3a34d68620 Mon Sep 17 00:00:00 2001
From: Esteban Zapata Rojas <2417465+estebanz01@users.noreply.github.com>
Date: Sun, 30 Oct 2022 21:28:26 -0400
Subject: [PATCH] Issue 78: Solve Division by Zero when performing a chi
 squared good fitness test. (#79)

* test: Write tests for the ZeroDivisionError ocurred on chi squared.

* fix: Implement zero division bugfix in lower gamma function calculation.

This change implements a fix, discovered in #78 , where the
incomplete gamma function raises a `ZeroDivisionError` when
X is too small.

This was caused by the liberal use of the `round` function when
calculating the iterations needed to perform the simpson's rule.

Now, with this change, if the X is too small, it rounds it to
the first decimal, then rounds it again after multiplying it by
10_000. Now, if the second round still give us zero, then we default
to 100_000 iterations instead, as we must assume it's a pretty small
value that needs to be accounted for.

* fix: Update p-value in certain tests due to the increased iterations.
---
 lib/math.rb                                   |  5 ++++-
 spec/statistics/bigdecimal_spec.rb            |  2 +-
 spec/statistics/math_spec.rb                  | 20 +++++++++++++++++++
 .../statistical_test/chi_squared_test_spec.rb | 20 ++++++++++++++++++-
 4 files changed, 44 insertions(+), 3 deletions(-)

diff --git a/lib/math.rb b/lib/math.rb
index a86d1e3..bf1bf63 100644
--- a/lib/math.rb
+++ b/lib/math.rb
@@ -46,7 +46,10 @@ def self.simpson_rule(a, b, n, &block)
 
   def self.lower_incomplete_gamma_function(s, x)
     # The greater the iterations, the better. That's why we are iterating 10_000 * x times
-    self.simpson_rule(0, x.to_r, (10_000 * x.round).round) do |t|
+    iterator = (10_000 * x.round(1)).round
+    iterator = 100_000 if iterator.zero?
+
+    self.simpson_rule(0, x.to_r, iterator) do |t|
       (t ** (s - 1)) * Math.exp(-t)
     end
   end
diff --git a/spec/statistics/bigdecimal_spec.rb b/spec/statistics/bigdecimal_spec.rb
index 832ed87..12f71ba 100644
--- a/spec/statistics/bigdecimal_spec.rb
+++ b/spec/statistics/bigdecimal_spec.rb
@@ -96,7 +96,7 @@
       expected = BigDecimal(100, 1)
       result = StatisticalTest::ChiSquaredTest.goodness_of_fit(0.05, expected, observed_counts)
 
-      expect(result[:p_value]).to eq -6.509459637982218e-12
+      expect(result[:p_value]).to eq -6.5354388567584465e-12
       expect(result[:null]).to be false
       expect(result[:alternative]).to be true
     end
diff --git a/spec/statistics/math_spec.rb b/spec/statistics/math_spec.rb
index e310fb4..21f3250 100644
--- a/spec/statistics/math_spec.rb
+++ b/spec/statistics/math_spec.rb
@@ -85,6 +85,26 @@
         ).to eq results[index]
       end
     end
+
+    # The following context is based on the numbers reported in https://github.com/estebanz01/ruby-statistics/issues/78
+    # which give us a minimum test case scenario where the integral being solved with simpson's rule
+    # uses zero iterations, raising errors.
+    context 'When X for the lower incomplete gamma function is rounded to zero' do
+      let(:s_parameter) { 4.5 }
+      let(:x) { (52/53).to_r }
+
+      it 'does not try to perform a division by zero' do
+        expect do
+          described_class.lower_incomplete_gamma_function(s_parameter, x)
+        end.not_to raise_error
+      end
+
+      it "tries to solve the function using simpson's rule with at least 100_000 iterations" do
+        expect(described_class).to receive(:simpson_rule).with(0, x, 100_000)
+
+        described_class.lower_incomplete_gamma_function(s_parameter, x)
+      end
+    end
   end
 
   describe '.beta_function' do
diff --git a/spec/statistics/statistical_test/chi_squared_test_spec.rb b/spec/statistics/statistical_test/chi_squared_test_spec.rb
index d428d9b..0ebde99 100644
--- a/spec/statistics/statistical_test/chi_squared_test_spec.rb
+++ b/spec/statistics/statistical_test/chi_squared_test_spec.rb
@@ -48,7 +48,7 @@
       expected = 100 # this is equal to [100, 100, 100, 100, 100, 100]
       result = described_class.goodness_of_fit(0.05, expected, observed_counts)
 
-      expect(result[:p_value]).to eq -6.509237593377293e-12
+      expect(result[:p_value]).to eq -6.5358829459682966e-12
       expect(result[:null]).to be false
       expect(result[:alternative]).to be true
     end
@@ -63,5 +63,23 @@
       expect(result[:null]).to be true
       expect(result[:alternative]).to be false
     end
+
+    # The following test is based on the numbers reported in https://github.com/estebanz01/ruby-statistics/issues/78
+    # which give us a minimum test case scenario where the integral being solved with simpson's rule
+    # uses zero iterations, raising errors.
+    it 'performs a goodness of fit test with values that generates small chi statistics' do
+      observed_counts = [481, 483, 482, 488, 478, 471, 477, 479, 475, 462]
+      expected = 477
+
+      result = {}
+
+      expect do
+        result = described_class.goodness_of_fit(0.01, expected, observed_counts)
+      end.not_to raise_error
+
+      expect(result[:p_value].round(4)).to eq(0.9995)
+      expect(result[:null]).to be true
+      expect(result[:alternative]).to be false
+    end
   end
 end