Issue 78: Solve Division by Zero when performing a chi squared good f…

…itness test. (#79) * test: Write tests for the ZeroDivisionError ocurred on chi squared. * fix: Implement zero division bugfix in lower gamma function calculation. This change implements a fix, discovered in #78 , where the incomplete gamma function raises a `ZeroDivisionError` when X is too small. This was caused by the liberal use of the `round` function when calculating the iterations needed to perform the simpson's rule. Now, with this change, if the X is too small, it rounds it to the first decimal, then rounds it again after multiplying it by 10_000. Now, if the second round still give us zero, then we default to 100_000 iterations instead, as we must assume it's a pretty small value that needs to be accounted for. * fix: Update p-value in certain tests due to the increased iterations.
estebanz01 · Oct 31, 2022 · 5ae8a05 · 5ae8a05
1 parent 4d0372b
commit 5ae8a05
Show file tree

Hide file tree

Showing 4 changed files with 44 additions and 3 deletions.
diff --git a/lib/math.rb b/lib/math.rb
@@ -46,7 +46,10 @@ def self.simpson_rule(a, b, n, &block)
 
   def self.lower_incomplete_gamma_function(s, x)
     # The greater the iterations, the better. That's why we are iterating 10_000 * x times
-    self.simpson_rule(0, x.to_r, (10_000 * x.round).round) do |t|
+    iterator = (10_000 * x.round(1)).round
+    iterator = 100_000 if iterator.zero?
+
+    self.simpson_rule(0, x.to_r, iterator) do |t|
       (t ** (s - 1)) * Math.exp(-t)
     end
   end

diff --git a/spec/statistics/bigdecimal_spec.rb b/spec/statistics/bigdecimal_spec.rb
@@ -96,7 +96,7 @@
       expected = BigDecimal(100, 1)
       result = StatisticalTest::ChiSquaredTest.goodness_of_fit(0.05, expected, observed_counts)
 
-      expect(result[:p_value]).to eq -6.509459637982218e-12
+      expect(result[:p_value]).to eq -6.5354388567584465e-12
       expect(result[:null]).to be false
       expect(result[:alternative]).to be true
     end

diff --git a/spec/statistics/math_spec.rb b/spec/statistics/math_spec.rb
@@ -85,6 +85,26 @@
         ).to eq results[index]
       end
     end
+
+    # The following context is based on the numbers reported in https://github.com/estebanz01/ruby-statistics/issues/78
+    # which give us a minimum test case scenario where the integral being solved with simpson's rule
+    # uses zero iterations, raising errors.
+    context 'When X for the lower incomplete gamma function is rounded to zero' do
+      let(:s_parameter) { 4.5 }
+      let(:x) { (52/53).to_r }
+
+      it 'does not try to perform a division by zero' do
+        expect do
+          described_class.lower_incomplete_gamma_function(s_parameter, x)
+        end.not_to raise_error
+      end
+
+      it "tries to solve the function using simpson's rule with at least 100_000 iterations" do
+        expect(described_class).to receive(:simpson_rule).with(0, x, 100_000)
+
+        described_class.lower_incomplete_gamma_function(s_parameter, x)
+      end
+    end
   end
 
   describe '.beta_function' do

diff --git a/spec/statistics/statistical_test/chi_squared_test_spec.rb b/spec/statistics/statistical_test/chi_squared_test_spec.rb
@@ -48,7 +48,7 @@
       expected = 100 # this is equal to [100, 100, 100, 100, 100, 100]
       result = described_class.goodness_of_fit(0.05, expected, observed_counts)
 
-      expect(result[:p_value]).to eq -6.509237593377293e-12
+      expect(result[:p_value]).to eq -6.5358829459682966e-12
       expect(result[:null]).to be false
       expect(result[:alternative]).to be true
     end
@@ -63,5 +63,23 @@
       expect(result[:null]).to be true
       expect(result[:alternative]).to be false
     end
+
+    # The following test is based on the numbers reported in https://github.com/estebanz01/ruby-statistics/issues/78
+    # which give us a minimum test case scenario where the integral being solved with simpson's rule
+    # uses zero iterations, raising errors.
+    it 'performs a goodness of fit test with values that generates small chi statistics' do
+      observed_counts = [481, 483, 482, 488, 478, 471, 477, 479, 475, 462]
+      expected = 477
+
+      result = {}
+
+      expect do
+        result = described_class.goodness_of_fit(0.01, expected, observed_counts)
+      end.not_to raise_error
+
+      expect(result[:p_value].round(4)).to eq(0.9995)
+      expect(result[:null]).to be true
+      expect(result[:alternative]).to be false
+    end
   end
 end