Decay Function Improvements #185

Merged
Changes from 4 commits
198 changes: 153 additions & 45 deletions minisom.py
@@ -71,39 +71,92 @@ def fast_norm(x):
return sqrt(dot(x, x.T))


def inverse_decay_to_zero(learning_rate, t, max_iter):
"""Decay function of the learning process that asymptotically
approaches zero.

Parameters
----------
learning_rate : float
Current learning rate.

t : int
Current iteration.

max_iter : int
Maximum number of iterations for the training.
"""
C = max_iter / 100.0
return learning_rate * C / (C + t)
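
A quick numeric illustration (a sketch, not part of the diff): with max_iter = 1000 the rate starts at its initial value and has fallen to roughly 1% of it by the last iteration.

# illustrative only: behaviour of inverse_decay_to_zero over 1000 iterations
inverse_decay_to_zero(0.5, 0, 1000)     # 0.5     (C = 10, so 0.5 * 10 / 10)
inverse_decay_to_zero(0.5, 1000, 1000)  # ~0.005  (0.5 * 10 / 1010)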


def linear_decay_to_zero(learning_rate, t, max_iter):
"""Decay function of the learning process that linearly
decreases to zero.

Parameters
----------
learning_rate : float
Current learning rate.

t : int
Current iteration.

max_iter : int
Maximum number of iterations for the training.
"""
return learning_rate * (1 - t / max_iter)
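
Another illustrative check (not part of the diff): the linear variant reaches exactly zero at t = max_iter.

# illustrative only: linear_decay_to_zero falls in a straight line
linear_decay_to_zero(0.5, 0, 1000)     # 0.5
linear_decay_to_zero(0.5, 500, 1000)   # 0.25
linear_decay_to_zero(0.5, 1000, 1000)  # 0.0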


def inverse_decay_to_one(sigma, t, max_iter):
"""Decay function of sigma that asymptotically approaches one.

Parameters
----------
sigma : float
Current sigma.

t : int
Current iteration.

max_iter : int
Maximum number of iterations for the training.
"""
C = (sigma - 1) / max_iter
return sigma / (1 + (t * C))
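
Worked example (illustrative, not in the diff): for sigma = 5 and max_iter = 1000, C = (5 - 1) / 1000 = 0.004, so the spread starts at 5 and ends at exactly 1.

# illustrative only: inverse_decay_to_one ends at 1 regardless of the start value
inverse_decay_to_one(5, 0, 1000)     # 5.0
inverse_decay_to_one(5, 500, 1000)   # ~1.67  (5 / 3)
inverse_decay_to_one(5, 1000, 1000)  # 1.0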


def asymptotic_decay(dynamic_parameter, t, max_iter):
"""Legacy default decay function of the learning process
and sigma that decays these values asymptotically to 1/3
of their original values.

Using this function may lead to overfitting for sigma values
less than three or poor solution convergence for sigma values
greater than three.

Parameters
----------
dynamic_parameter : float
Current learning rate/sigma.

t : int
Current iteration.

max_iter : int
Maximum number of iterations for the training.
"""
return dynamic_parameter / (1 + t / (max_iter / 2))
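
The "1/3" mentioned in the legacy docstring above can be checked directly (illustrative, not in the diff): at t = max_iter the value is dynamic_parameter / (1 + 2) = dynamic_parameter / 3.

# illustrative only: the legacy rule ends at one third of the starting value
asymptotic_decay(0.9, 0, 1000)     # 0.9
asymptotic_decay(0.9, 1000, 1000)  # 0.3  (0.9 / (1 + 1000 / 500))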


class MiniSom(object):
Y_HEX_CONV_FACTOR = (3.0 / 2.0) / sqrt(3)

def __init__(self, x, y, input_len, sigma=None, learning_rate=0.5,
learning_rate_decay_function='inverse_decay_to_zero',
neighborhood_function='gaussian', topology='rectangular',
activation_distance='euclidean', random_seed=None,
sigma_decay_function='inverse_decay_to_one'):
"""Initializes a Self Organizing Maps.

A rule of thumb to set the size of the grid for a dimensionality
@@ -124,28 +177,35 @@ def __init__(self, x, y, input_len, sigma=1.0, learning_rate=0.5,
input_len : int
Number of the elements of the vectors in input.

sigma : float, optional (default=sqrt(x^2 + y^2))
Spread of the neighborhood function.

Needs to be adequate to the dimensions of the map.

By default, at the iteration t, we have:
sigma(t) = sigma / (1 + (t * (sigma - 1) / max_iter))

learning_rate : float, optional (default=0.5)
Initial learning rate.

Adequate values are dependent on the data used for training.

By default, at the iteration t, we have:
learning_rate(t) = learning_rate / (1 + t * (100 / max_iter))

learning_rate_decay_function : string, optional
(default='inverse_decay_to_zero')
Function that reduces learning_rate at each iteration.

The default function is:
learning_rate(t) = learning_rate / (1 + t * (100 / max_iter))

A custom decay function will need to take as input
three parameters in the following order:

1. learning rate
2. current iteration
3. maximum number of iterations allowed

1. Learning rate
2. Current iteration
3. Maximum number of iterations allowed

Note that if a lambda function is used to define the decay,
MiniSom will not be picklable anymore.
@@ -170,11 +230,31 @@ def euclidean(x, w):
random_seed : int, optional (default=None)
Random seed to use.

sigma_decay_function : string, optional
(default='inverse_decay_to_one')
Function that reduces sigma at each iteration.

The default function is:
sigma(t) = sigma / (1 + (t * (sigma - 1) / max_iter))

A custom decay function will need to take as input
three parameters in the following order:

1. Sigma
2. Current iteration
3. Maximum number of iterations allowed

To prevent overfitting, custom decay functions should not
decay to zero. Ending with a sigma value greater than one
may also lead to poor solutions.

Note that if a lambda function is used to define the decay,
MiniSom will not be picklable anymore.
"""
if sigma is None:
sigma = sqrt(x*x + y*y)
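# Illustrative note (not in the diff): with the default above, a 10x12 map
# would get sigma = sqrt(10*10 + 12*12) ~= 15.62, i.e. the map diagonal.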
if sigma > sqrt(x*x + y*y):
warn('Warning: sigma might be too high ' +
'for the dimension of the map.')

self._random_generator = random.RandomState(random_seed)
@@ -204,8 +284,32 @@ def euclidean(x, w):
warn('triangle neighborhood function does not ' +
'take into account hexagonal topology')

lr_decay_functions = {'inverse_decay_to_zero': inverse_decay_to_zero,
'linear_decay_to_zero': linear_decay_to_zero,
'asymptotic_decay': asymptotic_decay}

if learning_rate_decay_function not in lr_decay_functions:
msg = '%s not supported. Functions available: %s'
raise ValueError(msg % (learning_rate_decay_function,
', '.join(lr_decay_functions.keys())))

self._learning_rate_decay_function = \
lr_decay_functions[learning_rate_decay_function]

sig_decay_functions = {'inverse_decay_to_one': inverse_decay_to_one,
'asymptotic_decay': asymptotic_decay}

if sigma_decay_function not in sig_decay_functions:
msg = '%s not supported. Functions available: %s'
raise ValueError(msg % (sigma_decay_function,
', '.join(sig_decay_functions.keys())))

if sigma_decay_function in ['asymptotic_decay']:
warn('using this legacy function may lead to overfitting for ' +
'sigma values less than three or poor solution convergence ' +
'for sigma values greater than three')

self._sigma_decay_function = sig_decay_functions[sigma_decay_function]
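
# Illustrative usage (a sketch, not part of this diff): the constructor now
# selects both decay rules by name, e.g.
#     som = MiniSom(10, 10, 4,
#                   learning_rate_decay_function='linear_decay_to_zero',
#                   sigma_decay_function='inverse_decay_to_one')
# and an unrecognised name raises the ValueError built above.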

neig_functions = {'gaussian': self._gaussian,
'mexican_hat': self._mexican_hat,
@@ -352,8 +456,8 @@ def update(self, x, win, t, max_iteration):
If use_epochs is False:
Maximum number of iterations (one iteration per sample).
"""
eta = self._learning_rate_decay_function(self._learning_rate,
t, max_iteration)
sig = self._sigma_decay_function(self._sigma, t, max_iteration)
# improves performance
g = self.neighborhood(win, sig)*eta
@@ -667,15 +771,19 @@ def setUp(self):
self.hex_som._weights[i, j]))
self.hex_som._weights = zeros((5, 5, 1)) # fake weights

def test_linear_decay_to_zero_function(self):
assert linear_decay_to_zero(1, 2, 3) == 1 * (1 - 2 / 3)

def test_inverse_decay_to_zero_function(self):
C = 3 / 100
assert inverse_decay_to_zero(1, 2, 3) == 1 * C / (C + 2)

def test_inverse_decay_to_one_function(self):
C = (1 - 1) / 3
assert inverse_decay_to_one(1, 2, 3) == 1 / (1 + (2 * C))

def test_asymptotic_decay_function(self):
assert asymptotic_decay(1, 2, 3) == 1 / (1 + 2 / (3 / 2))

def test_fast_norm(self):
assert fast_norm(array([1, 3])) == sqrt(1+9)