Decay Function Improvements #185

Merged
Changes from 4 commits
198 changes: 153 additions & 45 deletions minisom.py
@@ -71,39 +71,92 @@ def fast_norm(x):
return sqrt(dot(x, x.T))


def inverse_decay_to_zero(learning_rate, t, max_iter):
"""Decay function of the learning process that asymptotically
approaches zero.

Parameters
----------
learning_rate : float
Current learning rate.

t : int
Current iteration.

max_iter : int
Maximum number of iterations for the training.
"""
C = max_iter / 100.0
return learning_rate * C / (C + t)
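
A quick numeric illustration (a sketch, not part of the diff): with max_iter = 1000 the rate starts at its initial value and has fallen to roughly 1% of it by the last iteration.

# illustrative only: behaviour of inverse_decay_to_zero over 1000 iterations
inverse_decay_to_zero(0.5, 0, 1000)     # 0.5     (C = 10, so 0.5 * 10 / 10)
inverse_decay_to_zero(0.5, 1000, 1000)  # ~0.005  (0.5 * 10 / 1010)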


def linear_decay_to_zero(learning_rate, t, max_iter):
"""Decay function of the learning process that linearly
decreases to zero.

Parameters
----------
learning_rate : float
Current learning rate.

t : int
Current iteration.

max_iter : int
Maximum number of iterations for the training.
"""
return learning_rate * (1 - t / max_iter)
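
Another illustrative check (not part of the diff): the linear variant reaches exactly zero at t = max_iter.

# illustrative only: linear_decay_to_zero falls in a straight line
linear_decay_to_zero(0.5, 0, 1000)     # 0.5
linear_decay_to_zero(0.5, 500, 1000)   # 0.25
linear_decay_to_zero(0.5, 1000, 1000)  # 0.0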


def inverse_decay_to_one(sigma, t, max_iter):
"""Decay function of sigma that asymptotically approaches one.

Parameters
----------
sigma : float
Current sigma.

t : int
Current iteration.

max_iter : int
Maximum number of iterations for the training.
"""
C = (sigma - 1) / max_iter
return sigma / (1 + (t * C))
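
Worked example (illustrative, not in the diff): for sigma = 5 and max_iter = 1000, C = (5 - 1) / 1000 = 0.004, so the spread starts at 5 and ends at exactly 1.

# illustrative only: inverse_decay_to_one ends at 1 regardless of the start value
inverse_decay_to_one(5, 0, 1000)     # 5.0
inverse_decay_to_one(5, 500, 1000)   # ~1.67  (5 / 3)
inverse_decay_to_one(5, 1000, 1000)  # 1.0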


def asymptotic_decay(dynamic_parameter, t, max_iter):
"""Legacy default decay function of the learning process
and sigma that decays these values asymptotically to 1/3
of their original values.

Using this function may lead to overfitting for sigma values
less than three or poor solution convergence for sigma values
greater than three.

Parameters
----------
dynamic_parameter : float
Current learning rate/sigma.

t : int
Current iteration.

max_iter : int
Maximum number of iterations for the training.
"""
return dynamic_parameter / (1 + t / (max_iter / 2))
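
The "1/3" mentioned in the legacy docstring above can be checked directly (illustrative, not in the diff): at t = max_iter the value is dynamic_parameter / (1 + 2) = dynamic_parameter / 3.

# illustrative only: the legacy rule ends at one third of the starting value
asymptotic_decay(0.9, 0, 1000)     # 0.9
asymptotic_decay(0.9, 1000, 1000)  # 0.3  (0.9 / (1 + 1000 / 500))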


class MiniSom(object):
Y_HEX_CONV_FACTOR = (3.0 / 2.0) / sqrt(3)

def __init__(self, x, y, input_len, sigma=None, learning_rate=0.5,
learning_rate_decay_function='inverse_decay_to_zero',
neighborhood_function='gaussian', topology='rectangular',
activation_distance='euclidean', random_seed=None,
sigma_decay_function='inverse_decay_to_one'):
"""Initializes a Self Organizing Maps.

A rule of thumb to set the size of the grid for a dimensionality
@@ -124,28 +177,35 @@ def __init__(self, x, y, input_len, sigma=1.0, learning_rate=0.5,
input_len : int
Number of the elements of the vectors in input.

sigma : float, optional (default=sqrt(x^2 + y^2))
Spread of the neighborhood function.

Needs to be adequate to the dimensions of the map.

By default, at the iteration t, we have:
sigma(t) = sigma / (1 + (t * (sigma - 1) / max_iter))

learning_rate : float, optional (default=0.5)
Initial learning rate.

Adequate values are dependent on the data used for training.

By default, at the iteration t, we have:
learning_rate(t) = learning_rate / (1 + t * (100 / max_iter))

learning_rate_decay_function : string, optional
(default='inverse_decay_to_zero')
Function that reduces learning_rate at each iteration.

The default function is:
learning_rate(t) = learning_rate / (1 + t * (100 / max_iter))

A custom decay function will need to take as input
three parameters in the following order:

1. learning rate
2. current iteration
3. maximum number of iterations allowed

1. Learning rate
2. Current iteration
3. Maximum number of iterations allowed

Note that if a lambda function is used to define the decay,
MiniSom will not be picklable anymore.
@@ -170,11 +230,31 @@ def euclidean(x, w):
random_seed : int, optional (default=None)
Random seed to use.

sigma_decay_function : string, optional
(default='inverse_decay_to_one')
Function that reduces sigma at each iteration.

The default function is:
sigma(t) = sigma / (1 + (t * (sigma - 1) / max_iter))

A custom decay function will need to take as input
three parameters in the following order:

1. Sigma
2. Current iteration
3. Maximum number of iterations allowed

To prevent overfitting, custom decay functions should not
decay to zero. Ending with a sigma value greater than one
may also lead to poor solutions.

Note that if a lambda function is used to define the decay,
MiniSom will not be picklable anymore.
"""
if sigma is None:
sigma = sqrt(x*x + y*y)
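# Illustrative note (not in the diff): with the default above, a 10x12 map
# would get sigma = sqrt(10*10 + 12*12) ~= 15.62, i.e. the map diagonal.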
if sigma > sqrt(x*x + y*y):
warn('Warning: sigma might be too high ' +
'for the dimension of the map.')

self._random_generator = random.RandomState(random_seed)
@@ -204,8 +284,32 @@ def euclidean(x, w):
warn('triangle neighborhood function does not ' +
'take into account hexagonal topology')

lr_decay_functions = {'inverse_decay_to_zero': inverse_decay_to_zero,
'linear_decay_to_zero': linear_decay_to_zero,
'asymptotic_decay': asymptotic_decay}

if learning_rate_decay_function not in lr_decay_functions:
msg = '%s not supported. Functions available: %s'
raise ValueError(msg % (learning_rate_decay_function,
', '.join(lr_decay_functions.keys())))

self._learning_rate_decay_function = \
lr_decay_functions[learning_rate_decay_function]

sig_decay_functions = {'inverse_decay_to_one': inverse_decay_to_one,
'asymptotic_decay': asymptotic_decay}

if sigma_decay_function not in sig_decay_functions:
msg = '%s not supported. Functions available: %s'
raise ValueError(msg % (sigma_decay_function,
', '.join(sig_decay_functions.keys())))

if sigma_decay_function in ['asymptotic_decay']:
warn('using this legacy function may lead to overfitting for ' +
'sigma values less than three or poor solution convergence ' +
'for sigma values greater than three')

self._sigma_decay_function = sig_decay_functions[sigma_decay_function]
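
# Illustrative usage (a sketch, not part of this diff): the constructor now
# selects both decay rules by name, e.g.
#     som = MiniSom(10, 10, 4,
#                   learning_rate_decay_function='linear_decay_to_zero',
#                   sigma_decay_function='inverse_decay_to_one')
# and an unrecognised name raises the ValueError built above.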

neig_functions = {'gaussian': self._gaussian,
'mexican_hat': self._mexican_hat,
@@ -352,8 +456,8 @@ def update(self, x, win, t, max_iteration):
If use_epochs is False:
Maximum number of iterations (one iteration per sample).
"""
eta = self._learning_rate_decay_function(self._learning_rate,
t, max_iteration)
sig = self._sigma_decay_function(self._sigma, t, max_iteration)
# improves performance
g = self.neighborhood(win, sig)*eta
@@ -667,15 +771,19 @@ def setUp(self):
self.hex_som._weights[i, j]))
self.hex_som._weights = zeros((5, 5, 1)) # fake weights

def test_linear_decay_to_zero_function(self):
assert linear_decay_to_zero(1, 2, 3) == 1 * (1 - 2 / 3)

def test_inverse_decay_to_zero_function(self):
C = 3 / 100
assert inverse_decay_to_zero(1, 2, 3) == 1 * C / (C + 2)

def test_inverse_decay_to_one_function(self):
C = (1 - 1) / 3
assert inverse_decay_to_one(1, 2, 3) == 1 / (1 + (2 * C))

def test_asymptotic_decay_function(self):
assert asymptotic_decay(1, 2, 3) == 1 / (1 + 2 / (3 / 2))

def test_fast_norm(self):
assert fast_norm(array([1, 3])) == sqrt(1+9)