add adamax for torch (keras-team#549)
Co-authored-by: Haifeng Jin <[email protected]>
haifeng-jin authored Jul 19, 2023
1 parent dc9d822 commit 8514729
Showing 3 changed files with 59 additions and 4 deletions.
52 changes: 52 additions & 0 deletions keras_core/backend/torch/optimizers/torch_adamax.py
@@ -0,0 +1,52 @@
import torch

from keras_core import ops
from keras_core import optimizers
from keras_core.backend.torch.optimizers import torch_parallel_optimizer


class Adamax(
    torch_parallel_optimizer.TorchParallelOptimizer, optimizers.Adamax
):
    def _parallel_update_step(
        self,
        grads,
        variables,
        learning_rate,
    ):
        keras_variables = variables
        variables = [v.value for v in variables]

        dtype = variables[0].dtype
        lr = ops.cast(learning_rate, dtype)

        local_step = ops.cast(self.iterations + 1, dtype)

        beta_1_power = ops.power(ops.cast(self.beta_1, dtype), local_step)

        m_list = [
            self._m[self._get_variable_index(variable)].value
            for variable in keras_variables
        ]
        u_list = [
            self._u[self._get_variable_index(variable)].value
            for variable in keras_variables
        ]

        torch._foreach_mul_(m_list, self.beta_1)
        torch._foreach_add_(m_list, grads, alpha=1 - self.beta_1)

        torch._foreach_mul_(u_list, self.beta_2)
        torch._foreach_maximum_(u_list, torch._foreach_abs(grads))

        torch._foreach_add_(
            variables,
            torch._foreach_div(
                torch._foreach_mul(m_list, lr),
                torch._foreach_mul(
                    torch._foreach_add(u_list, self.epsilon),
                    1 - beta_1_power,
                ),
            ),
            alpha=-1,
        )
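For reference, the fused torch._foreach_* calls above apply each operation across the whole list of parameter tensors in one call; per variable, they implement the standard Adamax rule. A minimal single-tensor restatement (an illustrative sketch with hypothetical names, not part of the commit):

import torch

def adamax_update(var, grad, m, u, lr, beta_1, beta_2, epsilon, step):
    # m is the biased first moment, u the exponentially weighted infinity
    # norm; both are zero-initialized slot variables.
    beta_1_power = beta_1 ** step
    m.mul_(beta_1).add_(grad, alpha=1 - beta_1)
    u.copy_(torch.maximum(u * beta_2, grad.abs()))
    var.sub_(lr * m / ((1 - beta_1_power) * (u + epsilon)))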
2 changes: 2 additions & 0 deletions keras_core/backend/torch/optimizers/torch_optimizer.py
@@ -10,6 +10,7 @@ def __new__(cls, *args, **kwargs):
        from keras_core.backend.torch.optimizers import torch_adadelta
        from keras_core.backend.torch.optimizers import torch_adagrad
        from keras_core.backend.torch.optimizers import torch_adam
+        from keras_core.backend.torch.optimizers import torch_adamax
        from keras_core.backend.torch.optimizers import torch_adamw
        from keras_core.backend.torch.optimizers import torch_rmsprop
        from keras_core.backend.torch.optimizers import torch_sgd
@@ -18,6 +19,7 @@ def __new__(cls, *args, **kwargs):
            optimizers.Adadelta: torch_adadelta.Adadelta,
            optimizers.Adagrad: torch_adagrad.Adagrad,
            optimizers.Adam: torch_adam.Adam,
+            optimizers.Adamax: torch_adamax.Adamax,
            optimizers.AdamW: torch_adamw.AdamW,
            optimizers.RMSprop: torch_rmsprop.RMSprop,
            optimizers.SGD: torch_sgd.SGD,
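With the two added lines above in place, constructing the generic optimizer under the torch backend should dispatch to the parallel implementation through this mapping. A quick illustrative check (assumes the KERAS_BACKEND environment variable is set to "torch"; not part of the commit):

import os
os.environ["KERAS_BACKEND"] = "torch"

from keras_core import optimizers
from keras_core.backend.torch.optimizers import torch_adamax

opt = optimizers.Adamax(learning_rate=0.5)
# TorchOptimizer.__new__ looks the class up in the mapping above, so the
# returned instance is expected to be the torch-parallel subclass.
assert isinstance(opt, torch_adamax.Adamax)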
9 changes: 5 additions & 4 deletions keras_core/optimizers/adamax_test.py
@@ -4,6 +4,7 @@
import numpy as np

from keras_core import backend
+from keras_core import ops
from keras_core import testing
from keras_core.optimizers.adamax import Adamax

@@ -20,14 +21,14 @@ def test_config(self):

    def test_single_step(self):
        optimizer = Adamax(learning_rate=0.5)
-        grads = np.array([1.0, 6.0, 7.0, 2.0])
+        grads = ops.array([1.0, 6.0, 7.0, 2.0])
        vars = backend.Variable([1.0, 2.0, 3.0, 4.0])
        optimizer.apply_gradients(zip([grads], [vars]))
        self.assertAllClose(vars, [0.5, 1.5, 2.5, 3.5], rtol=1e-4, atol=1e-4)

    def test_weight_decay(self):
        grads, var1, var2, var3 = (
-            np.zeros(()),
+            ops.zeros(()),
            backend.Variable(2.0),
            backend.Variable(2.0, name="exclude"),
            backend.Variable(2.0),
@@ -53,8 +54,8 @@ def test_correctness_with_golden(self):
        )

        x = backend.Variable(np.ones([10]))
-        grads = np.arange(0.1, 1.1, 0.1)
-        first_grads = np.full((10,), 0.01)
+        grads = ops.arange(0.1, 1.1, 0.1)
+        first_grads = ops.full((10,), 0.01)

        # fmt: off
        golden = np.array(
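As a sanity check on test_single_step above: with the documented Adamax defaults (beta_1=0.9, epsilon=1e-7) and zero-initialized slots, the first step moves every variable by roughly lr regardless of the gradient's magnitude, which is where the expected [0.5, 1.5, 2.5, 3.5] comes from. In numbers (a hedged sketch of the arithmetic, not part of the commit):

lr, b1 = 0.5, 0.9
g = 6.0                                    # any positive gradient entry
m = (1 - b1) * g                           # 0.6, since m starts at 0
u = abs(g)                                 # 6.0, since u starts at 0
delta = lr * m / ((1 - b1) * (u + 1e-7))   # ~0.5, independent of g

The switch from np.* to ops.* in these tests keeps the gradients as backend-native tensors, which is what the fused torch._foreach_* calls in the new optimizer operate on.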
