From bb1bcacc11adb3b946bf33d1c31519ffc1bc94e1 Mon Sep 17 00:00:00 2001 From: Carson Zhang Date: Thu, 16 Jan 2025 19:40:59 +0100 Subject: [PATCH] feat/callback-lr_schedule (#317) --- DESCRIPTION | 1 + NAMESPACE | 1 + NEWS.md | 1 + R/CallbackSetLRScheduler.R | 188 +++++++++++++++++++ R/TorchCallback.R | 9 +- R/TorchDescriptor.R | 10 +- man/TorchCallback.Rd | 6 +- man/TorchDescriptor.Rd | 6 +- man/mlr_callback_set.lr_scheduler.Rd | 92 +++++++++ man/mlr_learners.mlp.Rd | 1 - man/mlr_learners.tab_resnet.Rd | 1 - man/mlr_learners.torch_featureless.Rd | 3 +- man/mlr_learners.torchvision.Rd | 1 - man/mlr_learners_torch.Rd | 1 - man/mlr_learners_torch_image.Rd | 1 - man/mlr_learners_torch_model.Rd | 1 - man/torch_callback.Rd | 2 +- tests/testthat/helper_autotest.R | 12 +- tests/testthat/test_CallbackSetLRScheduler.R | 84 +++++++++ 19 files changed, 400 insertions(+), 21 deletions(-) create mode 100644 R/CallbackSetLRScheduler.R create mode 100644 man/mlr_callback_set.lr_scheduler.Rd create mode 100644 tests/testthat/test_CallbackSetLRScheduler.R diff --git a/DESCRIPTION b/DESCRIPTION index 1a1eccaa..4ecd6bae 100644 --- a/DESCRIPTION +++ b/DESCRIPTION @@ -85,6 +85,7 @@ Collate: 'CallbackSetCheckpoint.R' 'CallbackSetEarlyStopping.R' 'CallbackSetHistory.R' + 'CallbackSetLRScheduler.R' 'CallbackSetProgress.R' 'CallbackSetTB.R' 'CallbackSetUnfreeze.R' diff --git a/NAMESPACE b/NAMESPACE index f68d6a30..492b5139 100644 --- a/NAMESPACE +++ b/NAMESPACE @@ -63,6 +63,7 @@ S3method(unmarshal_model,learner_torch_model_marshaled) export(CallbackSet) export(CallbackSetCheckpoint) export(CallbackSetHistory) +export(CallbackSetLRScheduler) export(CallbackSetProgress) export(CallbackSetTB) export(CallbackSetUnfreeze) diff --git a/NEWS.md b/NEWS.md index f0dba4cd..2b77f81b 100644 --- a/NEWS.md +++ b/NEWS.md @@ -11,6 +11,7 @@ * feat: Added multimodal melanoma example task * feat: Added a callback to iteratively unfreeze parameters for finetuning * fix: torch learners can now be used with `AutoTuner` +* feat: Added different learning rate schedulers as callbacks # mlr3torch 0.1.2 diff --git a/R/CallbackSetLRScheduler.R b/R/CallbackSetLRScheduler.R new file mode 100644 index 00000000..8269ff6f --- /dev/null +++ b/R/CallbackSetLRScheduler.R @@ -0,0 +1,188 @@ +#' @title Learning Rate Scheduling Callback +#' +#' @name mlr_callback_set.lr_scheduler +#' +#' @description +#' Changes the learning rate based on the schedule specified by a `torch::lr_scheduler`. +#' +#' As of this writing, the following are available: [torch::lr_cosine_annealing()], [torch::lr_lambda()], [torch::lr_multiplicative()], [torch::lr_one_cycle()], +#' [torch::lr_reduce_on_plateau()], [torch::lr_step()], and custom schedulers defined with [torch::lr_scheduler()]. +#' +#' @param .scheduler (`lr_scheduler_generator`)\cr +#' The `torch` scheduler generator (e.g. `torch::lr_step`). +#' @param ... (any)\cr +#' The scheduler-specific arguments +#' +#' @export +CallbackSetLRScheduler = R6Class("CallbackSetLRScheduler", + inherit = CallbackSet, + lock_objects = FALSE, + public = list( + #' @field scheduler_fn (`lr_scheduler_generator`)\cr + #' The `torch` function that creates a learning rate scheduler + scheduler_fn = NULL, + #' @field scheduler (`LRScheduler`)\cr + #' The learning rate scheduler wrapped by this callback + scheduler = NULL, + #' @description + #' Creates a new instance of this [R6][R6::R6Class] class. + initialize = function(.scheduler, step_on_epoch, ...) 
{
+    assert_class(.scheduler, "lr_scheduler_generator")
+    assert_flag(step_on_epoch)
+
+    self$scheduler_fn = .scheduler
+    private$.scheduler_args = list(...)
+    if (step_on_epoch) {
+      self$on_epoch_end = function() self$scheduler$step()
+    } else {
+      self$on_batch_end = function() self$scheduler$step()
+    }
+    },
+    #' @description
+    #' Creates the scheduler using the optimizer from the context.
+    on_begin = function() {
+      self$scheduler = invoke(self$scheduler_fn, optimizer = self$ctx$optimizer, .args = private$.scheduler_args)
+    }
+  ),
+  private = list(
+    .scheduler_args = NULL
+  )
+)
+
+# some of the schedulers accept lists
+# so they can treat different parameter groups differently
+check_class_or_list = function(x, classname) {
+  if (is.list(x)) check_list(x, types = classname) else check_class(x, classname)
+}
+
+#' @include TorchCallback.R
+mlr3torch_callbacks$add("lr_cosine_annealing", function() {
+  TorchCallback$new(
+    callback_generator = CallbackSetLRScheduler,
+    param_set = ps(
+      T_max = p_int(tags = c("train", "required")),
+      eta_min = p_dbl(default = 0, tags = "train"),
+      last_epoch = p_int(default = -1, tags = "train"),
+      verbose = p_lgl(default = FALSE, tags = "train")
+    ),
+    id = "lr_cosine_annealing",
+    label = "Cosine Annealing LR Scheduler",
+    man = "mlr3torch::mlr_callback_set.lr_scheduler",
+    additional_args = list(.scheduler = torch::lr_cosine_annealing, step_on_epoch = TRUE)
+  )
+})
+
+#' @include TorchCallback.R
+mlr3torch_callbacks$add("lr_lambda", function() {
+  TorchCallback$new(
+    callback_generator = CallbackSetLRScheduler,
+    param_set = ps(
+      lr_lambda = p_uty(tags = c("train", "required"), custom_check = function(x) check_class_or_list(x, "function")),
+      last_epoch = p_int(default = -1, tags = "train"),
+      verbose = p_lgl(default = FALSE, tags = "train")
+    ),
+    id = "lr_lambda",
+    label = "Multiplication by Function LR Scheduler",
+    man = "mlr3torch::mlr_callback_set.lr_scheduler",
+    additional_args = list(.scheduler = torch::lr_lambda, step_on_epoch = TRUE)
+  )
+})
+
+#' @include TorchCallback.R
+mlr3torch_callbacks$add("lr_multiplicative", function() {
+  TorchCallback$new(
+    callback_generator = CallbackSetLRScheduler,
+    param_set = ps(
+      lr_lambda = p_uty(tags = c("train", "required"), custom_check = function(x) check_class_or_list(x, "function")),
+      last_epoch = p_int(default = -1, tags = "train"),
+      verbose = p_lgl(default = FALSE, tags = "train")
+    ),
+    id = "lr_multiplicative",
+    label = "Multiplication by Factor LR Scheduler",
+    man = "mlr3torch::mlr_callback_set.lr_scheduler",
+    additional_args = list(.scheduler = torch::lr_multiplicative, step_on_epoch = TRUE)
+  )
+})
+
+#' @include TorchCallback.R
+mlr3torch_callbacks$add("lr_one_cycle", function() {
+  TorchCallback$new(
+    callback_generator = CallbackSetLRScheduler,
+    param_set = ps(
+      max_lr = p_uty(tags = c("train", "required"), custom_check = function(x) check_class_or_list(x, "numeric")),
+      total_steps = p_int(default = NULL, special_vals = list(NULL), tags = "train"),
+      epochs = p_int(default = NULL, special_vals = list(NULL), tags = "train"),
+      steps_per_epoch = p_int(default = NULL, special_vals = list(NULL), tags = "train"),
+      pct_start = p_dbl(default = 0.3, tags = "train"),
+      anneal_strategy = p_fct(default = "cos", levels = c("cos", "linear"), tags = "train"), # this is a string in the torch fn
+      cycle_momentum = p_lgl(default = TRUE, tags = "train"),
+      base_momentum = p_uty(default = 0.85, tags = "train", custom_check = function(x) check_class_or_list(x, "numeric")),
+      max_momentum = p_uty(default = 0.95, tags = "train", custom_check = function(x) check_class_or_list(x, "numeric")),
+      div_factor = p_dbl(default = 25, tags = "train"),
+      final_div_factor = p_dbl(default = 1e4, tags = "train"),
+      verbose = p_lgl(default = FALSE, tags = "train")
+    ),
+    id = "lr_one_cycle",
+    label = "1cycle LR Scheduler",
+    man = "mlr3torch::mlr_callback_set.lr_scheduler",
+    additional_args = list(.scheduler = torch::lr_one_cycle, step_on_epoch = FALSE)
+  )
+})
+
+#' @include TorchCallback.R
+mlr3torch_callbacks$add("lr_reduce_on_plateau", function() {
+  TorchCallback$new(
+    callback_generator = CallbackSetLRScheduler,
+    param_set = ps(
+      mode = p_fct(default = "min", levels = c("min", "max"), tags = "train"),
+      factor = p_dbl(default = 0.1, tags = "train"),
+      patience = p_int(default = 10, tags = "train"),
+      threshold = p_dbl(default = 1e-04, tags = "train"),
+      threshold_mode = p_fct(default = "rel", levels = c("rel", "abs"), tags = "train"),
+      cooldown = p_int(default = 0, tags = "train"),
+      min_lr = p_uty(default = 0, tags = "train", custom_check = function(x) check_class_or_list(x, "numeric")),
+      eps = p_dbl(default = 1e-08, tags = "train"),
+      verbose = p_lgl(default = FALSE, tags = "train")
+    ),
+    id = "lr_reduce_on_plateau",
+    label = "Reduce on Plateau LR Scheduler",
+    man = "mlr3torch::mlr_callback_set.lr_scheduler",
+    additional_args = list(.scheduler = torch::lr_reduce_on_plateau, step_on_epoch = TRUE)
+  )
+})
+
+#' @include TorchCallback.R
+mlr3torch_callbacks$add("lr_step", function() {
+  TorchCallback$new(
+    callback_generator = CallbackSetLRScheduler,
+    param_set = ps(
+      step_size = p_int(tags = c("train", "required")),
+      gamma = p_dbl(default = 0.1, tags = "train"),
+      last_epoch = p_int(default = -1, tags = "train")
+    ),
+    id = "lr_step",
+    label = "Step Decay LR Scheduler",
+    man = "mlr3torch::mlr_callback_set.lr_scheduler",
+    additional_args = list(.scheduler = torch::lr_step, step_on_epoch = TRUE)
+  )
+})
+
+#' @param x (`lr_scheduler_generator`)\cr
+#'   The `torch` scheduler generator defined using `torch::lr_scheduler()`.
+#' @param step_on_epoch (`logical(1)`)\cr
+#'   Whether the scheduler steps after every epoch (otherwise after every batch).
+as_lr_scheduler = function(x, step_on_epoch) {
+  assert_class(x, "lr_scheduler_generator")
+  assert_flag(step_on_epoch)
+
+  class_name = class(x)[1L]
+
+  TorchCallback$new(
+    callback_generator = CallbackSetLRScheduler,
+    param_set = inferps(x),
+    id = if (class_name == "") "lr_custom" else class_name,
+    label = "Custom LR Scheduler",
+    man = "mlr3torch::mlr_callback_set.lr_scheduler",
+    additional_args = list(.scheduler = x, step_on_epoch = step_on_epoch)
+  )
+}
diff --git a/R/TorchCallback.R b/R/TorchCallback.R
index ce1cf1b8..f7d396d7 100644
--- a/R/TorchCallback.R
+++ b/R/TorchCallback.R
@@ -192,8 +192,10 @@ TorchCallback = R6Class("TorchCallback",
     #' @template param_label
     #' @template param_packages
     #' @template param_man
+    #' @param additional_args (`list()`)\cr
+    #'   Additional construction arguments passed to the callback, i.e. arguments that are not exposed as hyperparameters. The learning rate scheduler callback uses this to pass the `torch` scheduler generator and `step_on_epoch`.
     initialize = function(callback_generator, param_set = NULL, id = NULL,
-                        label = NULL, packages = NULL, man = NULL) {
+                        label = NULL, packages = NULL, man = NULL, additional_args = NULL) {
       assert_class(callback_generator, "R6ClassGenerator")
 
       param_set = assert_param_set(param_set %??% inferps(callback_generator))
@@ -206,7 +208,8 @@ TorchCallback = R6Class("TorchCallback",
         param_set = param_set,
         packages = union(packages, "mlr3torch"),
         label = label,
-        man = man
+        man = man,
+        additional_args = additional_args
       )
     }
   ),
@@ -215,7 +218,7 @@
   )
 )
 
-#' @title Create a Callback Desctiptor
+#' @title Create a Callback Descriptor
 #'
 #' @description
 #' Convenience function to create a custom [`TorchCallback`].
diff --git a/R/TorchDescriptor.R b/R/TorchDescriptor.R
index 1696db95..e8170430 100644
--- a/R/TorchDescriptor.R
+++ b/R/TorchDescriptor.R
@@ -37,7 +37,9 @@ TorchDescriptor = R6Class("TorchDescriptor",
     #' @template param_packages
     #' @template param_label
     #' @template param_man
-    initialize = function(generator, id = NULL, param_set = NULL, packages = NULL, label = NULL, man = NULL) {
+    #' @param additional_args (`list()`)\cr
+    #'   Additional construction arguments passed to the generator, i.e. arguments that are not exposed as hyperparameters. The learning rate scheduler callback uses this to pass the `torch` scheduler generator and `step_on_epoch`.
+    initialize = function(generator, id = NULL, param_set = NULL, packages = NULL, label = NULL, man = NULL, additional_args = NULL) {
       assert_true(is.function(generator) || inherits(generator, "R6ClassGenerator"))
       self$generator = generator
       self$param_set = assert_r6(param_set, "ParamSet", null.ok = TRUE) %??% inferps(generator)
@@ -63,6 +65,7 @@ TorchDescriptor = R6Class("TorchDescriptor",
       self$id = assert_string(id %??% class(generator)[[1L]], min.chars = 1L)
       self$label = assert_string(label %??% self$id, min.chars = 1L)
       self$packages = assert_names(unique(union(packages, c("torch", "mlr3torch"))), type = "strict")
+      private$.additional_args = assert_list(additional_args, null.ok = TRUE)
     },
     #' @description
     #' Prints the object
@@ -86,9 +89,9 @@ TorchDescriptor = R6Class("TorchDescriptor",
       # The torch generators could also be constructed with the $new() method, but then the return value
       # would be the actual R6 class and not the wrapped function.
       if (is.function(self$generator)) {
-        invoke(self$generator, .args = self$param_set$get_values())
+        invoke(self$generator, .args = c(self$param_set$get_values(), private$.additional_args))
       } else {
-        invoke(self$generator$new, .args = self$param_set$get_values())
+        invoke(self$generator$new, .args = c(self$param_set$get_values(), private$.additional_args))
       }
     },
     #' @description
@@ -107,6 +110,7 @@ TorchDescriptor = R6Class("TorchDescriptor",
     }
   ),
   private = list(
+    .additional_args = NULL,
     .additional_phash_input = function() {
       stopf("Classes inheriting from TorchDescriptor must implement the .additional_phash_input() method.")
     },
diff --git a/man/TorchCallback.Rd b/man/TorchCallback.Rd
index 3af77676..80d8309c 100644
--- a/man/TorchCallback.Rd
+++ b/man/TorchCallback.Rd
@@ -113,7 +113,8 @@ Creates a new instance of this \link[R6:R6Class]{R6} class.
   id = NULL,
   label = NULL,
   packages = NULL,
-  man = NULL
+  man = NULL,
+  additional_args = NULL
 )}\if{html}{\out{</div>}}
 }
 
@@ -138,6 +139,9 @@ The R packages this object depends on.}
 \item{\code{man}}{(\code{character(1)})\cr
 String in the format \verb{[pkg]::[topic]} pointing to a manual page for this object.
 The referenced help page can be opened via method \verb{$help()}.}
+
+\item{\code{additional_args}}{(\code{list()})\cr
+Additional construction arguments passed to the callback, i.e. arguments that are not exposed as hyperparameters. The learning rate scheduler callback uses this to pass the \code{torch} scheduler generator and \code{step_on_epoch}.}
 }
 \if{html}{\out{</div>}}
 }
diff --git a/man/TorchDescriptor.Rd b/man/TorchDescriptor.Rd
index 26ad886a..b4871137 100644
--- a/man/TorchDescriptor.Rd
+++ b/man/TorchDescriptor.Rd
@@ -88,7 +88,8 @@ Creates a new instance of this \link[R6:R6Class]{R6} class.
   param_set = NULL,
   packages = NULL,
   label = NULL,
-  man = NULL
+  man = NULL,
+  additional_args = NULL
 )}\if{html}{\out{</div>}}
 }
 
@@ -112,6 +113,9 @@ Label for the new instance.}
 \item{\code{man}}{(\code{character(1)})\cr
 String in the format \verb{[pkg]::[topic]} pointing to a manual page for this object.
 The referenced help page can be opened via method \verb{$help()}.}
+
+\item{\code{additional_args}}{(\code{list()})\cr
+Additional construction arguments passed to the generator, i.e. arguments that are not exposed as hyperparameters. The learning rate scheduler callback uses this to pass the \code{torch} scheduler generator and \code{step_on_epoch}.}
 }
 \if{html}{\out{</div>}}
 }
diff --git a/man/mlr_callback_set.lr_scheduler.Rd b/man/mlr_callback_set.lr_scheduler.Rd
new file mode 100644
index 00000000..9953f5b2
--- /dev/null
+++ b/man/mlr_callback_set.lr_scheduler.Rd
@@ -0,0 +1,92 @@
+% Generated by roxygen2: do not edit by hand
+% Please edit documentation in R/CallbackSetLRScheduler.R
+\name{mlr_callback_set.lr_scheduler}
+\alias{mlr_callback_set.lr_scheduler}
+\alias{CallbackSetLRScheduler}
+\title{Learning Rate Scheduling Callback}
+\description{
+Changes the learning rate based on the schedule specified by a \code{torch::lr_scheduler}.
+
+As of this writing, the following are available: \code{\link[torch:lr_cosine_annealing]{torch::lr_cosine_annealing()}}, \code{\link[torch:lr_lambda]{torch::lr_lambda()}}, \code{\link[torch:lr_multiplicative]{torch::lr_multiplicative()}}, \code{\link[torch:lr_one_cycle]{torch::lr_one_cycle()}},
+\code{\link[torch:lr_reduce_on_plateau]{torch::lr_reduce_on_plateau()}}, \code{\link[torch:lr_step]{torch::lr_step()}}, and custom schedulers defined with \code{\link[torch:lr_scheduler]{torch::lr_scheduler()}}.
+}
+\section{Super class}{
+\code{\link[mlr3torch:CallbackSet]{mlr3torch::CallbackSet}} -> \code{CallbackSetLRScheduler}
+}
+\section{Public fields}{
+\if{html}{\out{<div class="r6-fields">}}
+\describe{
+\item{\code{scheduler_fn}}{(\code{lr_scheduler_generator})\cr
+The \code{torch} function that creates a learning rate scheduler}
+
+\item{\code{scheduler}}{(\code{LRScheduler})\cr
+The learning rate scheduler wrapped by this callback}
+}
+\if{html}{\out{</div>}}
+}
+\section{Methods}{
+\subsection{Public methods}{
+\itemize{
+\item \href{#method-CallbackSetLRScheduler-new}{\code{CallbackSetLRScheduler$new()}}
+\item \href{#method-CallbackSetLRScheduler-on_begin}{\code{CallbackSetLRScheduler$on_begin()}}
+\item \href{#method-CallbackSetLRScheduler-clone}{\code{CallbackSetLRScheduler$clone()}}
+}
+}
+\if{html}{\out{
+<details open><summary>Inherited methods</summary>
+</details>
+}}
+\if{html}{\out{<hr>}}
+\if{html}{\out{<a id="method-CallbackSetLRScheduler-new"></a>}}
+\if{latex}{\out{\hypertarget{method-CallbackSetLRScheduler-new}{}}}
+\subsection{Method \code{new()}}{
+Creates a new instance of this \link[R6:R6Class]{R6} class.
+\subsection{Usage}{
+\if{html}{\out{<div class="r">}}\preformatted{CallbackSetLRScheduler$new(.scheduler, step_on_epoch, ...)}\if{html}{\out{</div>}}
+}
+
+\subsection{Arguments}{
+\if{html}{\out{<div class="arguments">}}
+\describe{
+\item{\code{.scheduler}}{(\code{lr_scheduler_generator})\cr
+The \code{torch} scheduler generator (e.g. \code{torch::lr_step}).}
+
+\item{\code{...}}{(any)\cr
+The scheduler-specific arguments.}
+}
+\if{html}{\out{</div>}}
+}
+}
+\if{html}{\out{<hr>}}
+\if{html}{\out{<a id="method-CallbackSetLRScheduler-on_begin"></a>}}
+\if{latex}{\out{\hypertarget{method-CallbackSetLRScheduler-on_begin}{}}}
+\subsection{Method \code{on_begin()}}{
+Creates the scheduler using the optimizer from the context.
+\subsection{Usage}{
+\if{html}{\out{<div class="r">}}\preformatted{CallbackSetLRScheduler$on_begin()}\if{html}{\out{</div>}}
+}
+
+}
+\if{html}{\out{<hr>}}
+\if{html}{\out{<a id="method-CallbackSetLRScheduler-clone"></a>}}
+\if{latex}{\out{\hypertarget{method-CallbackSetLRScheduler-clone}{}}}
+\subsection{Method \code{clone()}}{
+The objects of this class are cloneable with this method.
+\subsection{Usage}{
+\if{html}{\out{<div class="r">}}\preformatted{CallbackSetLRScheduler$clone(deep = FALSE)}\if{html}{\out{</div>}}
+}
+
+\subsection{Arguments}{
+\if{html}{\out{<div class="arguments">}}
+\describe{
+\item{\code{deep}}{Whether to make a deep clone.}
+}
+\if{html}{\out{</div>}}
+}
+}
+}
diff --git a/man/mlr_learners.mlp.Rd b/man/mlr_learners.mlp.Rd
index 276572f1..7f03f5c7 100644
--- a/man/mlr_learners.mlp.Rd
+++ b/man/mlr_learners.mlp.Rd
@@ -107,7 +107,6 @@ Other Learner:
Inherited methods
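
A minimal usage sketch for the new callbacks: they are registered in the
callback dictionary, so `t_clbk()` retrieves them and sets their
hyperparameters. This sketch assumes the `classif.mlp` learner from mlr3torch
and the `sonar` example task from mlr3:

    library(mlr3)
    library(mlr3torch)

    # anneal the learning rate over 10 epochs, never dropping below 1e-5
    cb = t_clbk("lr_cosine_annealing", T_max = 10, eta_min = 1e-5)

    learner = lrn("classif.mlp",
      epochs = 10, batch_size = 32, neurons = 20,
      callbacks = cb
    )
    learner$train(tsk("sonar"))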
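Of the schedulers above, only `lr_one_cycle` is registered with
`step_on_epoch = FALSE`, so its schedule advances after every batch rather
than after every epoch; `total_steps` must therefore count batches, not
epochs. A sketch, assuming 10 epochs on the 208-row `sonar` task at batch
size 32:

    # 10 epochs * ceiling(208 / 32) = 70 batch-wise steps in total
    cb = t_clbk("lr_one_cycle", max_lr = 0.1, total_steps = 10 * ceiling(208 / 32))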
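Custom schedulers built with `torch::lr_scheduler()` are wrapped through
`as_lr_scheduler()`, which infers the parameter set from the generator and
derives the callback id from the generator's class name. A sketch with a toy,
made-up decay rule; since the patch does not export `as_lr_scheduler()`, it
is reached here via `:::`:

    library(mlr3torch)

    # toy generator: halve every parameter group's learning rate at each step
    lr_halving = torch::lr_scheduler(
      "lr_halving",
      initialize = function(optimizer, last_epoch = -1) {
        super$initialize(optimizer, last_epoch)
      },
      get_lr = function() {
        sapply(self$optimizer$param_groups, function(g) g$lr / 2)
      }
    )

    # steps once per epoch; the callback id becomes "lr_halving"
    cb = mlr3torch:::as_lr_scheduler(lr_halving, step_on_epoch = TRUE)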