[Feature] Add scheduler for alpha/beta parameters of PrioritizedSampler #2452
@@ -58,6 +58,11 @@
    SliceSampler,
    SliceSamplerWithoutReplacement,
)
from torchrl.data.replay_buffers.scheduler import (
    LinearScheduler,
    SchedulerList,
    StepScheduler,
)
from torchrl.data.replay_buffers.storages import (
    LazyMemmapStorage,

@@ -99,6 +104,7 @@
    VecNorm,
)

OLD_TORCH = parse(torch.__version__) < parse("2.0.0")
_has_tv = importlib.util.find_spec("torchvision") is not None
_has_gym = importlib.util.find_spec("gym") is not None
@@ -3026,6 +3032,51 @@ def test_prioritized_slice_sampler_episodes(device):
    ), "after priority update, only episode 1 and 3 are expected to be sampled"


def test_prioritized_parameter_scheduler():
    INIT_ALPHA = 0.7
    INIT_BETA = 0.6
    GAMMA = 0.1
    EVERY_N_STEPS = 10
    LINEAR_STEPS = 100
    TOTAL_STEPS = 200
    rb = TensorDictPrioritizedReplayBuffer(
        alpha=INIT_ALPHA, beta=INIT_BETA, storage=ListStorage(max_size=2000)
    )
    data = TensorDict({"data": torch.randn(1000, 5)}, batch_size=1000)
    rb.extend(data)
    alpha_scheduler = LinearScheduler(
        rb, param_name="alpha", final_value=0.0, num_steps=LINEAR_STEPS
    )
    beta_scheduler = StepScheduler(
        rb,
        param_name="beta",
        gamma=GAMMA,
        n_steps=EVERY_N_STEPS,
        max_value=1.0,
        mode="additive",
    )
    scheduler = SchedulerList(schedulers=(alpha_scheduler, beta_scheduler))
    expected_alpha_vals = np.linspace(INIT_ALPHA, 0.0, num=LINEAR_STEPS + 1)
    expected_alpha_vals = np.pad(
        expected_alpha_vals, (0, TOTAL_STEPS - LINEAR_STEPS), constant_values=0.0
    )
Review comment: let's use torch here
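For reference, a torch-based sketch of the same construction the comment asks for (not the committed change):

```python
import torch

# Interpolate alpha from INIT_ALPHA down to 0 over LINEAR_STEPS steps, then
# hold it at 0 for the remaining TOTAL_STEPS - LINEAR_STEPS steps.
expected_alpha_vals = torch.linspace(INIT_ALPHA, 0.0, steps=LINEAR_STEPS + 1)
expected_alpha_vals = torch.nn.functional.pad(
    expected_alpha_vals, (0, TOTAL_STEPS - LINEAR_STEPS), value=0.0
)
```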
    expected_beta_vals = [INIT_BETA]
    for _ in range((TOTAL_STEPS // EVERY_N_STEPS - 1)):
        expected_beta_vals.append(expected_beta_vals[-1] + GAMMA)
    expected_beta_vals = (
        np.atleast_2d(expected_beta_vals).repeat(EVERY_N_STEPS).clip(None, 1.0)
    )
    for i in range(TOTAL_STEPS):
        assert np.isclose(
            rb.sampler.alpha, expected_alpha_vals[i]
        ), f"expected {expected_alpha_vals[i]}, got {rb.sampler.alpha}"
        assert np.isclose(
            rb.sampler.beta, expected_beta_vals[i]
        ), f"expected {expected_beta_vals[i]}, got {rb.sampler.beta}"
Review comment: let's use …
        rb.sample(20)
        scheduler.step()


class TestEnsemble:
    def _make_data(self, data_type):
        if data_type is torch.Tensor:

@@ -0,0 +1,240 @@
from typing import Any, Callable, Dict

import numpy as np

from .replay_buffers import ReplayBuffer
from .samplers import Sampler


class ParameterScheduler:
    """Scheduler to adjust the value of a given parameter of a replay buffer's sampler.

    The scheduler can, for example, be used to alter the alpha and beta values in the PrioritizedSampler.

    Args:
        obj (ReplayBuffer | Sampler): the replay buffer whose sampler to adjust (or the sampler itself).
        param_name (str): the name of the attribute to adjust, e.g. `beta` to adjust the beta parameter.
        min_value (Union[int, float], optional): a lower bound for the parameter to be adjusted.
            Defaults to None.
        max_value (Union[int, float], optional): an upper bound for the parameter to be adjusted.
            Defaults to None.

    """

    def __init__(
        self,
        obj: ReplayBuffer | Sampler,
        param_name: str,
        min_value: int | float = None,
        max_value: int | float = None,
    ):
        if not isinstance(obj, ReplayBuffer) and not isinstance(obj, Sampler):
            raise TypeError(
                f"ParameterScheduler only supports Sampler class. Pass either ReplayBuffer or Sampler object. Got {type(obj)}"
            )
        self.sampler = obj.sampler if isinstance(obj, ReplayBuffer) else obj
        self.param_name = param_name
        self._min_val = min_value
        self._max_val = max_value
        if not hasattr(self.sampler, self.param_name):
            raise ValueError(
                f"Provided class {type(obj).__name__} does not have an attribute {param_name}"
            )
        self.initial_val = getattr(self.sampler, self.param_name)
Review comment: do we want to copy that? If it's a tensor its value could change in-place
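A sketch of the defensive copy the comment is asking about (hypothetical, not part of the diff): clone tensors so that later in-place updates to the sampler's parameter cannot mutate the stored initial value.

```python
import copy

import torch

# inside ParameterScheduler.__init__, instead of a plain getattr assignment:
initial_val = getattr(self.sampler, self.param_name)
if isinstance(initial_val, torch.Tensor):
    # detach + clone breaks aliasing with the sampler's own tensor
    self.initial_val = initial_val.detach().clone()
else:
    self.initial_val = copy.deepcopy(initial_val)
```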
        self._step_cnt = 0

    def state_dict(self):
Review comment: do we want to match the nn.Module.state_dict signature here? Reply: that one was actually blindly copied from … Reply: Oh wow! Ok then...
"""Return the state of the scheduler as a :class:`dict`. | ||
LTluttmann marked this conversation as resolved.
Show resolved
Hide resolved
|
||
|
||
It contains an entry for every variable in self.__dict__ which | ||
LTluttmann marked this conversation as resolved.
Show resolved
Hide resolved
|
||
is not the optimizer. | ||
LTluttmann marked this conversation as resolved.
Show resolved
Hide resolved
|
||
""" | ||
return {key: value for key, value in self.__dict__.items() if key != "sampler"} | ||
LTluttmann marked this conversation as resolved.
Show resolved
Hide resolved
|
||
|
||
def load_state_dict(self, state_dict: Dict[str, Any]): | ||
"""Load the scheduler's state. | ||
|
||
Args: | ||
state_dict (dict): scheduler state. Should be an object returned | ||
from a call to :meth:`state_dict`. | ||
""" | ||
self.__dict__.update(state_dict) | ||

    def step(self):
        self._step_cnt += 1
        # Apply the step function
        new_value = self._step()
        # clip value to specified range
        new_value_clipped = np.clip(new_value, a_min=self._min_val, a_max=self._max_val)
Review comment: what if it's a tensor? Reply: good catch. Imo there are two ways of handling this … What do you think?
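One way the clipping could be made tensor-aware (a sketch of one possible option, not the PR's resolution):

```python
import numpy as np
import torch


def _clip_value(value, min_val=None, max_val=None):
    # Leave the value untouched when no bound is given (np.clip raises in that case).
    if min_val is None and max_val is None:
        return value
    if isinstance(value, torch.Tensor):
        return value.clamp(min=min_val, max=max_val)
    return np.clip(value, a_min=min_val, a_max=max_val)
```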
        # Set the new value of the parameter dynamically
        setattr(self.sampler, self.param_name, new_value_clipped)

    def _step(self):
        raise NotImplementedError


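To illustrate the extension point of the base class (subclasses only implement `_step` and return the next raw value; clipping and the step counter are handled by `ParameterScheduler.step`), here is a hypothetical subclass that is not part of the PR:

```python
class ExponentialScheduler(ParameterScheduler):
    """Hypothetical example: decay the parameter by a constant factor per step."""

    def __init__(self, obj, param_name, decay: float = 0.99, min_value=None, max_value=None):
        super().__init__(obj, param_name, min_value, max_value)
        self.decay = decay

    def _step(self):
        # the next value is computed from the stored initial value, not the current one
        return self.initial_val * self.decay**self._step_cnt
```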
class LambdaScheduler(ParameterScheduler):
    """Sets a parameter to its initial value times a given function.

    Similar to torch.optim.lr_scheduler.LambdaLR.

    Args:
        obj (ReplayBuffer | Sampler): the replay buffer whose sampler to adjust (or the sampler itself).
        param_name (str): the name of the attribute to adjust, e.g. `beta` to adjust the
            beta parameter.
        lambda_fn (function): a function which computes a multiplicative factor given an integer
            step count.
        min_value (Union[int, float], optional): a lower bound for the parameter to be adjusted.
            Defaults to None.
        max_value (Union[int, float], optional): an upper bound for the parameter to be adjusted.
            Defaults to None.

    """

    def __init__(
        self,
        obj: ReplayBuffer | Sampler,
        param_name: str,
        lambda_fn: Callable[[int], float],
        min_value: int | float = None,
        max_value: int | float = None,
    ):
        super().__init__(obj, param_name, min_value, max_value)
        self.lambda_fn = lambda_fn

    def _step(self):
        return self.initial_val * self.lambda_fn(self._step_cnt)
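A usage sketch for `LambdaScheduler` (hypothetical values, in the style of the Example blocks below):

```python
>>> # xdoctest: +SKIP
>>> # Assuming the sampler uses initial alpha = 0.7, anneal it by 5% per step
>>> scheduler = LambdaScheduler(rb, param_name="alpha", lambda_fn=lambda step: 0.95**step)
>>> for epoch in range(100):
>>>     train(...)
>>>     scheduler.step()
```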
class LinearScheduler(ParameterScheduler):
    """A linear scheduler for gradually altering a parameter in an object over a given number of steps.

    This scheduler linearly interpolates between the initial value of the parameter and a final target value.

    Args:
        obj (ReplayBuffer | Sampler): the replay buffer whose sampler to adjust (or the sampler itself).
        param_name (str): the name of the attribute to adjust, e.g. `beta` to adjust the
            beta parameter.
        final_value (Union[int, float]): The final value that the parameter will reach after the
            specified number of steps.
        num_steps (int): The total number of steps over which the parameter
            will be linearly altered.

    Example:
        >>> # xdoctest: +SKIP
        >>> # Assuming sampler uses initial beta = 0.6
        >>> # beta = 0.7 if step == 1
        >>> # beta = 0.8 if step == 2
        >>> # beta = 0.9 if step == 3
        >>> # beta = 1.0 if step >= 4
        >>> scheduler = LinearScheduler(sampler, param_name='beta', final_value=1.0, num_steps=4)
        >>> for epoch in range(100):
        >>>     train(...)
        >>>     validate(...)
        >>>     scheduler.step()
    """

    def __init__(
        self,
        obj: ReplayBuffer | Sampler,
        param_name: str,
        final_value: int | float,
        num_steps: int,
    ):
        super().__init__(obj, param_name)
        self.final_val = final_value
        self.num_steps = num_steps
        self._delta = (self.final_val - self.initial_val) / self.num_steps

    def _step(self):
        if self._step_cnt < self.num_steps:
            return self.initial_val + (self._delta * self._step_cnt)
        else:
            return self.final_val
Review comment: nit: if we ever want this to be compilable without graph breaks we should think of a way to remove the control flow, e.g. `return torch.where(self._step_cnt < self.num_steps, self.initial_val + (self._delta * self._step_cnt), self.final_val)`, assuming that all of these are tensors. Reply: interesting point. I stuck to the way torch schedulers handle different behavior for different epochs (e.g. here). Do you think that is okay for now? Reply: yeah that's fine, maybe let's add a comment to let someone know in the future that this should be fixed
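A runnable sketch of the branch-free variant suggested in the comment (assuming the scheduler state is held in tensors; this is not what the PR implements):

```python
import torch

def _step(self):
    # torch.where evaluates both branches but removes the Python if/else,
    # so torch.compile can trace the scheduler without graph breaks.
    step_cnt = torch.as_tensor(self._step_cnt)
    return torch.where(
        step_cnt < self.num_steps,
        self.initial_val + self._delta * step_cnt,
        torch.as_tensor(self.final_val),
    )
```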
class StepScheduler(ParameterScheduler):
    """A step scheduler that alters a parameter after every n steps using either multiplicative or additive changes.

    The scheduler can apply:
    1. Multiplicative changes: `new_val = curr_val * gamma`
    2. Additive changes: `new_val = curr_val + gamma`

    Args:
        obj (ReplayBuffer | Sampler): the replay buffer whose sampler to adjust (or the sampler itself).
        param_name (str): the name of the attribute to adjust, e.g. `beta` to adjust the
            beta parameter.
        gamma (int | float, optional): The value by which to adjust the parameter,
            either in a multiplicative or additive way. Defaults to 0.9.
        n_steps (int, optional): The number of steps after which the parameter should be altered.
            Defaults to 1.
        mode (str, optional): The mode of scheduling. Can be either 'multiplicative' or 'additive'.
            Defaults to 'multiplicative'.
        min_value (int | float, optional): a lower bound for the parameter to be adjusted.
            Defaults to None.
        max_value (int | float, optional): an upper bound for the parameter to be adjusted.
            Defaults to None.

    Example:
        >>> # xdoctest: +SKIP
        >>> # Assuming sampler uses initial beta = 0.6
        >>> # beta = 0.6 if 0 <= step < 10
        >>> # beta = 0.7 if 10 <= step < 20
        >>> # beta = 0.8 if 20 <= step < 30
        >>> # beta = 0.9 if 30 <= step < 40
        >>> # beta = 1.0 if 40 <= step
        >>> scheduler = StepScheduler(sampler, param_name='beta', gamma=0.1, n_steps=10, mode='additive', max_value=1.0)
        >>> for epoch in range(100):
        >>>     train(...)
        >>>     validate(...)
        >>>     scheduler.step()
    """

    def __init__(
        self,
        obj: ReplayBuffer | Sampler,
        param_name: str,
        gamma: int | float = 0.9,
        n_steps: int = 1,
        mode: str = "multiplicative",
        min_value: int | float = None,
        max_value: int | float = None,
    ):
        super().__init__(obj, param_name, min_value, max_value)
        self.gamma = gamma
        self.n_steps = n_steps
        if mode == "additive":
            operator = np.add
        elif mode == "multiplicative":
            operator = np.multiply
        else:
            raise ValueError(
                f"Invalid mode: {mode}. Choose 'multiplicative' or 'additive'."
            )
        self.operator = operator

    def _step(self):
        """Applies the scheduling logic to alter the parameter value every `n_steps`."""
        # Check if the current step count is a multiple of n_steps
        current_val = getattr(self.sampler, self.param_name)
        if self._step_cnt % self.n_steps == 0:
            return self.operator(current_val, self.gamma)
        else:
            return current_val
Review comment on lines +248 to +251: ditto
class SchedulerList:
    """Simple container abstracting a list of schedulers."""

    def __init__(self, schedulers: list[ParameterScheduler]) -> None:
        if isinstance(schedulers, ParameterScheduler):
            schedulers = [schedulers]
        self.schedulers = schedulers

    def append(self, scheduler: ParameterScheduler):
        self.schedulers.append(scheduler)

    def step(self):
        for scheduler in self.schedulers:
            scheduler.step()
Review comment: let's maybe make these args to the func?
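For completeness, a usage sketch of `SchedulerList` driving both parameters of a prioritized buffer (hypothetical values, mirroring the test above):

```python
>>> # xdoctest: +SKIP
>>> alpha_scheduler = LinearScheduler(rb, param_name="alpha", final_value=0.0, num_steps=100)
>>> beta_scheduler = StepScheduler(rb, param_name="beta", gamma=0.1, n_steps=10, mode="additive", max_value=1.0)
>>> scheduler = SchedulerList(schedulers=(alpha_scheduler, beta_scheduler))
>>> for _ in range(200):
>>>     rb.sample(20)
>>>     scheduler.step()
```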