
Commit 63cfb2e

Fixes DLR-RM#2115. Avoid segmentation fault when loading models with non-portable schedules
Previously, using closures (e.g., lambdas) for learning_rate or clip_range caused segmentation faults when loading models across different platforms (e.g., macOS to Linux), because cloudpickle could not safely serialize/deserialize them. This commit rewrites:

- `constant_fn` as a `ConstantSchedule` class
- `get_schedule_fn` as a `FloatConverterSchedule` class
- `get_linear_fn` as a `LinearSchedule` class

All schedules are now proper callable classes, making them portable and safely pickleable. The old functions are kept (marked as deprecated) for backward compatibility when loading existing models.
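To illustrate the user-facing difference, a minimal sketch (the `LinearSchedule` signature comes from this diff; the environment ID and schedule values are arbitrary):

from stable_baselines3 import PPO
from stable_baselines3.common.utils import LinearSchedule

# Before this commit: a lambda closure, which cloudpickle serializes by value
# (raw code object) -- fragile across OS and Python versions:
#   model = PPO("MlpPolicy", "CartPole-v1", learning_rate=lambda p: p * 3e-4)

# With this commit: a plain callable class; instances pickle as a class
# reference plus attribute dict, so they load anywhere the library is installed
model = PPO("MlpPolicy", "CartPole-v1", learning_rate=LinearSchedule(3e-4, 3e-5, 1.0))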
1 parent c1e503c commit 63cfb2e

File tree

6 files changed, +140 -8 lines changed


docs/misc/changelog.rst

Lines changed: 1 addition & 0 deletions
@@ -15,6 +15,7 @@ New Features:
 Bug Fixes:
 ^^^^^^^^^^
 - Fixed docker GPU image (PyTorch GPU was not installed)
+- Fixed segmentation faults caused by non-portable schedules during model loading (#2115)
 
 `SB3-Contrib`_
 ^^^^^^^^^^^^^^

stable_baselines3/common/base_class.py

Lines changed: 2 additions & 2 deletions
@@ -25,9 +25,9 @@
 from stable_baselines3.common.save_util import load_from_zip_file, recursive_getattr, recursive_setattr, save_to_zip_file
 from stable_baselines3.common.type_aliases import GymEnv, MaybeCallback, Schedule, TensorDict
 from stable_baselines3.common.utils import (
+    FloatConverterSchedule,
     check_for_correct_spaces,
     get_device,
-    get_schedule_fn,
     get_system_info,
     set_random_seed,
     update_learning_rate,
@@ -273,7 +273,7 @@ def logger(self) -> Logger:
 
     def _setup_lr_schedule(self) -> None:
         """Transform to callable if needed."""
-        self.lr_schedule = get_schedule_fn(self.learning_rate)
+        self.lr_schedule = FloatConverterSchedule(self.learning_rate)
 
     def _update_current_progress_remaining(self, num_timesteps: int, total_timesteps: int) -> None:
         """

stable_baselines3/common/utils.py

Lines changed: 79 additions & 0 deletions
@@ -78,6 +78,35 @@ def update_learning_rate(optimizer: th.optim.Optimizer, learning_rate: float) ->
         param_group["lr"] = learning_rate
 
 
+class FloatConverterSchedule:
+    """
+    Wrapper that ensures the output of a Schedule is cast to float.
+    Can wrap either a constant value or an existing callable Schedule.
+    """
+
+    def __init__(self, value_schedule: Union[Schedule, float]):
+        """
+        :param value_schedule: Constant value or callable schedule
+            (e.g. LinearSchedule, ConstantSchedule)
+        """
+        if isinstance(value_schedule, FloatConverterSchedule):
+            self.value_schedule: Schedule = value_schedule.value_schedule
+        elif isinstance(value_schedule, (float, int)):
+            self.value_schedule = ConstantSchedule(float(value_schedule))
+        else:
+            assert callable(value_schedule)
+            self.value_schedule = value_schedule
+
+    def __call__(self, progress_remaining: float) -> float:
+        # Cast to float to avoid unpickling errors and enable weights_only=True, see GH#1900
+        # Some types have odd behaviors when part of a Schedule, like numpy floats
+        return float(self.value_schedule(progress_remaining))
+
+    def __repr__(self) -> str:
+        return f"FloatConverterSchedule({self.value_schedule})"
+
+
+# Deprecated: only kept for backward compatibility when unpickling old models, use FloatConverterSchedule instead
 def get_schedule_fn(value_schedule: Union[Schedule, float]) -> Schedule:
     """
     Transform (if needed) learning rate and clip range (for PPO)
@@ -98,6 +127,37 @@ def get_schedule_fn(value_schedule: Union[Schedule, float]) -> Schedule:
     return lambda progress_remaining: float(value_schedule(progress_remaining))
 
 
+class LinearSchedule:
+    """
+    LinearSchedule interpolates linearly between start and end
+    between ``progress_remaining`` = 1 and ``progress_remaining`` = ``end_fraction``.
+    This is used in DQN for linearly annealing the exploration fraction
+    (epsilon for the epsilon-greedy strategy).
+    """
+
+    def __init__(self, start: float, end: float, end_fraction: float):
+        """
+        :param start: value to start with if ``progress_remaining`` = 1
+        :param end: value to end with if ``progress_remaining`` = 0
+        :param end_fraction: fraction of ``progress_remaining``
+            where end is reached, e.g. 0.1 means end is reached after 10%
+            of the complete training process.
+        """
+        self.start = start
+        self.end = end
+        self.end_fraction = end_fraction
+
+    def __call__(self, progress_remaining: float) -> float:
+        if (1 - progress_remaining) > self.end_fraction:
+            return self.end
+        else:
+            return self.start + (1 - progress_remaining) * (self.end - self.start) / self.end_fraction
+
+    def __repr__(self) -> str:
+        return f"LinearSchedule(start={self.start}, end={self.end}, end_fraction={self.end_fraction})"
+
+
+# Deprecated: only kept for backward compatibility when unpickling old models, use LinearSchedule instead
 def get_linear_fn(start: float, end: float, end_fraction: float) -> Schedule:
     """
     Create a function that interpolates linearly between start and end
@@ -122,6 +182,25 @@ def func(progress_remaining: float) -> float:
     return func
 
 
+class ConstantSchedule:
+    """
+    Constant schedule that always returns the same value.
+    Useful for fixed learning rates or clip ranges.
+
+    :param val: constant value
+    """
+
+    def __init__(self, val: float):
+        self.val = val
+
+    def __call__(self, _: float) -> float:
+        return self.val
+
+    def __repr__(self) -> str:
+        return f"ConstantSchedule(val={self.val})"
+
+
+# Deprecated: only kept for backward compatibility when unpickling old models, use ConstantSchedule instead
 def constant_fn(val: float) -> Schedule:
     """
     Create a function that returns a constant
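To make the `LinearSchedule` interpolation concrete, a worked example with DQN-style epsilon values (illustrative numbers, not taken from this diff):

from stable_baselines3.common.utils import LinearSchedule

# Anneal epsilon from 1.0 down to 0.05 over the first 10% of training
eps = LinearSchedule(start=1.0, end=0.05, end_fraction=0.1)

print(eps(1.0))   # 1.0 -- training start (progress_remaining = 1)
print(eps(0.95))  # ~0.525 -- halfway through the annealing window:
                  #           1.0 + (1 - 0.95) * (0.05 - 1.0) / 0.1
print(eps(0.85))  # 0.05 -- (1 - 0.85) > 0.1, so the value is clamped at `end`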

stable_baselines3/dqn/dqn.py

Lines changed: 2 additions & 2 deletions
@@ -10,7 +10,7 @@
 from stable_baselines3.common.off_policy_algorithm import OffPolicyAlgorithm
 from stable_baselines3.common.policies import BasePolicy
 from stable_baselines3.common.type_aliases import GymEnv, MaybeCallback, Schedule
-from stable_baselines3.common.utils import get_linear_fn, get_parameters_by_name, polyak_update
+from stable_baselines3.common.utils import LinearSchedule, get_parameters_by_name, polyak_update
 from stable_baselines3.dqn.policies import CnnPolicy, DQNPolicy, MlpPolicy, MultiInputPolicy, QNetwork
 
 SelfDQN = TypeVar("SelfDQN", bound="DQN")
@@ -146,7 +146,7 @@ def _setup_model(self) -> None:
         # Copy running stats, see GH issue #996
         self.batch_norm_stats = get_parameters_by_name(self.q_net, ["running_"])
         self.batch_norm_stats_target = get_parameters_by_name(self.q_net_target, ["running_"])
-        self.exploration_schedule = get_linear_fn(
+        self.exploration_schedule = LinearSchedule(
             self.exploration_initial_eps,
             self.exploration_final_eps,
             self.exploration_fraction,

stable_baselines3/ppo/ppo.py

Lines changed: 3 additions & 3 deletions
@@ -10,7 +10,7 @@
 from stable_baselines3.common.on_policy_algorithm import OnPolicyAlgorithm
 from stable_baselines3.common.policies import ActorCriticCnnPolicy, ActorCriticPolicy, BasePolicy, MultiInputActorCriticPolicy
 from stable_baselines3.common.type_aliases import GymEnv, MaybeCallback, Schedule
-from stable_baselines3.common.utils import explained_variance, get_schedule_fn
+from stable_baselines3.common.utils import FloatConverterSchedule, explained_variance
 
 SelfPPO = TypeVar("SelfPPO", bound="PPO")
 
@@ -174,12 +174,12 @@ def _setup_model(self) -> None:
         super()._setup_model()
 
         # Initialize schedules for policy/value clipping
-        self.clip_range = get_schedule_fn(self.clip_range)
+        self.clip_range = FloatConverterSchedule(self.clip_range)
         if self.clip_range_vf is not None:
             if isinstance(self.clip_range_vf, (float, int)):
                 assert self.clip_range_vf > 0, "`clip_range_vf` must be positive, " "pass `None` to deactivate vf clipping"
 
-            self.clip_range_vf = get_schedule_fn(self.clip_range_vf)
+            self.clip_range_vf = FloatConverterSchedule(self.clip_range_vf)
 
     def train(self) -> None:
         """

tests/test_save_load.py

Lines changed: 53 additions & 1 deletion
@@ -19,7 +19,7 @@
 from stable_baselines3.common.env_util import make_vec_env
 from stable_baselines3.common.envs import FakeImageEnv, IdentityEnv, IdentityEnvBox
 from stable_baselines3.common.save_util import load_from_pkl, open_path, save_to_pkl
-from stable_baselines3.common.utils import get_device
+from stable_baselines3.common.utils import ConstantSchedule, FloatConverterSchedule, get_device
 from stable_baselines3.common.vec_env import DummyVecEnv
 
 MODEL_LIST = [PPO, A2C, TD3, SAC, DQN, DDPG]
@@ -821,3 +821,55 @@ def test_save_load_no_target_params(tmp_path):
     with pytest.warns(UserWarning):
         DQN.load(str(tmp_path / "test_save.zip"), env=env).learn(20)
     os.remove(tmp_path / "test_save.zip")
+
+
+@pytest.mark.parametrize("model_class", [PPO])
+def test_save_load_backward_compatible(tmp_path, model_class):
+    """
+    Test that lambdas still work when saving/loading models.
+    See GH#2115
+    """
+    env = DummyVecEnv([lambda: IdentityEnvBox(-1, 1)])
+
+    model = model_class("MlpPolicy", env, learning_rate=lambda _: 0.001, clip_range=lambda _: 0.3)
+    model.learn(total_timesteps=100)
+
+    model.save(tmp_path / "test_schedule_safe.zip")
+
+    model = model_class.load(tmp_path / "test_schedule_safe.zip", env=env)
+
+    assert model.learning_rate(0) == 0.001
+
+    assert isinstance(model.clip_range, FloatConverterSchedule)
+    assert model.clip_range.value_schedule(0) == 0.3
+
+
+@pytest.mark.parametrize("model_class", [PPO])
+def test_save_load_clip_range_portable(tmp_path, model_class):
+    """
+    Test that models using callable schedule classes (e.g., ConstantSchedule, LinearSchedule)
+    are saved and loaded correctly without segfaults across different machines.
+
+    This ensures that we don't serialize fragile lambda closures.
+    See GH#2115
+    """
+    # Create a simple env
+    env = DummyVecEnv([lambda: IdentityEnvBox(-1, 1)])
+
+    model = model_class("MlpPolicy", env)
+    model.learn(total_timesteps=100)
+
+    # Make sure that classes, not lambdas, are used by default
+    assert isinstance(model.clip_range, FloatConverterSchedule)
+    assert isinstance(model.clip_range.value_schedule, ConstantSchedule)
+    assert model.clip_range.value_schedule.val == 0.2
+
+    model.save(tmp_path / "test_schedule_safe.zip")
+
+    model = model_class.load(tmp_path / "test_schedule_safe.zip", env=env)
+
+    # Check that the model is loaded correctly
+    assert isinstance(model.clip_range, FloatConverterSchedule)
+    assert isinstance(model.clip_range.value_schedule, ConstantSchedule)
+    assert model.clip_range.value_schedule.val == 0.2
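The portability claim behind these tests can be checked directly against the standard library (a sketch; cloudpickle is the serializer SB3 uses for such objects):

import pickle

import cloudpickle

from stable_baselines3.common.utils import ConstantSchedule

# A plain class instance pickles as a class reference plus attribute dict,
# so the standard pickle module handles it and it loads on any machine
# where stable-baselines3 is installed:
schedule = ConstantSchedule(0.2)
restored = pickle.loads(pickle.dumps(schedule))
assert restored(0.0) == 0.2

fn = lambda _: 0.2
# pickle.dumps(fn) raises PicklingError: a lambda has no importable name.
# cloudpickle falls back to serializing the raw code object by value,
# which is exactly the non-portable representation this commit removes:
blob = cloudpickle.dumps(fn)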
