Hello, I use SB3 along with Isaac Lab to train my agent. My SB3 is installed using ./isaaclab.sh --install sb3, and my Isaac Lab version is cloned from the araffin fork, branch feat/sb3-optim.
During my tests, I ran into the same problem described in #1966.
As can be seen from the profiling output in the log, some memory is not released correctly, which makes memory usage explode over time.
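For context, the growth is easy to see with a simple process-memory probe. The sketch below (psutil-based; the callback name and logging interval are illustrative, not my exact profiling code) is the kind of check that exposes it:

import os

import psutil
from stable_baselines3.common.callbacks import BaseCallback


class MemoryUsageCallback(BaseCallback):
    """Illustrative callback: log the process RSS every `check_freq` steps to watch for leaks."""

    def __init__(self, check_freq: int = 10_000, verbose: int = 0):
        super().__init__(verbose)
        self.check_freq = check_freq
        self._process = psutil.Process(os.getpid())

    def _on_step(self) -> bool:
        if self.n_calls % self.check_freq == 0:
            rss_mb = self._process.memory_info().rss / 1024**2
            self.logger.record("debug/process_rss_mb", rss_mb)
        return True

A leak shows up as this value growing steadily across rollouts instead of plateauing.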
To Reproduce
# Copyright (c) 2022-2025, The Isaac Lab Project Developers.
# All rights reserved.
#
# SPDX-License-Identifier: BSD-3-Clause

"""Script to train RL agent with Stable Baselines3.

Example: ./isaaclab.sh -p scripts/reinforcement_learning/sb3/train.py --task Isaac-Velocity-Flat-Unitree-A1-v0 --num_envs 2048 --headless --seed 2
"""

"""Launch Isaac Sim Simulator first."""

import argparse
import contextlib
import signal
import sys
from pathlib import Path

from isaaclab.app import AppLauncher

# add argparse arguments
parser = argparse.ArgumentParser(description="Train an RL agent with Stable-Baselines3.")
parser.add_argument("--video", action="store_true", default=False, help="Record videos during training.")
parser.add_argument("--video_length", type=int, default=200, help="Length of the recorded video (in steps).")
parser.add_argument("--video_interval", type=int, default=2000, help="Interval between video recordings (in steps).")
parser.add_argument("--num_envs", type=int, default=None, help="Number of environments to simulate.")
parser.add_argument("--task", type=str, default=None, help="Name of the task.")
parser.add_argument("--seed", type=int, default=None, help="Seed used for the environment")
parser.add_argument("--log_interval", type=int, default=100_000, help="Log data every n timesteps.")
parser.add_argument("--max_iterations", type=int, default=None, help="RL Policy training iterations.")
parser.add_argument(
    "--keep_all_info",
    action="store_true",
    default=False,
    help="Use a slower SB3 wrapper but keep all the extra training info.",
)
# append AppLauncher cli args
AppLauncher.add_app_launcher_args(parser)
# parse the arguments
args_cli, hydra_args = parser.parse_known_args()
# always enable cameras to record video
if args_cli.video:
    args_cli.enable_cameras = True

# clear out sys.argv for Hydra
sys.argv = [sys.argv[0]] + hydra_args

# launch omniverse app
app_launcher = AppLauncher(args_cli)
simulation_app = app_launcher.app


def cleanup_pbar(*args):
    """A small helper to stop training and clean up the progress bar properly on ctrl+c."""
    import gc

    tqdm_objects = [obj for obj in gc.get_objects() if "tqdm" in type(obj).__name__]
    for tqdm_object in tqdm_objects:
        if "tqdm_rich" in type(tqdm_object).__name__:
            tqdm_object.close()
    raise KeyboardInterrupt


# disable KeyboardInterrupt override
signal.signal(signal.SIGINT, cleanup_pbar)

"""Rest everything follows."""

import gymnasium as gym
import numpy as np
import os
import random
from datetime import datetime

from stable_baselines3 import PPO

# from e3catch.tasks.direct.learning.policy.ppo_netfree import E3CatchPPO
from stable_baselines3.common.callbacks import CheckpointCallback, LogEveryNTimesteps
from stable_baselines3.common.vec_env import VecNormalize

from isaaclab.envs import (
    DirectMARLEnv,
    DirectMARLEnvCfg,
    DirectRLEnvCfg,
    ManagerBasedRLEnvCfg,
    multi_agent_to_single_agent,
)
from isaaclab.utils.dict import print_dict
from isaaclab.utils.io import dump_pickle, dump_yaml
from isaaclab_rl.sb3 import Sb3VecEnvWrapper, process_sb3_cfg

import isaaclab_tasks  # noqa: F401
from isaaclab_tasks.utils.hydra import hydra_task_config

import e3catch.tasks  # noqa: F401


@hydra_task_config(args_cli.task, "sb3_cfg_entry_point")
def main(env_cfg: ManagerBasedRLEnvCfg | DirectRLEnvCfg | DirectMARLEnvCfg, agent_cfg: dict):
    """Train with stable-baselines agent."""
    # randomly sample a seed if seed = -1
    if args_cli.seed == -1:
        args_cli.seed = random.randint(0, 10000)

    # override configurations with non-hydra CLI arguments
    env_cfg.scene.num_envs = args_cli.num_envs if args_cli.num_envs is not None else env_cfg.scene.num_envs
    agent_cfg["seed"] = args_cli.seed if args_cli.seed is not None else agent_cfg["seed"]
    # max iterations for training
    if args_cli.max_iterations is not None:
        agent_cfg["n_timesteps"] = args_cli.max_iterations * agent_cfg["n_steps"] * env_cfg.scene.num_envs

    # set the environment seed
    # note: certain randomizations occur in the environment initialization so we set the seed here
    env_cfg.seed = agent_cfg["seed"]
    env_cfg.sim.device = args_cli.device if args_cli.device is not None else env_cfg.sim.device

    # directory for logging into
    run_info = datetime.now().strftime("%Y-%m-%d_%H-%M-%S")
    log_root_path = os.path.abspath(os.path.join("logs", "sb3", args_cli.task))
    print(f"[INFO] Logging experiment in directory: {log_root_path}")
    print(f"Exact experiment name requested from command line: {run_info}")
    log_dir = os.path.join(log_root_path, run_info)

    # dump the configuration into log-directory
    dump_yaml(os.path.join(log_dir, "params", "env.yaml"), env_cfg)
    dump_yaml(os.path.join(log_dir, "params", "agent.yaml"), agent_cfg)
    dump_pickle(os.path.join(log_dir, "params", "env.pkl"), env_cfg)
    dump_pickle(os.path.join(log_dir, "params", "agent.pkl"), agent_cfg)

    # save command used to run the script
    command = " ".join(sys.orig_argv)
    (Path(log_dir) / "command.txt").write_text(command)

    # post-process agent configuration
    agent_cfg = process_sb3_cfg(agent_cfg)
    # read configurations about the agent-training
    policy_arch = agent_cfg.pop("policy")
    n_timesteps = agent_cfg.pop("n_timesteps")

    # create isaac environment
    env = gym.make(args_cli.task, cfg=env_cfg, render_mode="rgb_array" if args_cli.video else None)
    # convert to single-agent instance if required by the RL algorithm
    if isinstance(env.unwrapped, DirectMARLEnv):
        env = multi_agent_to_single_agent(env)

    # wrap for video recording
    if args_cli.video:
        video_kwargs = {
            "video_folder": os.path.join(log_dir, "videos", "train"),
            "step_trigger": lambda step: step % args_cli.video_interval == 0,
            "video_length": args_cli.video_length,
            "disable_logger": True,
        }
        print("[INFO] Recording videos during training.")
        print_dict(video_kwargs, nesting=4)
        env = gym.wrappers.RecordVideo(env, **video_kwargs)

    # wrap around environment for stable baselines
    env = Sb3VecEnvWrapper(env, fast_variant=not args_cli.keep_all_info)

    if "normalize_input" in agent_cfg:
        env = VecNormalize(
            env,
            training=True,
            norm_obs="normalize_input" in agent_cfg and agent_cfg.pop("normalize_input"),
            norm_reward="normalize_value" in agent_cfg and agent_cfg.pop("normalize_value"),
            clip_obs="clip_obs" in agent_cfg and agent_cfg.pop("clip_obs"),
            gamma=agent_cfg["gamma"],
            clip_reward=np.inf,
        )

    # create agent from stable baselines
    agent = PPO(policy_arch, env, verbose=1, tensorboard_log=log_dir, **agent_cfg)

    # callbacks for agent
    checkpoint_callback = CheckpointCallback(save_freq=1000, save_path=log_dir, name_prefix="model", verbose=2)
    callbacks = [checkpoint_callback, LogEveryNTimesteps(n_steps=args_cli.log_interval)]
    env.unwrapped.cfg.run_type = "train"  # set run type to train

    # train the agent
    with contextlib.suppress(KeyboardInterrupt):
        agent.learn(
            total_timesteps=n_timesteps,
            callback=callbacks,
            progress_bar=True,
            log_interval=None,
        )

    # save the final model
    agent.save(os.path.join(log_dir, "model"))
    print("Saving to:")
    print(os.path.join(log_dir, "model.zip"))
    if isinstance(env, VecNormalize):
        print("Saving normalization")
        env.save(os.path.join(log_dir, "model_vecnormalize.pkl"))

    # close the simulator
    env.close()


if __name__ == "__main__":
    # run the main function
    main()
    # close sim app
    simulation_app.close()
The reproduction steps use the script above, where Template-StateCatch-Direct-v0 is my custom environment and task; an illustrative launch command is shown below.
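For completeness, the launch follows the usual pattern (the script path and all arguments other than the task name are illustrative, not my exact invocation):

./isaaclab.sh -p scripts/reinforcement_learning/sb3/train.py --task Template-StateCatch-Direct-v0 --num_envs 2048 --headless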
Hello everyone, I found that this problem may be related to some calculations in the env. I will share my findings once I figure out why this happens and how to solve it.
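In case it helps others while I investigate: one pattern I am checking for is env-side buffers that store tensors which still carry autograd history. A minimal, purely illustrative sketch of the difference (the class and method names are hypothetical, not from my task):

import torch


class RewardHistoryBuffer:
    """Illustrative only: appending tensors that still reference the autograd graph
    keeps every step's intermediate tensors alive, so memory grows each iteration."""

    def __init__(self):
        self.history: list[torch.Tensor] = []

    def append_leaky(self, reward: torch.Tensor) -> None:
        # keeps the whole computation graph of this step reachable
        self.history.append(reward)

    def append_safe(self, reward: torch.Tensor) -> None:
        # detach (and clone) so only the values are stored, not the graph
        self.history.append(reward.detach().clone())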