PPO Agent playing LunarLander-v2
This is a trained model of a PPO agent playing LunarLander-v2 using the stable-baselines3 library.
Usage (with Stable-Baselines3)
import gymnasium as gym
from pathlib import Path
from huggingface_sb3 import load_from_hub, package_to_hub
from huggingface_hub import (
notebook_login,
) # To log to our Hugging Face account to be able to upload models to the Hub.
from stable_baselines3 import PPO
from stable_baselines3.common.vec_env import DummyVecEnv
from stable_baselines3.common.env_util import make_vec_env
from stable_baselines3.common.evaluation import evaluate_policy
from stable_baselines3.common.monitor import Monitor
from huggingface_sb3 import package_to_hub
# Build the environment the agent will be trained on.
env = gym.make("LunarLander-v2")

# Set up a PPO agent with an MLP policy on that environment.
# The non-default hyperparameters below were added to accelerate training.
model = PPO(
    "MlpPolicy",
    env,
    verbose=1,
    n_steps=1024,
    batch_size=64,
    n_epochs=4,
    gamma=0.999,
    gae_lambda=0.98,
    ent_coef=0.01,
)
# Train the agent, unless a saved checkpoint is already present on disk.
model_name = Path("ppo-LunarLander-v2.zip")
if model_name.exists():
    print(f"[+] Model {model_name.stem} exists, skipping the learning.")
else:
    model.learn(total_timesteps=1_000_000, progress_bar=True)
    # Save under the stem (without ".zip"); the save call is expected to
    # append the extension to the filename itself.
    model.save(model_name.stem)

# Reload the agent from the checkpoint and attach the environment to it.
model = PPO.load(model_name.stem, env=env)
print(f"Loaded model: {model_name.stem}")
# Evaluate the agent on a dedicated environment wrapped in Monitor.
# evaluate_policy uses the Monitor wrapper to read the true episode returns;
# on an unwrapped env it warns and falls back to the rewards seen through
# env.step. A separate env also keeps evaluation from touching the state of
# the environment attached to the model.
print("Evaluating model")
monitored_env = Monitor(gym.make("LunarLander-v2"))
try:
    mean_reward, std_reward = evaluate_policy(
        model,
        monitored_env,
        n_eval_episodes=30,
        deterministic=True,
    )
finally:
    # Release the evaluation environment even if evaluation fails.
    monitored_env.close()
# Report both statistics with the same precision.
print(f"Mean reward = {mean_reward:.2f} +/- {std_reward:.2f}")
# Metadata describing the upload.
env_id = "LunarLander-v2"
model_architecture = "PPO"
# Hub repository id, in the form {organization}/{repo_name}.
repo_id = "WhatTheFuzz/ppo-LunarLander-v2"
commit_message = "Upload PPO LunarLander-v2 trained agent"


def _make_eval_env():
    """Create one monitored environment that renders to RGB arrays."""
    return Monitor(gym.make(env_id, render_mode="rgb_array"))


# Evaluation environment handed to package_to_hub (render_mode="rgb_array").
eval_env = DummyVecEnv([_make_eval_env])

# Package the trained agent and push it to the Hugging Face Hub.
package_to_hub(
    model=model,  # the trained agent
    model_name="ppo-LunarLander-v2",  # name given to the trained model
    model_architecture=model_architecture,  # algorithm used, here PPO
    env_id=env_id,  # name of the environment
    eval_env=eval_env,  # evaluation environment built above
    repo_id=repo_id,  # target repository on the Hub
    commit_message=commit_message,
)