Skip to content

Instantly share code, notes, and snippets.

@ViniTheSwan
Last active February 3, 2022 19:59
Show Gist options
  • Save ViniTheSwan/a09546822435d023820ba472b855df81 to your computer and use it in GitHub Desktop.
Save ViniTheSwan/a09546822435d023820ba472b855df81 to your computer and use it in GitHub Desktop.
import gym
from stable_baselines3 import PPO
# Train a PPO agent on LunarLander-v2, save it to disk, then render the
# trained policy acting in the environment.
#
# Alternative: train on several parallel environments instead of one
# (make_vec_env is not imported by this script and must be imported first):
# env = make_vec_env("LunarLander-v2", n_envs=8)

# Create environment
env = gym.make('LunarLander-v2')
try:
    # Instantiate the agent
    model = PPO('MlpPolicy', env, verbose=1)
    # Train the agent
    model.learn(total_timesteps=int(2e6))
    # Save the agent
    model.save("ppo_lunar2")
    # To skip training, load the agent saved above instead:
    # model = PPO.load("ppo_lunar2", env=env)

    # Enjoy trained agent
    obs = env.reset()
    for _ in range(10000):
        # deterministic=True asks the policy for its deterministic action
        # rather than a sampled one.
        action, _states = model.predict(obs, deterministic=True)
        obs, reward, done, info = env.step(action)
        env.render()
        if done:
            # Episode finished: start a new one so the loop can keep going.
            obs = env.reset()
finally:
    # Always release the environment (and its render window), even when
    # training or playback is interrupted.
    env.close()
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment