# Nearly the same source as the LunarLander script; it runs once the model section is changed.
import imageio
import numpy as np
import gymnasium as gym
from stable_baselines3 import PPO
from stable_baselines3.common.evaluation import evaluate_policy
# Train a PPO agent, save and reload it, evaluate it, then record one
# deterministic episode and write it out as an animated GIF.
PRJ_NAME = "BipedalWalker"  # environment id handed to gym.make
VER = "-V3"  # suffix used only when building the model / GIF file names
STEP = 1e2  # requested number of training timesteps (cast to int on use)

env = gym.make(PRJ_NAME, render_mode="rgb_array")

model = PPO("MlpPolicy", env, verbose=1)
model.learn(total_timesteps=int(STEP), progress_bar=True)
model.save(PRJ_NAME + VER)
del model  # delete trained model to demonstrate loading

model = PPO.load(PRJ_NAME + VER, env=env)
vec_env = model.get_env()

# Report the evaluation result instead of silently discarding it.
mean_reward, std_reward = evaluate_policy(model, vec_env, n_eval_episodes=10)
print(f"mean_reward={mean_reward:.2f} +/- {std_reward:.2f}")

# Roll out the policy until the environment signals the end of the episode,
# collecting one rendered frame per step.
obs = vec_env.reset()
images = []
while True:
    action, _states = model.predict(obs, deterministic=True)
    obs, rewards, dones, info = vec_env.step(action)
    images.append(vec_env.render("rgb_array"))
    # NOTE(review): truth-testing `dones` directly presumes the vectorized
    # env wraps exactly one environment (a single done flag) - confirm.
    if dones:
        print("done")
        break

# Keep every second frame to halve the GIF size.
imageio.mimsave(
    f"{PRJ_NAME}{VER}-{int(STEP)}.gif",
    [np.array(frame) for frame in images[::2]],
    fps=29,
)