BipedalWalker-V3

Box2D

LunarLanderとほぼ同じソース。モデルの箇所を変更すると動く。

import imageio
import numpy as np

import gymnasium as gym

from stable_baselines3 import PPO
from stable_baselines3.common.evaluation import evaluate_policy

# Train a PPO agent on BipedalWalker, round-trip it through disk, evaluate it,
# and record a single deterministic episode as an animated GIF.

PRJ_NAME = "BipedalWalker"  # environment id handed to gym.make
VER = "-V3"  # suffix used only for the saved model / GIF file names
STEP = 1e2  # training timesteps; a float, so it is cast to int where used

# "rgb_array" render mode is requested so frames can be collected below.
env = gym.make(PRJ_NAME, render_mode="rgb_array")

model = PPO("MlpPolicy", env, verbose=1)
model.learn(total_timesteps=int(STEP), progress_bar=True)
model.save(PRJ_NAME + VER)
del model  # delete trained model to demonstrate loading
model = PPO.load(PRJ_NAME + VER, env=env)

# The evaluation result is kept in these names; the script does not print it.
mean_reward, std_reward = evaluate_policy(
    model, model.get_env(), n_eval_episodes=10
)

vec_env = model.get_env()
obs = vec_env.reset()

# Roll the policy forward until the environment reports the episode is done,
# collecting one rendered frame per step.
images = []
while True:
    action, _states = model.predict(obs, deterministic=True)
    obs, rewards, dones, info = vec_env.step(action)
    images.append(vec_env.render("rgb_array"))

    if dones:
        print("done")
        break

# Keep every second frame to halve the GIF size.
# Fix: the original line had an unbalanced quote around ".gif" (SyntaxError).
gif_path = f"{PRJ_NAME}{VER}-{int(STEP)}.gif"
imageio.mimsave(gif_path, [np.array(frame) for frame in images[::2]], fps=29)
BipedalWalker-V3 学習回数:100回
BipedalWalker-V3 学習回数:10,000回
BipedalWalker-V3 学習回数:100,000回
BipedalWalker-V3 学習回数:1,000,000回
タイトルとURLをコピーしました