最近開始嘗試深度強化學習,很好的一個環境平臺是Gym: https://www.gymlibrary.dev/content/basic_usage/
安裝: pip install gym[all]
強化學習、深度強化學習資源:
- 教程:
- 深度強化學習_王樹森 https://www.bilibili.com/video/BV1rv41167yx
- 莫煩PYTHON https://mofanpy.com/tutorials/machine-learning/reinforcement-learning/intro-RL
- 方向
- 自動駕駛模擬仿真 Carla
- 項目
- 強化學習git項目總結 https://zhuanlan.zhihu.com/p/24392239
- 強化學習開源項目 https://www.zhihu.com/question/49230922/answer/1631248683
- 論文
- 其他資源
Jupyter 中顯示gym渲染窗口及保存為gif
參考:
import numpy as np
import time
import gym
import matplotlib.pyplot as plt
from matplotlib import animation
%matplotlib inline
from IPython import display
# 顯示gym渲染窗口的函數,在運行過程中將 env.render() 替換為 show_state(env, step, info).
def show_state(env, step=0, info=""):
    """Draw the environment's current frame inline in the notebook.

    Intended as a drop-in replacement for ``env.render()`` inside a
    stepping loop: each call redraws figure 3 and replaces the previous
    cell output with it.

    Args:
        env: Environment whose ``render(mode='rgb_array')`` result is
            passed to ``plt.imshow``.
        step: Step counter shown in the figure title.
        info: Free-form label appended to the title.
    """
    # Reuse one fixed figure so repeated calls redraw in place.
    fig = plt.figure(3)
    fig.clf()

    frame = env.render(mode='rgb_array')
    plt.imshow(frame)
    plt.title("Step: %d %s" % (step, info))
    plt.axis('off')

    # wait=True defers clearing until the new output is ready to be shown.
    display.clear_output(wait=True)
    display.display(fig)
def display_frames_as_gif(frames, SavePath = './test.gif'):
    """Turn a sequence of rendered frames into an animation and save it.

    Args:
        frames: Indexable sequence of images accepted by ``plt.imshow``,
            one per time step. Must contain at least one frame, because
            ``frames[0]`` is used to create the image artist.
        SavePath: Output path handed to ``Animation.save``.
    """
    # Draw the first frame once; every later frame only swaps the pixel data.
    patch = plt.imshow(frames[0])
    plt.axis('off')

    def animate(i):
        # Fixed: index with the callback argument ``i``. The original used
        # ``I``, an undefined name, which raised NameError while saving.
        patch.set_data(frames[i])

    anim = animation.FuncAnimation(plt.gcf(), animate, frames = len(frames), interval=1)
    # NOTE(review): writer='ffmpeg' presumably needs an ffmpeg binary to be
    # installed and discoverable by matplotlib — confirm on the target machine.
    anim.save(SavePath, writer='ffmpeg', fps=30)
# Example 1: drive CartPole with random actions, show it inline, save a GIF.
import gym
frames = []
env = gym.make('CartPole-v1')
# Reset the environment. Without return_info=True the result is presumably
# just the initial observation (not an info dict) — confirm for the installed gym.
observation = env.reset()
for step in range(100):
    frames.append(env.render(mode='rgb_array'))  # collect this step's image for the GIF
    show_state(env, step, info = 'CartPole_test')  # show the rendered frame in the notebook
    # Random action as a placeholder for a learned policy.
    # For CartPole: 0 pushes the cart left, 1 pushes it right
    # (np.random.choice(2) would be an equivalent random choice).
    action = env.action_space.sample()
    observation, reward, done, info = env.step(action)  # apply the action and read the outcome
    if done:
        # Start a new episode instead of stepping a finished one, matching
        # the handling used in the LunarLander example.
        observation = env.reset()
env.close()
display_frames_as_gif(frames, SavePath = './CartPole_result.gif')  # save the run as an animated GIF

CartPole_result.gif
# Example 2: drive LunarLander with seeded random actions, show it, save a GIF.
import gym
frames=[]
env = gym.make("LunarLander-v2")
env.reset()
env.action_space.seed(42)
observation, info = env.reset(seed=42, return_info=True)
for step in range(100):
    # Collect this step's image for the GIF.
    frame = env.render(mode='rgb_array')
    frames.append(frame)
    # Original author's note: this call cannot be put on the same line as the
    # env definition, otherwise an error is raised (cause unknown).
    env.render(mode='human')
    # Slow the display down.
    time.sleep(0.1)
    show_state(env, step, info="LunarLander_test")
    action = env.action_space.sample()
    observation, reward, done, info = env.step(action)
    if done:
        observation, info = env.reset(return_info=True)
env.close()
# Save the run as an animated GIF.
display_frames_as_gif(frames, SavePath = './LunarLander_result.gif')

LunarLander_result.gif