123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143 |
- import glob
- import json
- import os
- import matplotlib
- matplotlib.use('Agg')
- import matplotlib.pyplot as plt
- import numpy as np
- from scipy.signal import medfilt
- matplotlib.rcParams.update({'font.size': 8})
def smooth_reward_curve(x, y):
    """Smooth ``y`` with a centred moving average and thin out the result.

    The window half-width is ``min(31, ceil(len(x) / 30))``. ``x`` is trimmed
    by that many samples at each end so it lines up with the ``'valid'``
    part of the convolution, and both series are then strided by
    ``max(floor(len(trimmed_x) / 1000), 1)``.

    Args:
        x: 1-D sequence of sample positions.
        y: 1-D sequence of values, same length as ``x``.

    Returns:
        Tuple ``(x_smoothed, y_smoothed)``.
    """
    half = min(31, int(np.ceil(len(x) / 30)))
    window = np.ones(2 * half + 1)

    # Windowed sum of the signal divided by the windowed sum of an all-ones
    # signal, i.e. the mean over each window.
    windowed_sum = np.convolve(y, window, mode='valid')
    windowed_count = np.convolve(np.ones_like(y), window, mode='valid')
    y_mean = windowed_sum / windowed_count

    x_trimmed = x[half:-half]
    stride = max(int(np.floor(len(x_trimmed) / 1e3)), 1)
    return x_trimmed[::stride], y_mean[::stride]
def fix_point(x, y, interval):
    """Resample the curve ``(x, y)`` onto a regular grid by linear interpolation.

    Grid positions are ``0, interval, 2 * interval, ...`` for
    ``int(max(x) / interval + 1)`` points. Each grid position is interpolated
    on the segment ``x[p] .. x[p + 1]`` that contains it; positions before
    ``x[0]`` are extrapolated from the first segment. ``x`` is expected to be
    in ascending order.

    Args:
        x: sequence of sample positions.
        y: sequence of sample values, same length as ``x``.
        interval: spacing of the output grid.

    Returns:
        Tuple ``(fx, fy)`` of Python lists with the grid positions and the
        interpolated values. Both are empty when ``x`` has fewer than two
        points.
    """
    # NOTE: earlier revisions called ``np.insert(x, 0, 0)`` / ``np.insert(y,
    # 0, 0)`` here and threw the result away; ``np.insert`` returns a new
    # array, so those calls did nothing and have been dropped. Actually
    # prepending the origin would duplicate x == 0 for data that already
    # starts there and yield a zero-width first segment.
    if len(x) == 0:
        # ``max`` of an empty sequence raises; nothing to resample anyway.
        return [], []

    fx, fy = [], []
    pointer = 0
    ninterval = int(max(x) / interval + 1)

    for i in range(ninterval):
        tmpx = interval * i

        # Advance to the segment whose right end is at or beyond tmpx.
        while pointer + 1 < len(x) and tmpx > x[pointer + 1]:
            pointer += 1

        if pointer + 1 < len(x):
            alpha = (y[pointer + 1] - y[pointer]) / \
                (x[pointer + 1] - x[pointer])
            tmpy = y[pointer] + alpha * (tmpx - x[pointer])
            fx.append(tmpx)
            fy.append(tmpy)

    return fx, fy
def load_data(indir, smooth, bin_size):
    """Load every ``*.monitor.csv`` in ``indir`` into one resampled curve.

    Each file's first two lines are skipped. Every following row is split on
    commas: column 2 is the sort key (a time value), column 1 is added to the
    running timestep count, and column 0 becomes the y value. Rows from all
    files are merged, ordered by the time column, and y is paired with the
    number of timesteps accumulated before that row.
    (Presumably these are Gym ``Monitor`` logs - confirm against the writer.)

    Args:
        indir: directory to search for ``*.monitor.csv`` files.
        smooth: ``1`` applies ``smooth_reward_curve``, ``2`` applies a
            9-wide median filter to y; any other value leaves the data as is.
        bin_size: minimum number of rows required, and the grid spacing
            passed to ``fix_point``.

    Returns:
        ``[x, y]`` as returned by ``fix_point``, or ``[None, None]`` when
        fewer than ``bin_size`` rows (or no rows at all) were found.
    """
    episodes = []
    for path in glob.glob(os.path.join(indir, '*.monitor.csv')):
        with open(path, 'r') as f:
            # Skip the two header lines.
            f.readline()
            f.readline()
            for line in f:
                if not line.strip():
                    # A blank (e.g. trailing) line would otherwise raise
                    # IndexError on the column lookup below.
                    continue
                fields = line.split(',')
                episodes.append(
                    (float(fields[2]), int(fields[1]), float(fields[0])))

    # Stable sort on the time column so rows from all files interleave.
    episodes.sort(key=lambda entry: entry[0])

    result = []
    timesteps = 0
    for _, length, value in episodes:
        result.append([timesteps, value])
        timesteps += length

    # ``not result`` also covers bin_size <= 0 with no data, where indexing
    # the empty array below would fail.
    if not result or len(result) < bin_size:
        return [None, None]

    curve = np.array(result)
    x, y = curve[:, 0], curve[:, 1]

    if smooth == 1:
        x, y = smooth_reward_curve(x, y)
    elif smooth == 2:
        y = medfilt(y, kernel_size=9)

    x, y = fix_point(x, y, bin_size)
    return [x, y]
# Ten hex colour strings available for plot lines (they appear to match
# matplotlib's default ten-colour cycle).
color_defaults = [
    '#1f77b4', '#ff7f0e', '#2ca02c', '#d62728', '#9467bd',
    '#8c564b', '#e377c2', '#7f7f7f', '#bcbd22', '#17becf',
]
def visdom_plot(viz, win, folder, game, name, bin_size=100, smooth=1):
    """Render the reward curve found in ``folder`` and send it to Visdom.

    Args:
        viz: object exposing ``image(array, win=...)`` (a ``Visdom`` client).
        win: window handle to update; passed through to ``viz.image``.
        folder: directory handed to ``load_data``.
        game: used as the plot title; names containing ``'NoFrameskip'`` get
            a 0-10M x axis, all others a 0-1M x axis.
        name: legend label for the curve.
        bin_size: forwarded to ``load_data``.
        smooth: forwarded to ``load_data``.

    Returns:
        Whatever ``viz.image`` returns, or ``win`` unchanged when
        ``load_data`` has no data to plot.
    """
    tx, ty = load_data(folder, smooth, bin_size)
    if tx is None or ty is None:
        return win

    fig = plt.figure()
    try:
        plt.plot(tx, ty, label="{}".format(name))

        if 'NoFrameskip' in game:
            plt.xticks([1e6, 2e6, 4e6, 6e6, 8e6, 10e6],
                       ["1M", "2M", "4M", "6M", "8M", "10M"])
            plt.xlim(0, 10e6)
        else:
            # The last tick used to be 1e5, which put the "1M" label on top
            # of the "0.1M" one.
            plt.xticks([1e5, 2e5, 4e5, 6e5, 8e5, 1e6],
                       ["0.1M", "0.2M", "0.4M", "0.6M", "0.8M", "1M"])
            plt.xlim(0, 1e6)

        plt.xlabel('Number of Timesteps')
        plt.ylabel('Rewards')
        plt.title(game)
        plt.legend(loc=4)

        # Render explicitly so the RGB buffer is populated. plt.show() was
        # dropped: the module selects the non-interactive Agg backend, where
        # it displays nothing.
        fig.canvas.draw()
        width, height = fig.canvas.get_width_height()
        # np.frombuffer replaces the deprecated binary mode of np.fromstring;
        # copy() keeps the array writable as before.
        image = np.frombuffer(fig.canvas.tostring_rgb(), dtype=np.uint8)
        image = image.reshape((height, width, 3)).copy()
    finally:
        # Always release the figure, even if plotting failed.
        plt.close(fig)

    # Reorder (height, width, channel) to (channel, height, width) before
    # handing the array to viz.image.
    image = np.transpose(image, (2, 0, 1))
    return viz.image(image, win=win)
if __name__ == "__main__":
    # Manual run: plot the monitor logs under /tmp/gym/ on a Visdom client
    # created with default settings.
    from visdom import Visdom

    viz = Visdom()
    visdom_plot(
        viz,
        None,
        folder='/tmp/gym/',
        game='BreakOut',
        name='a2c',
        bin_size=100,
        smooth=1,
    )
|