"""Collects measurements of an agent's performance.
Authors:
Banafsheh Rafiee, Niko Yasui
"""
import numpy as np
class Evaluator:
    """Collects measurements of an agent's performance.

    The measurements are stored as attributes and refreshed on every call
    to :meth:`update`:

    * ``MSRE`` / ``MSRE_over_time`` -- return error on a fixed set of
      sample states (only when ``use_MSRE`` is true).
    * ``td_error`` / ``avg_td_error`` -- latest and running-average TD error.
    * ``rupee`` -- RUPEE estimate (see Adam White's PhD thesis, section
      8.4.2).
    * ``mean_rho`` / ``mean_squared_rho`` / ``ESS`` -- running statistics of
      ``rho`` (presumably the importance-sampling ratio) and the derived
      effective sample size.

    Args:
        gvf_name (str): Name of the GVF used for plotting. Also selects the
            file ``actual_return_<gvf_name>.npz`` read when ``use_MSRE`` is
            true.
        num_features (int): Total number of features in the representation.
        alpha_rupee (float): Primary learning rate for RUPEE.
        beta0_rupee (float): Initial averaging weight for RUPEE.
        use_MSRE (bool): Calculate Mean Squared Return Error.
    """

    # Capacity of the per-time-step MSRE history buffer.
    MAX_TIME_STEPS = 1000000

    def __init__(self,
                 gvf_name,
                 num_features,
                 alpha_rupee,
                 beta0_rupee,
                 use_MSRE):
        # Load the state representation and actual return for sample states.
        if use_MSRE:
            # NpzFile holds an open file handle; the context manager makes
            # sure it is released once the arrays have been read.
            with np.load("actual_return_" + gvf_name + ".npz") as data:
                self.samples_phi = data["samples"]
                self.samples_G = data["_return"]
                self.sample_size = data["sample_size"]

        # Initialize the performance measures.
        self.MSRE = 0.0
        self.MSRE_over_time = np.zeros(self.MAX_TIME_STEPS)

        self.td_error = 0.0
        self.avg_td_error = 0.0

        self.mean_rho = 0.0
        self.mean_squared_rho = 0.0
        self.ESS = 0.0

        self.use_MSRE = use_MSRE

        # See Adam White's PhD Thesis, section 8.4.2
        self.alpha_rupee = alpha_rupee
        self.beta0_rupee = beta0_rupee
        self.tau_rupee = 0.0
        self.hhat = np.zeros(num_features)
        self.td_elig_avg = np.zeros(num_features)
        self.rupee = 0.0

    def update(self, *args, **kwargs):
        """Refresh every performance measure for the current time step.

        All arguments are forwarded unchanged to each ``compute_*`` method,
        and every method picks out the ones it needs by name. Because the
        methods expect different leading parameters, callers should pass
        everything by keyword: ``theta`` (only needed when ``use_MSRE`` is
        true), ``time_step``, ``delta``, ``tderr_elig``, ``phi`` and ``rho``.
        """
        if self.use_MSRE:
            self.compute_MSRE(*args, **kwargs)
        self.compute_avg_td_error(*args, **kwargs)
        self.compute_rupee(*args, **kwargs)
        self.compute_IS_ess(*args, **kwargs)

    def compute_MSRE(self, theta, time_step, *args, **kwargs):
        """Measure the return error of ``theta`` on the stored sample states.

        The squared difference between the estimated value ``theta . phi``
        and the stored return is accumulated over all samples, divided by
        ``sample_size`` and square-rooted -- so, despite the name, the value
        stored is a root-mean-squared error.

        The result is written to ``self.MSRE`` and to
        ``self.MSRE_over_time[time_step]``. Every 10th time step the whole
        history buffer is saved to ``MSRE_over_time.npz``.

        Args:
            theta: Weight vector used to estimate the value of each sample.
            time_step (int): Index of the current step; must be smaller than
                ``MAX_TIME_STEPS``.
        """
        return_error = 0.0
        for i, phi in enumerate(self.samples_phi):
            error = np.dot(theta, phi) - self.samples_G[i]
            return_error = return_error + error * error

        MSRE = np.sqrt(return_error / self.sample_size)
        self.MSRE = MSRE
        self.MSRE_over_time[time_step] = MSRE

        if time_step % 10 == 0:
            np.savez("MSRE_over_time.npz",
                     MSRE=self.MSRE_over_time,
                     time_step=time_step)

    def compute_rupee(self, tderr_elig, phi, *args, **kwargs):
        """Update the RUPEE estimate in ``self.rupee``.

        Args:
            tderr_elig: Vector combined with ``hhat`` and averaged into
                ``td_elig_avg`` (presumably the TD error times the
                eligibility trace -- confirm against the caller).
            phi: Feature vector of the current state.
        """
        # TODO: add condition to change for control gvf
        self.hhat += self.alpha_rupee * (
            tderr_elig - np.inner(self.hhat, phi) * phi)

        # tau normalizes the averaging weight so that early updates are not
        # biased towards the zero initialization of td_elig_avg.
        self.tau_rupee *= 1 - self.beta0_rupee
        self.tau_rupee += self.beta0_rupee
        beta_rupee = self.beta0_rupee / self.tau_rupee

        self.td_elig_avg *= 1 - beta_rupee
        self.td_elig_avg += beta_rupee * tderr_elig

        self.rupee = np.sqrt(
            np.absolute(np.inner(self.hhat, self.td_elig_avg)))

    def compute_avg_td_error(self, delta, time_step, *args, **kwargs):
        """Record the latest TD error and fold it into the running average.

        Args:
            delta: TD error of the current step.
            time_step (int): Zero-based index of the current step; the
                average is taken over ``time_step + 1`` values.
        """
        self.avg_td_error += (delta - self.avg_td_error) / (time_step + 1)
        self.td_error = delta

    def compute_IS_ess(self, rho, time_step, *args, **kwargs):
        """Update the running statistics of ``rho`` and the derived ESS.

        Nothing is updated at ``time_step == 0``. Otherwise the running
        means of ``rho`` and ``rho**2`` are advanced with weight
        ``1 / time_step`` and
        ``ESS = time_step * mean_rho**2 / mean_squared_rho``.

        Args:
            rho: Ratio observed at the current step (presumably the
                importance-sampling ratio).
            time_step (int): Index of the current step.
        """
        if time_step == 0:
            return

        self.mean_rho += (rho - self.mean_rho) / time_step
        self.mean_squared_rho += (
            (rho ** 2 - self.mean_squared_rho) / time_step)

        if self.mean_squared_rho == 0:
            # Every ratio seen so far was zero: there are no effective
            # samples, and the formula below would divide by zero.
            self.ESS = 0.0
        else:
            self.ESS = (time_step * self.mean_rho ** 2
                        / self.mean_squared_rho)