when I execute this code the following error occurs: TypeError: unhashable type: ‘dict’
Code:
import random, math
import numpy as np
from collections import defaultdict
class QLearningAgent():
"""
Q-Learning agent
get_q_value, set_q_value
"""
def __init__(self, alpha, epsilon, discount, get_legal_actions):
self.get_legal_actions = get_legal_actions
self._q_values = \
defaultdict(lambda: defaultdict(lambda: 0))
self.alpha = alpha
self.epsilon = epsilon
self.descount = discount
def get_q_value(self, state, action):
return self._q_values[state][action]
def set_qvalue(self, state, action, value):
self._q_values[state][action] = value
# Let's add the ability to our agent to calculate ratings V:
def get_value(self, state):
"""
Returns the value of the utility function,
calculated by Q[state, action],
"""
possible_actions = self.get_legal_actions(state)
value = max([self.get_q_value(state, action) for action in possible_actions])
return value
QLearningAgent.get_value = get_value
# Our agent's strategy will be to choose the best action, according to the estimates Q:
def get_policy(self, state):
"""
Selects the best action according to the strategy.
"""
possible_actions = self.get_legal_actions(state)
# Selects the best action according to the strategy.
best_action = None
for action in possible_actions:
if best_action is None:
best_action = action
elif self.get_q_value(state, action) > self.get_q_value(state, best_action):
best_action = action
return best_action
QLearningAgent.get_policy = get_policy
# For a specific situation, we will select an action using -greedy approach:
def get_action(self, state):
"""
Selects the action to be taken in this
condition, including research (eps greedy)
With probability self.epsilon we take random
action, otherwise action according to strategy
(self.get_policy)
"""
possible_actions = self.get_legal_actions(state)
# Selecting an action using the eps-greedy approach
if np.random.random() < self.epsilon:
action = np.random.choice(possible_actions, 1)[0]
else:
action = self.get_policy(state)
return action
QLearningAgent.get_action = get_action
def update(self, state, action, next_state, reward):
"""
Q-update function
"""
# Perform a Q update,
# use the getQValue and setQValue methods
t = self.alpha*(reward + self.discount*self.get_value(next_state) - self.get_q_value(state, action))
reference_qvalue = self.get_q_value(state, action) + t
self.set_q_value(state, action, reference_qvalue)
QLearningAgent.update = update
## Testing the agent for taxi tasks
import gym
env = gym.make('Taxi-v3')
n_actions = env.action_space.n
def play_and_train(env, agent, t_max=10**4):
"""
The function launches the full game,
using agent strategy (agent.get_action(s))
performs an agent update (agent.update(...))
and returns the total reward
"""
total_reward = 0.0
s = env.reset()
for t in range(t_max):
# Select an action
a = agent.get_action(s)
next_s, r, done, _, _ = env.step(a)
# We update the strategy
agent.update(s, a, next_s, r)
s = next_s
total_reward +=r
if done:
break
return total_reward
import matplotlib.pyplot as plt
from IPython.display import clear_output
agent = QLearningAgent(alpha=0.5, epsilon=0.1,
discount=0.9,
get_legal_actions=lambda s: range(
n_actions))
assert 'get_policy' in dir(agent)
rewards = []
for i in range(5000):
rewards.append(play_and_train(env, agent))
if i % 100 == 0:
clear_output(True)
print('eps =', agent.epsilon,
'mean reward =', np.mean(rewards[-10:]))
print('alpha=', agent.alpha)
plt.plot(rewards)
plt.show()
# And the error itself:
TypeError Traceback (most recent call last)
Cell In[16], line 13
10 rewards = []
12 for i in range(5000):
---> 13 rewards.append(play_and_train(env, agent))
15 if i % 100 == 0:
16 clear_output(True)
Cell In[15], line 19, in play_and_train(env, agent, t_max)
15 s = env.reset()
17 for t in range(t_max):
18 # Выбираем действие
---> 19 a = agent.get_action(s)
20 next_s, r, done, _, _ = env.step(a)
22 # Выполняем обновление стратегии
Cell In[13], line 17, in get_action(self, state)
15 action = np.random.choice(possible_actions, 1)[0]
16 else:
---> 17 action = self.get_policy(state)
19 return action
Cell In[12], line 12, in get_policy(self, state)
10 if best_action is None:
11 best_action = action
---> 12 elif self.get_q_value(state, action) > self.get_q_value(state, best_action):
13 best_action = action
15 return best_action
Cell In[10], line 20, in QLearningAgent.get_q_value(self, state, action)
19 def get_q_value(self, state, action):
---> 20 return self._q_values[state][action]
TypeError: unhashable type: 'dict'
