TypeError: unhashable type: 'dict' in jupyter lab

when I execute this code the following error occurs: TypeError: unhashable type: ‘dict’

Code:

import random, math
import numpy as np
from collections import defaultdict

class QLearningAgent():
    
    """
    Q-Learning agent
 get_q_value, set_q_value
    """
    
    def __init__(self, alpha, epsilon, discount, get_legal_actions):
        self.get_legal_actions = get_legal_actions
        self._q_values = \
        defaultdict(lambda: defaultdict(lambda: 0))
        self.alpha = alpha
        self.epsilon = epsilon
        self.descount = discount

    def get_q_value(self, state, action):
        return self._q_values[state][action]

    def set_qvalue(self, state, action, value):
        self._q_values[state][action] = value



# Let's add the ability to our agent to calculate ratings V:

def get_value(self, state):
    """
     Returns the value of the utility function,
      calculated by Q[state, action],
    """
    possible_actions = self.get_legal_actions(state)
    value = max([self.get_q_value(state, action) for action in possible_actions])
    return value

QLearningAgent.get_value = get_value




# Our agent's strategy will be to choose the best action, according to the estimates Q:

def get_policy(self, state):
    """
    Selects the best action according to the strategy.
    """
    possible_actions = self.get_legal_actions(state)

    # Selects the best action according to the strategy.
    best_action = None
    for action in possible_actions:
        if best_action is None:
            best_action = action
        elif self.get_q_value(state, action) > self.get_q_value(state, best_action):
            best_action = action

    return best_action

QLearningAgent.get_policy = get_policy




# For a specific situation, we will select an action using -greedy approach:

def get_action(self, state):
    """
   Selects the action to be taken in this
       condition, including research (eps greedy)
      
       With probability self.epsilon we take random
       action, otherwise action according to strategy
       (self.get_policy)
    """

    possible_actions = self.get_legal_actions(state)

    # Selecting an action using the eps-greedy approach
    if np.random.random() < self.epsilon:
        action = np.random.choice(possible_actions, 1)[0]
    else:
        action = self.get_policy(state)

    return action

QLearningAgent.get_action = get_action




def update(self, state, action, next_state, reward):
     """
       Q-update function
     """
     # Perform a Q update,
     # use the getQValue and setQValue methods
     t = self.alpha*(reward + self.discount*self.get_value(next_state) - self.get_q_value(state, action))
     reference_qvalue = self.get_q_value(state, action) + t
     self.set_q_value(state, action, reference_qvalue)

QLearningAgent.update = update






## Testing the agent for taxi tasks

import gym
env = gym.make('Taxi-v3')

n_actions = env.action_space.n

def play_and_train(env, agent, t_max=10**4):
     """
       The function launches the full game,
       using agent strategy (agent.get_action(s))
       performs an agent update (agent.update(...))
       and returns the total reward
     """

     total_reward = 0.0
     s = env.reset()

     for t in range(t_max):
         # Select an action
         a = agent.get_action(s)
         next_s, r, done, _, _ = env.step(a)

         # We update the strategy
         agent.update(s, a, next_s, r)

         s = next_s
         total_reward +=r
         if done:
             break

     return total_reward



import matplotlib.pyplot as plt
from IPython.display import clear_output

agent = QLearningAgent(alpha=0.5, epsilon=0.1,
                       discount=0.9,
                       get_legal_actions=lambda s: range(
                           n_actions))

assert 'get_policy' in dir(agent)
rewards = []

for i in range(5000):
    rewards.append(play_and_train(env, agent))

    if i % 100 == 0:
        clear_output(True)
        print('eps =', agent.epsilon,
              'mean reward =', np.mean(rewards[-10:]))
        print('alpha=', agent.alpha)
        plt.plot(rewards)
        plt.show()




# And the error itself:

TypeError                                 Traceback (most recent call last)
Cell In[16], line 13
     10 rewards = []
     12 for i in range(5000):
---> 13     rewards.append(play_and_train(env, agent))
     15     if i % 100 == 0:
     16         clear_output(True)

Cell In[15], line 19, in play_and_train(env, agent, t_max)
     15 s = env.reset()
     17 for t in range(t_max):
     18     # Выбираем действие
---> 19     a = agent.get_action(s)
     20     next_s, r, done, _, _ = env.step(a)
     22     # Выполняем обновление стратегии

Cell In[13], line 17, in get_action(self, state)
     15     action = np.random.choice(possible_actions, 1)[0]
     16 else:
---> 17     action = self.get_policy(state)
     19 return action

Cell In[12], line 12, in get_policy(self, state)
     10     if best_action is None:
     11         best_action = action
---> 12     elif self.get_q_value(state, action) > self.get_q_value(state, best_action):
     13         best_action = action
     15 return best_action

Cell In[10], line 20, in QLearningAgent.get_q_value(self, state, action)
     19 def get_q_value(self, state, action):
---> 20     return self._q_values[state][action]

TypeError: unhashable type: 'dict'

You did not share the error message with us which could have told us the line of code that has the problem.

But I can guess what you are doing. You have used one dictionary as the key for another dictionary. That is not allowed becuase keys to dictionaries must be immutible.

For example I can reproduce the error like this:

% python3.12
Python 3.12.1 (v3.12.1:2305ca5144, Dec  7 2023, 17:23:38) [Clang 13.0.0 (clang-1300.0.29.30)] on darwin
Type "help", "copyright", "credits" or "license" for more information.

:>>> key = {'a': 1}
:>>> d = {}
:>>> d[key] = 'value'
Traceback (most recent call last):
  File "<stdin>", line 1, in <module>
TypeError: unhashable type: 'dict'
:>>>

try changing your key from a dict to be a tuple.

1 Like

They must be hashable (as written in the error message), not immutable. As a matter of semantics, hashability should normally imply immutability, but there is no check for this. In particular, object implements __hash__, so all manner of user-defined classes can have their instances used as dict keys, even though it might be a bad idea.

Similarly, just being immutable is not good enough. Builtin and C extension types need to have something in the tp_hash slot, whether they get it by inheritance or any other way. User-defined types can define __hash__ to raise an exception explicitly.

1 Like

I didn’t use dictionaries in the code at all, maybe it’s a matter of libraries? But use defaultdict( defaultdict(lambda: defaultdict(lambda: 0))) The same code works for the person from whom I am learning RL programming

The error message:

TypeError                                 Traceback (most recent call last)
Cell In[24], line 13
     10 rewards = []
     12 for i in range(5000):
---> 13     rewards.append(play_and_train(env, agent))
     15     if i % 100 == 0:
     16         clear_output(True)

Cell In[23], line 19, in play_and_train(env, agent, t_max)
     15 s = env.reset()
     17 for t in range(t_max):
     18     # Выбираем действие
---> 19     a = agent.get_action(s)
     20     next_s, r, done, _, _ = env.step(a)
     22     # Выполняем обновление стратегии

Cell In[21], line 17, in get_action(self, state)
     15     action = np.random.choice(possible_actions, 1)[0]
     16 else:
---> 17     action = self.get_policy(state)
     19 return action

Cell In[20], line 12, in get_policy(self, state)
     10     if best_action is None:
     11         best_action = action
---> 12     elif self.get_q_value(state, action) > self.get_q_value(state, best_action):
     13         best_action = action
     15 return best_action

Cell In[18], line 20, in QLearningAgent.get_q_value(self, state, action)
     19 def get_q_value(self, state, action):
---> 20     return self._q_values[state][action]

TypeError: unhashable type: 'dict'

Either state ir action contains the bad value.
You could add a print into the function to see what is being used.
With that information you may see what needs changing in your code.

1 Like

typo? You use self.discount later so I’m guessing it is?

yes, i use