Saving DRL Module doesn't work

Hello, I have built a module and can't understand why my module is not being saved, although there are several saving mechanisms in the code.

I have a Deep Reinforcement Learning hybrid module with A3C, RNN, PPO, LSTM, and GAE, and the code uses the following packages:

import json
import os
import time
from collections import deque
from datetime import datetime, timedelta

import gym
import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
import tensorflow as tf
from gym import spaces
from sklearn.preprocessing import StandardScaler
from tensorflow import keras
from tensorflow.keras import layers

Here is my saving code snippet:

class DRLSavingMechanism:
    """Checkpointing helper for a DRL agent/environment pair.

    Each checkpoint consists of four sibling files sharing one prefix:
    model weights (``.h5``), training progress (JSON), environment
    account state (JSON), and a sample of recent experiences (``.npz``).

    Requires ``os``, ``json``, and ``numpy`` (as ``np``) to be imported
    at module level -- the original post's import list omitted ``os``,
    so ``__init__`` raised ``NameError`` before any file could be
    written. That is why "the module does not create any file".
    """

    def __init__(self, agent, env, save_dir='./checkpoints'):
        """
        Args:
            agent: object exposing ``actor_critic`` (a Keras-style model
                with ``save_weights``/``load_weights``) and ``memory``
                (with ``states``/``actions``/``rewards`` sequences).
            env: environment whose scalar account attributes (balance,
                equity, position, ...) are snapshotted per checkpoint.
            save_dir: directory for checkpoint files; created if missing.
        """
        self.agent = agent
        self.env = env
        self.save_dir = save_dir
        os.makedirs(self.save_dir, exist_ok=True)

        self.best_reward = float('-inf')
        self.episode_rewards = []
        self.total_steps = 0

    def _checkpoint_prefix(self, episode):
        """Return the common path prefix for one checkpoint's files."""
        return os.path.join(self.save_dir, f'checkpoint_episode_{episode}')

    def save_checkpoint(self, episode):
        """Persist weights, progress, env state, and recent experiences.

        Args:
            episode: episode number used to name the checkpoint files.
        """
        checkpoint_path = self._checkpoint_prefix(episode)

        # Save model weights (HDF5 format selected by the .h5 suffix).
        self.agent.actor_critic.save_weights(checkpoint_path + '_weights.h5')

        # Save training progress. default=float coerces numpy scalars
        # (e.g. np.float32 rewards), which json cannot serialize natively.
        progress = {
            'episode': episode,
            'total_steps': self.total_steps,
            'best_reward': self.best_reward,
            'episode_rewards': self.episode_rewards,
        }
        with open(checkpoint_path + '_progress.json', 'w') as f:
            json.dump(progress, f, default=float)

        # Snapshot of the environment's scalar account state.
        # NOTE(review): assumes these attributes are plain numbers or
        # numpy scalars (coerced via default=float) -- confirm against
        # the environment implementation.
        env_state = {
            'balance': self.env.balance,
            'equity': self.env.equity,
            'position': self.env.position,
            'used_margin': self.env.used_margin,
            'free_margin': self.env.free_margin,
            'unrealized_pnl': self.env.unrealized_pnl,
            'last_trade_price': self.env.last_trade_price,
            'overnight_fee': self.env.overnight_fee,
        }
        with open(checkpoint_path + '_env_state.json', 'w') as f:
            json.dump(env_state, f, default=float)

        # Save a sample of the most recent experiences (last 1000).
        recent_experiences = {
            'states': self.agent.memory.states[-1000:],
            'actions': self.agent.memory.actions[-1000:],
            'rewards': self.agent.memory.rewards[-1000:],
        }
        np.savez(checkpoint_path + '_experiences.npz', **recent_experiences)

    def load_checkpoint(self, episode):
        """Restore weights, progress counters, and environment state.

        Args:
            episode: episode number of the checkpoint to load.

        Raises:
            FileNotFoundError: if the checkpoint files do not exist.
        """
        checkpoint_path = self._checkpoint_prefix(episode)

        # Restore model weights into the existing architecture.
        self.agent.actor_critic.load_weights(checkpoint_path + '_weights.h5')

        # Restore training progress counters.
        with open(checkpoint_path + '_progress.json', 'r') as f:
            progress = json.load(f)
        self.total_steps = progress['total_steps']
        self.best_reward = progress['best_reward']
        self.episode_rewards = progress['episode_rewards']

        # Restore environment account state attribute-by-attribute.
        with open(checkpoint_path + '_env_state.json', 'r') as f:
            env_state = json.load(f)
        for key, value in env_state.items():
            setattr(self.env, key, value)

        # Experiences are intentionally not reloaded: PPO is on-policy,
        # so stale transitions must not be reused for updates.

    def update(self, episode, reward, steps):
        """Record an episode result and checkpoint when warranted.

        Saves when a new best episode reward is reached OR every 100
        episodes -- but at most once per call. (The original saved the
        same checkpoint twice when both conditions held.)

        Args:
            episode: episode number just finished.
            reward: total reward obtained in that episode.
            steps: number of environment steps taken in that episode.
        """
        self.total_steps += steps
        self.episode_rewards.append(reward)

        is_best = reward > self.best_reward
        if is_best:
            self.best_reward = reward

        if is_best or episode % 100 == 0:
            self.save_checkpoint(episode)

The module does not create any file. I don't understand why.

I would assume it's because you do not call the save_checkpoint function.

You can add print() statements to show where your code is going and
confirm that it's working as you expect. At some point what the code is
doing and what you expect will be different and then you will know where
to fix your code.

Start with a print() in save_checkpoint and see if it is ever called.