Hello, I have a build a module and cant understand why my module is not being saved although there are several saving mechanisms in the code.
I have a Deep Reinforced Learning hybrid module with A3C,RNN,PPO,LSTM,GAE and the code utilizes the following packages:
import numpy as np
import pandas as pd
import tensorflow as tf
from tensorflow import keras
from tensorflow.keras import layers
import gym
from gym import spaces
from sklearn.preprocessing import StandardScaler
from collections import deque
import time
from datetime import datetime, timedelta
import matplotlib.pyplot as plt
import json
here is my saving code snippet:
class DRLSavingMechanism:
def __init__(self, agent, env, save_dir='./checkpoints'):
self.agent = agent
self.env = env
self.save_dir = save_dir
os.makedirs(self.save_dir, exist_ok=True)
self.best_reward = float('-inf')
self.episode_rewards = []
self.total_steps = 0
def save_checkpoint(self, episode):
checkpoint_path = os.path.join(self.save_dir, f'checkpoint_episode_{episode}')
# Save model weights
self.agent.actor_critic.save_weights(checkpoint_path + '_weights.h5')
# Save training progress
progress = {
'episode': episode,
'total_steps': self.total_steps,
'best_reward': self.best_reward,
'episode_rewards': self.episode_rewards,
}
with open(checkpoint_path + '_progress.json', 'w') as f:
json.dump(progress, f)
# Save environment state
env_state = {
'balance': self.env.balance,
'equity': self.env.equity,
'position': self.env.position,
'used_margin': self.env.used_margin,
'free_margin': self.env.free_margin,
'unrealized_pnl': self.env.unrealized_pnl,
'last_trade_price': self.env.last_trade_price,
'overnight_fee': self.env.overnight_fee,
}
with open(checkpoint_path + '_env_state.json', 'w') as f:
json.dump(env_state, f)
# Save a sample of recent experiences
recent_experiences = {
'states': self.agent.memory.states[-1000:],
'actions': self.agent.memory.actions[-1000:],
'rewards': self.agent.memory.rewards[-1000:],
}
np.savez(checkpoint_path + '_experiences.npz', **recent_experiences)
def load_checkpoint(self, episode):
checkpoint_path = os.path.join(self.save_dir, f'checkpoint_episode_{episode}')
# Load model weights
self.agent.actor_critic.load_weights(checkpoint_path + '_weights.h5')
# Load training progress
with open(checkpoint_path + '_progress.json', 'r') as f:
progress = json.load(f)
self.total_steps = progress['total_steps']
self.best_reward = progress['best_reward']
self.episode_rewards = progress['episode_rewards']
# Load environment state
with open(checkpoint_path + '_env_state.json', 'r') as f:
env_state = json.load(f)
for key, value in env_state.items():
setattr(self.env, key, value)
# Note: We don't load experiences back into memory as PPO is typically on-policy
def update(self, episode, reward, steps):
self.total_steps += steps
self.episode_rewards.append(reward)
if reward > self.best_reward:
self.best_reward = reward
self.save_checkpoint(episode)
# You can add additional logic here, e.g., save every N episodes
if episode % 100 == 0:
self.save_checkpoint(episode)
The module does not create any file. I dont undertsnd why.