PLEASE HELP DEBUG DQN AGENT

I keep getting an error at load function. I was wondering if anyone could help me debug the program. Im at wits end. The DQN agent should get data from kucoin and kaggle, calculate and use technical indicators decide to open a long or short position. If 3 or more point to proce going up the ai opens a long. if 3 or more point to proce gong down, opens a short. all positiosn are opened with a 20% stop loss and a 30% take profit. The positions are only closed once it hit one or the other. A reward for take profit and -1 for a stop loss. Epsilon-greedy system. Please give me some insights.

import gym
import numpy as np
import tensorflow as tf
import os
import urllib.request
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Dense
from collections import deque
import random
import pandas as pd
from ta.trend import SMAIndicator
from ta.momentum import RSIIndicator
from ta.volume import VolumeWeightedAveragePrice
from ta.trend import MACD
from kucoin.client import Client
import kaggle

Define the DQN model class

class DQN:
def init(self, state_size, action_size):
self.state_size = state_size
self.action_size = action_size
self.memory = deque(maxlen=2000)
self.gamma = 0.95 # discount rate
self.epsilon = 1.0 # exploration rate
self.epsilon_min = 0.01
self.epsilon_decay = 0.995
self.learning_rate = 0.001
self.model = self._build_model()

def _build_model(self):

    model = Sequential()

    model.add(Dense(24, input_dim=self.state_size, activation='relu'))

    model.add(Dense(24, activation='relu'))

    model.add(Dense(self.action_size, activation='linear'))

    model.compile(loss='mse', optimizer=tf.keras.optimizers.Adam(lr=self.learning_rate))

    return model

def remember(self, state, action, reward, next_state, done):

    self.memory.append((state, action, reward, next_state, done))

def act(self, state):

    if np.random.rand() <= self.epsilon:

        return random.randrange(self.action_size)

    act_values = self.model.predict(state)

    return np.argmax(act_values[0])

def replay(self, batch_size):

    minibatch = random.sample(self.memory, batch_size)

    for state, action, reward, next_state, done in minibatch:

        target = reward

        if not done:

            target = (reward + self.gamma * np.amax(self.model.predict(next_state)[0]))

        target_f = self.model.predict(state)

        target_f[0][action] = target

        self.model.fit(state, target_f, epochs=1, verbose=0)

    if self.epsilon > self.epsilon_min:

        self.epsilon *= self.epsilon_decay

Define the necessary variables and hyperparameters

state_size = 15 # Number of features in the state (OHCL data + indicators)
num_indicators = 5 # Number of additional indicators to include in the state
action_size = 2 # Number of possible actions (long or short)
batch_size = 32 # Batch size for replay memory
num_episodes = 1000 # Total number of episodes for training

Initialize the KuCoin client

client = Client(api_key='YOUR_API_KEY', api_secret='YOUR_API_SECRET', passphrase='YOUR_PASSPHRASE')

Function to load and preprocess the historical data

def load_data():
# Load historical data from Kaggle using the Kaggle API
dataset_id = 'jeremijenkins/kucoin'
dataset_path = 'kucoin.csv'

# Download the dataset using the Kaggle API

kaggle.api.dataset_download_files(dataset_id, path='.', unzip=True)

  
  
  Read the dataset file into a DataFrame


df = pd.read_csv(dataset_path)

  
  
  Load historical data from KuCoin API


data = client.get_kline_data('BTC-USDT', '1day', start_time=1567296000000, end_time=1621440000000)

  
  
  Create a DataFrame from the data


df = pd.DataFrame(data, columns=['timestamp', 'open', 'high', 'low', 'close', 'volume'])

  
  
  Calculate additional technical indicators


df['sma_20'] = SMAIndicator(df['close'], window=20).sma_indicator()

df['sma_50'] = SMAIndicator(df['close'], window=50).sma_indicator()

df['rsi'] = RSIIndicator(df['close'], window=14).rsi()

df['vwap'] = VolumeWeightedAveragePrice(df['close'], df['volume'], window=14).vwap()

df['macd'] = MACD(df['close']).macd()

df['signal_line'] = MACD(df['close']).macd_signal()

  
  
  Drop rows with NaN values


df.dropna(inplace=True)

return df

Function to create the state from the current time step

def create_state(data, t, num_indicators):
state = data.iloc[t - num_indicators + 1:t + 1][['open', 'high', 'low', 'close', 'volume', 'sma_20', 'sma_50',
'rsi', 'vwap', 'macd', 'signal_line']]
return state.values.flatten()

Function to execute the trading strategy

def execute_strategy(data, model):
state = create_state(data, 0, num_indicators)
total_profit = 0
position = None # None: no position, 'long': long position, 'short': short position
entry_price = 0
stop_loss = 0
take_profit = 0

for t in range(num_indicators, len(data) - 1):

    action = model.act(np.array([state]))

    next_state = create_state(data, t + 1, num_indicators)

if position is None:
    indicators_up = sum(
        data.iloc[t][['sma_20', 'sma_50', 'rsi', 'vwap', 'macd']] &gt; data.iloc[t - 1][
            ['sma_20', 'sma_50', 'rsi', 'vwap', 'macd']])
    indicators_down = sum(
        data.iloc[t][['sma_20', 'sma_50', 'rsi', 'vwap', 'macd']] &lt; data.iloc[t - 1][
            ['sma_20', 'sma_50', 'rsi', 'vwap', 'macd']])

    if action == 0:  # Buy
        if indicators_up &gt;= 3:
            position = 'long'
            entry_price = data.iloc[t]['close']
            stop_loss = entry_price * 0.8
            take_profit = entry_price * 1.3

    elif action == 1:  # Sell
        if indicators_down &gt;= 3:
            position = 'short'
            entry_price = data.iloc[t]['close']
            stop_loss = entry_price * 1.2
            take_profit = entry_price * 0.7

elif position == 'long':
    if data.iloc[t]['close'] &lt;= stop_loss or data.iloc[t]['close'] &gt;= take_profit:
        profit = data.iloc[t]['close'] - entry_price
        total_profit += profit
        position = None

elif position == 'short':
    if data.iloc[t]['close'] &gt;= stop_loss or data.iloc[t]['close'] &lt;= take_profit:
        profit = entry_price - data.iloc[t]['close']
        total_profit += profit
        position = None

model.remember(state, action, 0, next_state, False)
state = next_state



    

    




return total_profit

Initialize the DQN agent

agent = DQN(state_size, action_size)

Training loop

for episode in range(num_episodes):
data = load_data() # Load the data
total_profit = execute_strategy(data, agent)
agent.replay(batch_size)
print(f"Episode: {episode + 1}, Total Profit: {total_profit}")

Test the trained model

test_data = load_data() # Load the test data
test_total_profit = execute_strategy(test_data, agent)
print(f"Test Total Profit: {test_total_profit}")

CodeNewbie Community 🌱

PLEASE HELP DEBUG DQN AGENT

Define the DQN model class

Define the necessary variables and hyperparameters

Initialize the KuCoin client

Function to load and preprocess the historical data

Read the dataset file into a DataFrame

Load historical data from KuCoin API

Create a DataFrame from the data

Calculate additional technical indicators

Drop rows with NaN values

Function to create the state from the current time step

Function to execute the trading strategy

Initialize the DQN agent

Training loop

Test the trained model

Top comments (0)

Read next

Ollama Vulnerability: Unauthenticated Access Risk to Your LLM Models

Why Is SafeLine WAF So Popular? Here's What Makes It Stand Out

Testing SafeLine WAF: Can It Block SQL Injection?

New