PLEASE HELP DEBUG DQN AGENT

I keep getting an error in the load function and I'm at my wits' end, so I was wondering if anyone could help me debug the program. The DQN agent should get data from KuCoin and Kaggle, calculate technical indicators, and use them to decide whether to open a long or short position. If 3 or more indicators point to the price going up, the AI opens a long; if 3 or more point to the price going down, it opens a short. All positions are opened with a 20% stop loss and a 30% take profit, and a position is only closed once it hits one or the other. The reward is +1 for a take profit and -1 for a stop loss, with an epsilon-greedy exploration system. Please give me some insights.
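To pin down what I mean by the reward rule, here is a standalone restatement of it for a long position (the prices are invented and long_exit_reward is just an illustration, not part of the program):

entry_price = 100.0
stop_loss = entry_price * 0.8    # 20% stop loss -> 80.0
take_profit = entry_price * 1.3  # 30% take profit -> 130.0

def long_exit_reward(close):
    # The position stays open until the close crosses one of the two levels
    if close >= take_profit:
        return 1    # reward for hitting the take profit
    if close <= stop_loss:
        return -1   # penalty for hitting the stop loss
    return None     # still open, no reward yet

print(long_exit_reward(135.0))  # 1
print(long_exit_reward(75.0))   # -1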

import gym
import numpy as np
import tensorflow as tf
import os
import urllib.request
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Dense
from collections import deque
import random
import pandas as pd
from ta.trend import SMAIndicator
from ta.momentum import RSIIndicator
from ta.volume import VolumeWeightedAveragePrice
from ta.trend import MACD
from kucoin.client import Client
import kaggle

# Define the DQN model class

class DQN:
    def __init__(self, state_size, action_size):  # "def init" needs the double underscores
        self.state_size = state_size
        self.action_size = action_size
        self.memory = deque(maxlen=2000)
        self.gamma = 0.95    # discount rate
        self.epsilon = 1.0   # exploration rate
        self.epsilon_min = 0.01
        self.epsilon_decay = 0.995
        self.learning_rate = 0.001
        self.model = self._build_model()

    def _build_model(self):
        model = Sequential()
        model.add(Dense(24, input_dim=self.state_size, activation='relu'))
        model.add(Dense(24, activation='relu'))
        model.add(Dense(self.action_size, activation='linear'))
        # recent Keras optimizers take learning_rate, not lr
        model.compile(loss='mse', optimizer=tf.keras.optimizers.Adam(learning_rate=self.learning_rate))
        return model

    def remember(self, state, action, reward, next_state, done):
        self.memory.append((state, action, reward, next_state, done))

    def act(self, state):
        if np.random.rand() <= self.epsilon:
            return random.randrange(self.action_size)
        act_values = self.model.predict(state, verbose=0)
        return np.argmax(act_values[0])

    def replay(self, batch_size):
        # random.sample raises ValueError if memory holds fewer than batch_size transitions
        if len(self.memory) < batch_size:
            return
        minibatch = random.sample(self.memory, batch_size)
        for state, action, reward, next_state, done in minibatch:
            target = reward
            if not done:
                target = reward + self.gamma * np.amax(self.model.predict(next_state, verbose=0)[0])
            target_f = self.model.predict(state, verbose=0)
            target_f[0][action] = target
            self.model.fit(state, target_f, epochs=1, verbose=0)
        if self.epsilon > self.epsilon_min:
            self.epsilon *= self.epsilon_decay
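
For what it's worth, the classic gotcha with this class is that model.predict expects a 2-D batch, so every state has to carry a leading batch dimension before it reaches act, remember, or replay. A tiny shape check with dummy numbers (agent_check is a throwaway name):

# States must be batched as (1, state_size) before they reach the network
agent_check = DQN(state_size=4, action_size=2)
s = np.zeros((1, 4))        # one state, already wrapped in a batch dimension
a = agent_check.act(s)      # epsilon-greedy: random action or argmax of the Q-values
assert a in (0, 1)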





# Define the necessary variables and hyperparameters

state_size = 55       # create_state flattens 11 columns (OHLCV + 6 indicators) over a 5-row window: 11 * 5
num_indicators = 5    # Number of past rows included in each state window
action_size = 2       # Number of possible actions (long or short)
batch_size = 32       # Batch size for replay memory
num_episodes = 1000   # Total number of episodes for training

# Initialize the KuCoin client

client = Client(api_key='YOUR_API_KEY', api_secret='YOUR_API_SECRET', passphrase='YOUR_PASSPHRASE')

# Function to load and preprocess the historical data

def load_data():
    # Load historical data from Kaggle using the Kaggle API
    dataset_id = 'jeremijenkins/kucoin'
    dataset_path = 'kucoin.csv'

    # Download the dataset using the Kaggle API
    kaggle.api.dataset_download_files(dataset_id, path='.', unzip=True)

    # Read the dataset file into a DataFrame.
    # NOTE: this frame was previously assigned to df and silently overwritten by
    # the KuCoin frame below, so it now gets its own name (it is still unused)
    kaggle_df = pd.read_csv(dataset_path)

    # Load historical data from the KuCoin API.
    # python-kucoin's get_kline_data takes start/end (not start_time/end_time),
    # and the timestamps are in seconds rather than milliseconds
    data = client.get_kline_data('BTC-USDT', '1day', start=1567296000, end=1621440000)

    # Create a DataFrame from the data.
    # Each KuCoin kline is [time, open, close, high, low, volume, turnover],
    # so the column list needs all 7 names, in that order
    df = pd.DataFrame(data, columns=['timestamp', 'open', 'close', 'high', 'low', 'volume', 'turnover'])
    # Values arrive as strings; the indicators need floats
    df[['open', 'close', 'high', 'low', 'volume']] = df[['open', 'close', 'high', 'low', 'volume']].astype(float)
    # KuCoin returns candles newest-first; flip into chronological order
    df = df.iloc[::-1].reset_index(drop=True)

    # Calculate additional technical indicators
    df['sma_20'] = SMAIndicator(df['close'], window=20).sma_indicator()
    df['sma_50'] = SMAIndicator(df['close'], window=50).sma_indicator()
    df['rsi'] = RSIIndicator(df['close'], window=14).rsi()
    # VolumeWeightedAveragePrice needs high, low, close and volume
    df['vwap'] = VolumeWeightedAveragePrice(high=df['high'], low=df['low'],
                                            close=df['close'], volume=df['volume'], window=14).vwap()
    macd = MACD(df['close'])
    df['macd'] = macd.macd()
    df['signal_line'] = macd.macd_signal()

    # Drop rows with NaN values (the indicator warm-up periods)
    df.dropna(inplace=True)

    return df
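
Since the error shows up in the load function, printing one raw kline first helps. As far as I can tell from the KuCoin docs, each candle comes back as 7 string fields (time, open, close, high, low, volume, turnover), which is why a 6-name column list fails:

# Inspect one raw kline before building the DataFrame (timestamps in seconds)
raw = client.get_kline_data('BTC-USDT', '1day', start=1567296000, end=1567900800)
print(len(raw[0]), raw[0])  # expecting 7 fields, all strings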





# Function to create the state from the current time step

def create_state(data, t, num_indicators):
    # Window of the last num_indicators rows up to and including t, flattened into one vector
    state = data.iloc[t - num_indicators + 1:t + 1][['open', 'high', 'low', 'close', 'volume', 'sma_20', 'sma_50',
                                                     'rsi', 'vwap', 'macd', 'signal_line']]
    return state.values.flatten()
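
A quick way to confirm the state layout: the window has num_indicators rows and 11 columns, so the flattened state has 55 values, which is what state_size has to match (df_check and sample are throwaway names):

# Sanity check: the flattened state length must equal state_size (11 columns * 5 rows = 55)
df_check = load_data()
sample = create_state(df_check, num_indicators - 1, num_indicators)
print(sample.shape)  # (55,)
assert sample.shape[0] == state_size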

# Function to execute the trading strategy

def execute_strategy(data, model):
    # Start at the first index where a full window of num_indicators rows exists
    state = np.reshape(create_state(data, num_indicators - 1, num_indicators), (1, state_size))
    total_profit = 0
    position = None  # None: no position, 'long': long position, 'short': short position
    entry_price = 0
    stop_loss = 0
    take_profit = 0

    for t in range(num_indicators, len(data) - 1):
        action = model.act(state)
        next_state = np.reshape(create_state(data, t + 1, num_indicators), (1, state_size))
        reward = 0

        if position is None:
            indicators_up = sum(
                data.iloc[t][['sma_20', 'sma_50', 'rsi', 'vwap', 'macd']] > data.iloc[t - 1][
                    ['sma_20', 'sma_50', 'rsi', 'vwap', 'macd']])
            indicators_down = sum(
                data.iloc[t][['sma_20', 'sma_50', 'rsi', 'vwap', 'macd']] < data.iloc[t - 1][
                    ['sma_20', 'sma_50', 'rsi', 'vwap', 'macd']])

            if action == 0 and indicators_up >= 3:  # Buy
                position = 'long'
                entry_price = data.iloc[t]['close']
                stop_loss = entry_price * 0.8    # 20% stop loss
                take_profit = entry_price * 1.3  # 30% take profit

            elif action == 1 and indicators_down >= 3:  # Sell
                position = 'short'
                entry_price = data.iloc[t]['close']
                stop_loss = entry_price * 1.2
                take_profit = entry_price * 0.7

        elif position == 'long':
            if data.iloc[t]['close'] <= stop_loss or data.iloc[t]['close'] >= take_profit:
                profit = data.iloc[t]['close'] - entry_price
                total_profit += profit
                # +1 for hitting the take profit, -1 for hitting the stop loss
                reward = 1 if data.iloc[t]['close'] >= take_profit else -1
                position = None

        elif position == 'short':
            if data.iloc[t]['close'] >= stop_loss or data.iloc[t]['close'] <= take_profit:
                profit = entry_price - data.iloc[t]['close']
                total_profit += profit
                reward = 1 if data.iloc[t]['close'] <= take_profit else -1
                position = None

        # Store the transition with the actual reward instead of a hard-coded 0
        model.remember(state, action, reward, next_state, False)
        state = next_state

    return total_profit
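
As a worked example of the 3-of-5 vote (the numbers are invented): if three of the five tracked indicators rose since the previous bar, indicators_up comes out as 3 and a long is allowed.

# Worked example of the indicator vote with invented numbers
prev = pd.Series({'sma_20': 1.00, 'sma_50': 2.00, 'rsi': 50.0, 'vwap': 3.00, 'macd': 0.10})
curr = pd.Series({'sma_20': 1.05, 'sma_50': 1.95, 'rsi': 55.0, 'vwap': 2.90, 'macd': 0.20})
print(int(sum(curr > prev)))  # 3 -> enough rising indicators to open a long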





# Initialize the DQN agent

agent = DQN(state_size, action_size)

# Training loop

# Load the data once; calling load_data inside the loop re-downloads everything every episode
data = load_data()

for episode in range(num_episodes):
    total_profit = execute_strategy(data, agent)
    agent.replay(batch_size)
    print(f"Episode: {episode + 1}, Total Profit: {total_profit}")

# Test the trained model

test_data = load_data() # Load the test data
test_total_profit = execute_strategy(test_data, agent)
print(f"Test Total Profit: {test_total_profit}")
