From 143ebc713233f0e49fe4b771212edd05cf5bac7f Mon Sep 17 00:00:00 2001 From: Max Zuo Date: Mon, 2 Aug 2021 01:22:39 -0400 Subject: [PATCH] Faster Q value update for DDDQN - TF 2 Faster implementation for calculating q_target for training the DDDQN - in your video you mention the slow speed at which it runs. With this small change, it should run significantly faster. I've tested it using `np.array_equal` and it produces the same results as the original method. --- .../DeepQLearning/dueling_ddqn_tf2.py | 12 +++++------- 1 file changed, 5 insertions(+), 7 deletions(-) diff --git a/ReinforcementLearning/DeepQLearning/dueling_ddqn_tf2.py b/ReinforcementLearning/DeepQLearning/dueling_ddqn_tf2.py index ebc840f..bb54ef1 100644 --- a/ReinforcementLearning/DeepQLearning/dueling_ddqn_tf2.py +++ b/ReinforcementLearning/DeepQLearning/dueling_ddqn_tf2.py @@ -40,7 +40,7 @@ def __init__(self, max_size, input_shape): dtype=np.float32) self.action_memory = np.zeros(self.mem_size, dtype=np.int32) self.reward_memory = np.zeros(self.mem_size, dtype=np.float32) - self.terminal_memory = np.zeros(self.mem_size, dtype=np.bool) + self.terminal_memory = np.zeros(self.mem_size, dtype=bool) #np.bool deprecated def store_transition(self, state, action, reward, state_, done): index = self.mem_cntr % self.mem_size @@ -118,12 +118,10 @@ def learn(self): q_target = q_pred.numpy() max_actions = tf.math.argmax(self.q_eval(states_), axis=1) - # improve on my solution! - for idx, terminal in enumerate(dones): - #if terminal: - #q_next[idx] = 0.0 - q_target[idx, actions[idx]] = rewards[idx] + \ - self.gamma*q_next[idx, max_actions[idx]]*(1-int(dones[idx])) + # faster numpy implementation: + q_target[np.arange(self.batch_size),actions] = rewards + self.gamma * \
 q_next.numpy()[np.arange(self.batch_size),max_actions] * (1-dones) + self.q_eval.train_on_batch(states, q_target) self.epsilon = self.epsilon - self.eps_dec if self.epsilon > \