@@ -72,13 +72,13 @@ def get_action(self, state):
7272 return np .argmax (q_value [0 ])
7373
7474 # save sample <s,a,r,s'> to the replay memory
def append_sample(self, state, action, reward, next_state, done):
    """Save one sample <s, a, r, s'> to the replay memory and decay epsilon.

    Args:
        state: The ``s`` element of the sample.
        action: The ``a`` element of the sample.
        reward: The ``r`` element of the sample.
        next_state: The ``s'`` element of the sample.
        done: Stored alongside the sample; presumably the episode-finished
            flag -- confirm against the caller.

    Side effects:
        Appends the 5-tuple ``(state, action, reward, next_state, done)``
        to ``self.memory`` and, while ``self.epsilon`` is above
        ``self.epsilon_min``, multiplies it by ``self.epsilon_decay``.
    """
    self.memory.append((state, action, reward, next_state, done))
    if self.epsilon > self.epsilon_min:
        # A bare multiplication can overshoot the floor on the final decay
        # step (e.g. 0.15 * 0.5 -> 0.075 with a floor of 0.1), so clamp the
        # result to epsilon_min.
        self.epsilon = max(self.epsilon_min, self.epsilon * self.epsilon_decay)
7979
8080 # pick samples randomly from replay memory (with batch_size)
81- def train_replay (self ):
81+ def train_model (self ):
8282 if len (self .memory ) < self .train_start :
8383 return
8484 batch_size = min (self .batch_size , len (self .memory ))
@@ -140,9 +140,9 @@ def train_replay(self):
140140 reward = reward if not done or score == 499 else - 100
141141
142142 # save the sample <s, a, r, s'> to the replay memory
143- agent .replay_memory (state , action , reward , next_state , done )
143+ agent .append_sample (state , action , reward , next_state , done )
144144 # every time step do the training
145- agent .train_replay ()
145+ agent .train_model ()
146146 score += reward
147147 state = next_state
148148
0 commit comments