Skip to content

Commit d240395

Browse files
Updating test configs for multi-pass.
1 parent b85f96b commit d240395

10 files changed

Lines changed: 72 additions & 15 deletions

README.md

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -51,7 +51,7 @@ Features
5151
--------
5252

5353
TensorForce currently integrates with the OpenAI Gym API, OpenAI
54-
Universe, DeepMind lab, ALE and Maze explorer. The following algorithms are available (all
54+
Universe, the Unreal Engine (game engine), DeepMind lab, ALE and Maze explorer. The following algorithms are available (all
5555
policy methods both continuous/discrete and using a Beta distribution for bounded actions).
5656

5757
- A3C using distributed TensorFlow or a multithreaded runner - now as part of our generic Model

tensorforce/meta_parameter_recorder.py

Lines changed: 4 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -104,7 +104,7 @@ def convert_dictionary_to_string(self, data, indent=0, format_type=0, separator=
104104
if separator is None:
105105
separator = ", "
106106

107-
#This should not ever occur but here as a catch
107+
# This should not ever occur but here as a catch
108108
if type(data) is not dict:
109109
raise TensorForceError(
110110
"Error: MetaParameterRecorder Dictionary conversion was passed a type {}"
@@ -140,7 +140,7 @@ def convert_list_to_string(self, data, indent=0, format_type=0, eol=None, count=
140140
if eol is None:
141141
eol = os.linesep
142142

143-
#This should not ever occur but here as a catch
143+
# This should not ever occur but here as a catch
144144
if type(data) is not list:
145145
raise TensorForceError(
146146
"Error: MetaParameterRecorder List conversion was passed a type {}"
@@ -171,7 +171,7 @@ def convert_ndarray_to_md(self, data, format_type=0, eol=None):
171171
if eol is None:
172172
eol = os.linesep
173173

174-
#This should not ever occur but here as a catch
174+
# This should not ever occur but here as a catch
175175
if type(data) is not np.ndarray:
176176
raise TensorForceError(
177177
"Error: MetaParameterRecorder ndarray conversion was passed"
@@ -254,7 +254,7 @@ def build_metagraph_list(self):
254254

255255
self.ignore_unknown_dtypes = True
256256
for key in sorted(self.meta_params):
257-
value=self.convert_data_to_string(self.meta_params[key])
257+
value = self.convert_data_to_string(self.meta_params[key])
258258

259259
if len(value) == 0:
260260
continue

tensorforce/tests/base_agent_test.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -193,7 +193,7 @@ def tf_apply(self, x, internals, update, return_internals=False):
193193
name='multi',
194194
environment=environment,
195195
network_spec=CustomNetwork,
196-
**self.__class__.kwargs
196+
**self.__class__.multi_kwargs
197197
)
198198

199199
def test_lstm(self):

tensorforce/tests/test_dqfd_agent.py

Lines changed: 17 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -107,3 +107,20 @@ def pre_run(self, agent, environment):
107107

108108
agent.import_demonstrations(demonstrations)
109109
agent.pretrain(steps=1000)
110+
111+
multi_kwargs = dict(
112+
memory=dict(
113+
type='replay',
114+
capacity=1000
115+
),
116+
optimizer=dict(
117+
type="adam",
118+
learning_rate=0.01
119+
),
120+
repeat_update=1,
121+
batch_size=16,
122+
first_update=16,
123+
target_sync_frequency=10,
124+
demo_memory_capacity=100,
125+
demo_sampling_ratio=0.2
126+
)

tensorforce/tests/test_dqn_agent.py

Lines changed: 16 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -38,7 +38,7 @@ class TestDQNAgent(BaseAgentTest, unittest.TestCase):
3838
learning_rate=0.002
3939
),
4040
# Comment in to test exploration types
41-
# exploration=dict(
41+
# explorations_spec=dict(
4242
# type="epsilon_decay",
4343
# initial_epsilon=1.0,
4444
# final_epsilon=0.1,
@@ -58,3 +58,18 @@ class TestDQNAgent(BaseAgentTest, unittest.TestCase):
5858

5959
exclude_float = True
6060
exclude_bounded = True
61+
62+
multi_kwargs = dict(
63+
memory=dict(
64+
type='replay',
65+
capacity=1000
66+
),
67+
optimizer=dict(
68+
type="adam",
69+
learning_rate=0.01
70+
),
71+
repeat_update=1,
72+
batch_size=16,
73+
first_update=16,
74+
target_sync_frequency=10
75+
)

tensorforce/tests/test_dqn_memories.py

Lines changed: 9 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -40,8 +40,9 @@ def test_replay(self):
4040
type='replay',
4141
capacity=1000
4242
),
43-
batch_size=8,
44-
first_update=10,
43+
repeat_update=4,
44+
batch_size=32,
45+
first_update=64,
4546
target_sync_frequency=10
4647
)
4748

@@ -64,8 +65,9 @@ def test_prioritized_replay(self):
6465
type='prioritized_replay',
6566
capacity=1000
6667
),
67-
batch_size=8,
68-
first_update=10,
68+
repeat_update=4,
69+
batch_size=32,
70+
first_update=64,
6971
target_sync_frequency=10
7072
)
7173

@@ -87,8 +89,9 @@ def test_naive_prioritized_replay(self):
8789
type='naive_prioritized_replay',
8890
capacity=1000
8991
),
90-
batch_size=8,
91-
first_update=10,
92+
repeat_update=4,
93+
batch_size=32,
94+
first_update=64,
9295
target_sync_frequency=10
9396
)
9497

tensorforce/tests/test_dqn_nstep_agent.py

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -23,7 +23,6 @@
2323
from tensorforce.agents import DQNNstepAgent
2424

2525

26-
2726
class TestDQNNstepAgent(BaseAgentTest, unittest.TestCase):
2827

2928
agent = DQNNstepAgent
@@ -33,9 +32,10 @@ class TestDQNNstepAgent(BaseAgentTest, unittest.TestCase):
3332
batch_size=8,
3433
optimizer=dict(
3534
type='adam',
36-
learning_rate=1e-2
35+
learning_rate=0.01
3736
)
3837
)
3938

4039
exclude_float = True
4140
exclude_bounded = True
41+
exclude_multi = True

tensorforce/tests/test_ppo_agent.py

Lines changed: 8 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -30,3 +30,11 @@ class TestPPOAgent(BaseAgentTest, unittest.TestCase):
3030
kwargs = dict(
3131
batch_size=8
3232
)
33+
34+
multi_kwargs = dict(
35+
batch_size=32,
36+
step_optimizer=dict(
37+
type='adam',
38+
learning_rate=0.001
39+
)
40+
)

tensorforce/tests/test_trpo_agent.py

Lines changed: 5 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -31,3 +31,8 @@ class TestTRPOAgent(BaseAgentTest, unittest.TestCase):
3131
kwargs = dict(
3232
batch_size=8
3333
)
34+
35+
multi_kwargs = dict(
36+
batch_size=64,
37+
learning_rate=0.1
38+
)

tensorforce/tests/test_vpg_agent.py

Lines changed: 9 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -30,3 +30,12 @@ class TestVPGAgent(BaseAgentTest, unittest.TestCase):
3030
kwargs = dict(
3131
batch_size=8
3232
)
33+
34+
multi_kwargs = dict(
35+
batch_size=64,
36+
optimizer=dict(
37+
type='adam',
38+
learning_rate=0.01
39+
)
40+
)
41+

0 commit comments

Comments (0)