|
# Copyright 2017 reinforce.io. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# ==============================================================================
| 15 | + |
# Python 2/3 compatibility imports (alphabetical).
from __future__ import absolute_import
from __future__ import division
from __future__ import print_function
| 19 | + |
| 20 | + |
class BaseRunner(object):
    """
    Base class for all runner classes.

    A Runner coordinates one or more Agents acting in one or more Environments
    and accumulates per-episode statistics (rewards, timesteps, wall-times).
    Subclasses must implement `run` and `close`.
    """
    def __init__(self, agent, environment, repeat_actions=1, history=None):
        """
        Args:
            agent (Agent): Agent object (or list of Agent objects) to use for the run.
            environment (Environment): Environment object (or list of Environment objects) to use for the run.
            repeat_actions (int): How many times the same given action will be repeated in subsequent calls to
                Environment's `execute` method. Rewards collected in these calls are accumulated and reported
                as a sum in the following call to Agent's `observe` method.
            history (dict): A dictionary containing an already run experiment's results. Keys should be:
                episode_rewards (list of rewards), episode_timesteps (lengths of episodes),
                episode_times (run-times).
        """
        self.agent = agent
        self.environment = environment
        self.repeat_actions = repeat_actions

        self.global_episode = None  # the global episode number (across all (parallel) agents)
        self.global_timestep = None  # the global time step (across all (parallel) agents)

        self.start_time = None  # TODO: is this necessary here? global start time (episode?, overall?)

        # Lists of episode data (rewards, wall-times, timesteps); populated by `reset`.
        self.episode_rewards = None  # list of accumulated episode rewards
        self.episode_timesteps = None  # list of total timesteps taken in the episodes
        self.episode_times = None  # list of durations for the episodes

        self.reset(history)

    def reset(self, history=None):
        """
        Resets the Runner's internal stats counters.
        If `history` is None or empty, all stats lists start out empty.

        Args:
            history (dict): A dictionary containing an already run experiment's results. Keys should be:
                episode_rewards (list of rewards), episode_timesteps (lengths of episodes),
                episode_times (run-times).
        """
        if not history:
            history = dict()

        self.episode_rewards = history.get("episode_rewards", list())
        self.episode_timesteps = history.get("episode_timesteps", list())
        self.episode_times = history.get("episode_times", list())

    def close(self):
        """
        Should perform clean-up operations on the Runner's Agent(s) and Environment(s).
        """
        raise NotImplementedError

    def run(self, num_episodes, num_timesteps, max_episode_timesteps, deterministic, episode_finished, summary_report,
            summary_interval):
        """
        Executes this runner by starting to act (via Agent(s)) in the given Environment(s).
        Stops execution according to certain conditions (e.g. max. number of episodes, etc..).
        Calls callback functions after each episode and/or after some summary criteria are met.

        Args:
            num_episodes (int): Max. number of episodes to run globally in total (across all threads/workers).
            num_timesteps (int): Max. number of time steps to run globally in total (across all threads/workers).
            max_episode_timesteps (int): Max. number of timesteps per episode.
            deterministic (bool): If True, actions are selected deterministically (no exploration/sampling);
                if False, the agent may explore when selecting actions.
            episode_finished (callable): A function to be called once an episode has finished. Should take
                a BaseRunner object and some worker ID (e.g. thread-ID or task-ID). Can decide for itself
                every how many episodes it should report something and what to report.
            summary_report (callable): Deprecated; function that could produce a summary over the training
                progress so far.
            summary_interval (int): Deprecated; the number of time steps to execute (globally)
                before summary_report is called.
        """
        raise NotImplementedError

    # Backwards compatibility: older code read `episode`/`timestep` directly.
    @property
    def episode(self):
        """
        Deprecated property `episode` -> use `global_episode` instead.
        """
        return self.global_episode

    @property
    def timestep(self):
        """
        Deprecated property `timestep` -> use `global_timestep` instead.
        """
        return self.global_timestep
| 111 | + |
| 112 | + |
0 commit comments