|
# Copyright 2017 reinforce.io. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# ==============================================================================
| 15 | + |
# Python 2/3 compatibility imports (alphabetical).
from __future__ import absolute_import
from __future__ import division
from __future__ import print_function
| 19 | + |
| 20 | + |
class BaseRunner(object):
    """
    Base class for all runner classes.

    A Runner coordinates one or more Agents acting in one or more Environments
    and accumulates per-episode statistics (rewards, timesteps, wall-times).
    Subclasses must implement `run` and `close`.
    """
    def __init__(self, agent, environment, repeat_actions=1, history=None):
        """
        Args:
            agent (Agent): Agent object (or list of Agent objects) to use for the run.
            environment (Environment): Environment object (or list of Environment objects) to use for the run.
            repeat_actions (int): How many times the same given action will be repeated in subsequent calls to
                Environment's `execute` method. Rewards collected in these calls are accumulated and reported
                as a sum in the following call to Agent's `observe` method.
            history (dict): A dictionary containing an already run experiment's results. Keys should be:
                episode_rewards (list of rewards), episode_timesteps (lengths of episodes),
                episode_times (run-times).
        """
        self.agent = agent
        self.environment = environment
        self.repeat_actions = repeat_actions

        self.global_episode = None  # the global episode number (across all (parallel) agents)
        self.global_timestep = None  # the global time step (across all (parallel) agents)

        self.start_time = None  # TODO: is this necessary here? global start time (episode?, overall?)

        # Lists of episode data (rewards, wall-times, timesteps); populated by `reset`.
        self.episode_rewards = None  # list of accumulated episode rewards
        self.episode_timesteps = None  # list of total timesteps taken in the episodes
        self.episode_times = None  # list of durations for the episodes

        self.reset(history)

    def reset(self, history=None):
        """
        Resets the Runner's internal stats counters.
        If `history` is None or empty, all stats lists start out empty.

        Args:
            history (dict): A dictionary containing an already run experiment's results. Keys should be:
                episode_rewards (list of rewards), episode_timesteps (lengths of episodes),
                episode_times (run-times).
        """
        if not history:
            history = dict()

        self.episode_rewards = history.get("episode_rewards", list())
        self.episode_timesteps = history.get("episode_timesteps", list())
        self.episode_times = history.get("episode_times", list())

    def close(self):
        """
        Should perform clean-up operations on the Runner's Agent(s) and Environment(s).
        """
        raise NotImplementedError

    def run(self, num_episodes, num_timesteps, max_episode_timesteps, deterministic, episode_finished, summary_report,
            summary_interval):
        """
        Executes this runner by starting to act (via Agent(s)) in the given Environment(s).
        Stops execution according to certain conditions (e.g. max. number of episodes, etc..).
        Calls callback functions after each episode and/or after some summary criteria are met.

        Args:
            num_episodes (int): Max. number of episodes to run globally in total (across all threads/workers).
            num_timesteps (int): Max. number of time steps to run globally in total (across all threads/workers).
            max_episode_timesteps (int): Max. number of timesteps per episode.
            deterministic (bool): If True, actions are selected deterministically (no exploration/sampling);
                if False, the agent may explore when selecting actions.
            episode_finished (callable): A function to be called once an episode has finished. Should take
                a BaseRunner object and some worker ID (e.g. thread-ID or task-ID). Can decide for itself
                every how many episodes it should report something and what to report.
            summary_report (callable): Deprecated; function that could produce a summary over the training
                progress so far.
            summary_interval (int): Deprecated; the number of time steps to execute (globally)
                before summary_report is called.
        """
        raise NotImplementedError

    # Backwards compatibility: older code read `episode`/`timestep` directly.
    @property
    def episode(self):
        """
        Deprecated property `episode` -> use `global_episode` instead.
        """
        return self.global_episode

    @property
    def timestep(self):
        """
        Deprecated property `timestep` -> use `global_timestep` instead.
        """
        return self.global_timestep
| 111 | + |
| 112 | + |
0 commit comments