Skip to content

Commit 720461b

Browse files
committed
completed alpha universe integration
1 parent a9d357e commit 720461b

2 files changed

Lines changed: 33 additions & 11 deletions

File tree

examples/openai_universe.py

Lines changed: 5 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -54,7 +54,7 @@ def main():
5454
parser.add_argument('-c', '--agent-config', help="Agent configuration file")
5555
parser.add_argument('-n', '--network-config', help="Network configuration file")
5656
parser.add_argument('-e', '--episodes', type=int, default=50000, help="Number of episodes")
57-
parser.add_argument('-t', '--max-timesteps', type=int, default=2000, help="Maximum number of timesteps per episode")
57+
parser.add_argument('-t', '--max-timesteps', type=int, default=2000*60, help="Maximum number of timesteps per episode")
5858
# parser.add_argument('-m', '--monitor', help="Save results to this directory")
5959
# parser.add_argument('-ms', '--monitor-safe', action='store_true', default=False, help="Do not overwrite previous results")
6060
# parser.add_argument('-mv', '--monitor-video', type=int, default=0, help="Save video every x steps (0 = disabled)")
@@ -90,6 +90,10 @@ def main():
9090
else:
9191
raise TensorForceError("Error: No network configuration provided.")
9292

93+
if args.debug:
94+
print("Configuration:")
95+
print(config)
96+
9397
logger = logging.getLogger(__name__)
9498
logger.setLevel(log_levels[config['loglevel']])
9599

tensorforce/environments/openai_universe.py

Lines changed: 28 additions & 10 deletions
Original file line numberDiff line numberDiff line change
@@ -57,7 +57,11 @@ def reset(self):
5757
state = self.env.reset()
5858
if state == [None]:
5959
state, r, t = self._wait_state(state, None, None)
60-
return state
60+
61+
if isinstance(state[0], dict):
62+
state[0].pop('text', None) # String states are not supported yet, so omit the text state for now
63+
64+
return state[0]
6165

6266
def execute(self, action):
6367
"""
@@ -74,14 +78,26 @@ def _execute(self, action):
7478
pass_actions.append(key_event)
7579
elif action_name == 'button':
7680
btn_event = self._int_to_btn(value)
77-
x, y = action.get('position', (0, 0))
81+
x, y = self._int_to_pos(action.get('position', 0))
7882
pass_actions.append(universe.spaces.PointerEvent(x, y, btn_event))
7983

8084
state, reward, terminal, _ = self.env.step([pass_actions])
81-
print("Got new state")
82-
print("-"*32)
83-
print(state)
84-
return state[0], reward, terminal
85+
86+
if isinstance(state[0], dict):
87+
state[0].pop('text', None) # String states are not supported yet, so omit the text state for now
88+
89+
return state[0], reward[0], terminal[0]
90+
91+
def _int_to_pos(self, flat_position):
    """Convert a flattened position integer into (x, y) screen coordinates.

    ``screen_shape[0]`` bounds x and ``screen_shape[1]`` bounds y. The index
    is decoded row by row: x is the offset inside a row, y is the row number.
    NOTE(review): row-major flattening is assumed here -- confirm against the
    code that produces the flat 'position' action value.

    Args:
        flat_position: flattened position integer

    Returns: x, y -- both always inside the screen; an index past the last
        pixel wraps around vertically instead of leaving the screen.

    """
    screen_shape = self.env.action_space.screen_shape
    width, height = screen_shape[0], screen_shape[1]
    # Taking the index modulo each dimension independently is not an inverse
    # of flattening: when width and height share a factor, several indices
    # collapse onto the same pixel and other pixels can never be addressed.
    # Split the index into (row, column) instead.
    row, x = divmod(flat_position, width)
    return x, row % height
85101

86102
def _key_to_int(self, key_event):
87103
return self.env.action_space.keys.index(key_event)
@@ -90,16 +106,17 @@ def _int_to_key(self, key_value):
90106
return self.env.action_space.keys[key_value]
91107

92108
def _btn_to_int(self, btn_event):
93-
self.env.action_space.buttonmasks.index(btn_event)
109+
return self.env.action_space.buttonmasks.index(btn_event)
94110

95111
def _int_to_btn(self, btn_value):
96-
self.env.action_space.buttonmasks[btn_value]
112+
return self.env.action_space.buttonmasks[btn_value]
97113

98114
def _wait_state(self, state, reward, terminal):
99115
"""Wait until there is a state
100116
"""
101117
while state == [None] or not state:
102118
state, reward, terminal = self._execute(dict(key=0))
119+
103120
return state, reward, terminal
104121

105122
def configure(self, *args, **kwargs):
@@ -113,8 +130,9 @@ def states(self):
113130
print(self.env.observation_space)
114131
if isinstance(self.env.observation_space, VNCObservationSpace):
115132
return dict(
116-
vision=dict(type=float, shape=(self.env.action_space.screen_shape[1], self.env.action_space.screen_shape[0], 3))
117-
#text=dict(type=int, shape=(1,))
133+
vision=dict(type=float, shape=(768, 1024, 3)) # VNCObservationSpace seems to be hardcoded to 1024x768
134+
# vision = dict(type=float, shape=(self.env.action_space.screen_shape[1], self.env.action_space.screen_shape[0], 3))
135+
# text=dict(type=str, shape=(1,)) # TODO: implement string states
118136
)
119137
elif isinstance(self.env.observation_space, Discrete):
120138
return dict(shape=(), type='float')

0 commit comments

Comments
 (0)