Skip to content

Commit e292c70

Browse files
author
Alexander Kuhnle
committed
Removed Optimizer being a subclass of TensorFlow optimizer class, few fixes
1 parent 1a28dbd commit e292c70

7 files changed

Lines changed: 36 additions & 123 deletions

File tree

tensorforce/contrib/ale.py

Lines changed: 3 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -88,12 +88,12 @@ def reset(self):
8888
self.gamescreen = np.empty(self.gamescreen.shape, dtype=np.uint8)
8989
return self.current_state
9090

91-
def execute(self, action):
91+
def execute(self, actions):
9292
# convert action to ale action
93-
ale_action = self.action_inds[action]
93+
ale_actions = self.action_inds[actions]
9494

9595
# get reward and process terminal & next state
96-
rew = self.ale.act(ale_action)
96+
rew = self.ale.act(ale_actions)
9797
if self.loss_of_life_termination or self.loss_of_life_reward != 0:
9898
new_lives = self.ale.lives()
9999
if new_lives < self.cur_lives:

tensorforce/contrib/deepmind_lab.py

Lines changed: 6 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -85,22 +85,22 @@ def reset(self):
8585
self.level.reset() # optional: episode=-1, seed=None
8686
return self.level.observations()[self.state_attribute]
8787

88-
def execute(self, action):
88+
def execute(self, actions):
8989
"""
9090
Pass actions to the DeepMind Lab environment, return reward, next step, terminal state and additional info.
9191
9292
:param actions: actions to execute as numpy array, should have dtype np.intc and should adhere to the specification given in DeepMindLabEnvironment.action_spec(level_id)
9393
:return: dict containing the next state, the reward, and a boolean indicating if the next state is a terminal state
9494
"""
95-
actions = list()
95+
adjusted_actions = list()
9696
for action_spec in self.level.action_spec():
9797
if action_spec['min'] == -1 and action_spec['max'] == 1:
98-
actions.append(action[action_spec['name']] - 1)
98+
adjusted_actions.append(actions[action_spec['name']] - 1)
9999
else:
100-
actions.append(action[action_spec['name']]) # clip?
101-
action = np.array(actions, dtype=np.intc)
100+
adjusted_actions.append(actions[action_spec['name']]) # clip?
101+
actions = np.array(adjusted_actions, dtype=np.intc)
102102

103-
reward = self.level.step(action=action, num_steps=self.repeat_action)
103+
reward = self.level.step(action=actions, num_steps=self.repeat_action)
104104
state = self.level.observations()['RGB_INTERLACED']
105105
terminal = not self.level.is_running()
106106
return state, terminal, reward

tensorforce/contrib/maze_explorer.py

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -50,8 +50,8 @@ def reset(self):
5050
# TODO: Reset to `ones`?
5151
return self.engine.reset()
5252

53-
def execute(self, action):
54-
state, reward, terminal, _ = self.engine.act(action)
53+
def execute(self, actions):
54+
state, reward, terminal, _ = self.engine.act(actions)
5555
return state, terminal, reward
5656

5757
@property

tensorforce/contrib/openai_gym.py

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -60,8 +60,8 @@ def close(self):
6060
def reset(self):
6161
return self.gym.reset()
6262

63-
def execute(self, action):
64-
state, reward, terminal, _ = self.gym.step(action)
63+
def execute(self, actions):
64+
state, reward, terminal, _ = self.gym.step(actions)
6565
return state, terminal, reward
6666

6767
@property

tensorforce/contrib/openai_universe.py

Lines changed: 6 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -58,25 +58,25 @@ def reset(self):
5858

5959
return state[0]
6060

61-
def execute(self, action):
62-
state, terminal, reward = self._execute(action)
61+
def execute(self, actions):
62+
state, terminal, reward = self._execute(actions)
6363
return self._wait_state(state, terminal, reward)
6464

65-
def _execute(self, action):
65+
def _execute(self, actions):
6666
pass_actions = []
67-
for action_name, value in action.items():
67+
for action_name, value in actions.items():
6868
if action_name == 'key':
6969
key_event = self._int_to_key(value)
7070
pass_actions.append(key_event)
7171
elif action_name == 'button':
7272
btn_event = self._int_to_btn(value)
73-
x, y = self._int_to_pos(action.get('position', 0))
73+
x, y = self._int_to_pos(actions.get('position', 0))
7474
pass_actions.append(universe.spaces.PointerEvent(x, y, btn_event))
7575

7676
state, reward, terminal, _ = self.env.step([pass_actions])
7777

7878
if isinstance(state[0], dict):
79-
state[0].pop('text', None) # We can't handle string states right now, so omit the text state for now
79+
state[0].pop('text', None) # We can't handle string states right now, so omit the text state for now
8080

8181
return state[0], terminal[0], reward[0]
8282

tensorforce/core/optimizers/optimizer.py

Lines changed: 13 additions & 100 deletions
Original file line numberDiff line numberDiff line change
@@ -23,7 +23,7 @@
2323
import tensorforce.core.optimizers
2424

2525

26-
class Optimizer(tf.train.GradientDescentOptimizer):
26+
class Optimizer(object):
2727
"""
2828
Generic TensorFlow optimizer which minimizes a not yet further specified expression, usually
2929
some kind of loss function. More generally, an optimizer can be considered as some method of
@@ -34,10 +34,6 @@ def __init__(self):
3434
"""
3535
Creates a new optimizer instance.
3636
"""
37-
super(Optimizer, self).__init__(self._learning_rate, use_locking=False, name='TensorForceOptimizer')
38-
39-
self._learning_rate = -1.0
40-
4137
self.variables = dict()
4238

4339
def custom_getter(getter, name, registered=False, **kwargs):
@@ -109,100 +105,17 @@ def from_spec(spec, kwargs=None):
109105
assert isinstance(optimizer, Optimizer)
110106
return optimizer
111107

112-
# modified minimize
113-
def apply_step(
114-
self,
115-
variables,
116-
deltas,
117-
global_step=None,
118-
gate_gradients=None,
119-
aggregation_method=None,
120-
colocate_gradients_with_ops=False,
121-
name=None,
122-
grad_loss=None
123-
):
124-
125-
deltas_and_vars = self.compute_deltas(
126-
deltas=deltas,
127-
var_list=variables,
128-
gate_gradients=gate_gradients,
129-
aggregation_method=aggregation_method,
130-
colocate_gradients_with_ops=colocate_gradients_with_ops,
131-
grad_loss=grad_loss
132-
)
133-
134-
vars_with_delta = [v for g, v in deltas_and_vars if g is not None]
135-
if not vars_with_delta:
136-
raise TensorForceError(
137-
"No gradients provided for any variable, check your graph for ops that do not "
138-
"support gradients, between variables {} and loss {}".format(
139-
[str(v) for _, v in deltas_and_vars], deltas
140-
)
141-
)
142-
143-
return super(Optimizer, self).apply_gradients(deltas_and_vars, global_step=global_step, name=name)
144-
145-
def compute_gradients(self, *args, **kwargs):
146-
raise NotImplementedError
147-
148-
def apply_gradients(self, *args, **kwargs):
149-
raise NotImplementedError
150-
151-
# Modified compute_gradients
152-
def compute_deltas(
153-
self,
154-
deltas,
155-
var_list=None,
156-
gate_gradients=None,
157-
aggregation_method=None,
158-
colocate_gradients_with_ops=False,
159-
grad_loss=None
160-
):
161-
if aggregation_method is not None or colocate_gradients_with_ops or grad_loss is not None:
162-
raise TensorForceError("'aggregation_method', colocate_gradients_with_ops' and 'grad_loss' arguments are not supported.")
163-
if gate_gradients is None:
164-
gate_gradients = Optimizer.GATE_OP
165-
if gate_gradients not in (Optimizer.GATE_NONE, Optimizer.GATE_OP, Optimizer.GATE_GRAPH):
166-
raise TensorForceError("'gate_gradients' must be one of: Optimizer.GATE_NONE, Optimizer.GATE_OP, Optimizer.GATE_GRAPH. Not {}".format(gate_gradients))
167-
# if isinstance(loss, tf.Tensor):
168-
# self._assert_valid_dtypes([loss])
169-
# else:
170-
# self._assert_valid_dtypes(loss)
171-
# if var_list is None:
172-
# var_list = tf.trainable_variables() + tf.get_collection(tf.GraphKeys.TRAINABLE_RESOURCE_VARIABLES)
173-
# else:
174-
# var_list = tf.python.util.nest.flatten(var_list)
175-
var_list += tf.get_collection(tf.GraphKeys._STREAMING_MODEL_PORTS)
176-
if not var_list:
177-
raise TensorForceError("No variables to optimize.")
178-
# processors = [tf.train.Optimizer._get_processor(v) for v in var_list]
179-
# var_refs = [p.target() for p in processors]
180-
# grads = gradients.gradients(loss, var_refs, grad_ys=grad_loss, gate_gradients=(gate_gradients == Optimizer.GATE_OP), aggregation_method=aggregation_method, colocate_gradients_with_ops=colocate_gradients_with_ops)
181-
182-
if gate_gradients == Optimizer.GATE_GRAPH:
183-
deltas = tf.tuple(deltas)
184-
deltas_and_vars = list(zip(deltas, var_list))
185-
self._assert_valid_dtypes([v for g, v in deltas_and_vars if g is not None and v.dtype != tf.resource])
186-
return deltas_and_vars
187-
188-
# Below, we just pass through tf optimizers
189-
def _prepare(self):
190-
return tf.train.GradientDescentOptimizer._prepare(self=self)
191-
192-
def _apply_dense(self, grad, var):
193-
return tf.train.GradientDescentOptimizer._apply_dense(self=self, grad=grad, var=var)
194-
195-
def _apply_sparse_duplicate_indices(self, grad, var):
196-
return tf.train.GradientDescentOptimizer._apply_sparse_duplicate_indices(
197-
self=self, grad=grad, var=var
198-
)
108+
def apply_step(self, variables, deltas):
109+
"""
110+
Applies step deltas to variable values.
199111
200-
def _resource_apply_dense(self, grad, handle):
201-
return tf.train.GradientDescentOptimizer._resource_apply_dense(
202-
self=self, grad=grad, handle=handle
203-
)
112+
Args:
113+
variables: List of variables.
114+
deltas: List of deltas of same length.
204115
205-
def _resource_apply_sparse_duplicate_indices(self, grad, handle, indices):
206-
return tf.train.GradientDescentOptimizer._resource_apply_sparse_duplicate_indices(
207-
self=self, grad=grad, handle=handle
208-
)
116+
Returns:
117+
The step-applied operation.
118+
"""
119+
if len(variables) != len(deltas):
120+
raise TensorForceError("Invalid variables and deltas lists.")
121+
return tf.group(*(variable.assign_add(delta=delta) for variable, delta in zip(variables, deltas)))

tensorforce/environments/minimal_test.py

Lines changed: 4 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -59,18 +59,18 @@ def reset(self):
5959

6060
def execute(self, actions):
6161
if self.single_state_action:
62-
action = (action,)
62+
actions = (actions,)
6363
else:
64-
action = tuple(action[name] for name in sorted(action))
64+
actions = tuple(actions[name] for name in sorted(actions))
6565

6666
reward = 0.0
6767
for n, (action_type, shape) in enumerate(self.specification):
6868
if action_type == 'bool' or action_type == 'int':
69-
correct = np.sum(action[n])
69+
correct = np.sum(actions[n])
7070
overall = util.prod(shape)
7171
self.state[n] = ((overall - correct) / overall, correct / overall)
7272
elif action_type == 'float' or action_type == 'bounded-float':
73-
step = np.sum(action[n]) / util.prod(shape)
73+
step = np.sum(actions[n]) / util.prod(shape)
7474
self.state[n] = max(self.state[n][0] - step, 0.0), min(self.state[n][1] + step, 1.0)
7575
reward += max(min(self.state[n][1], 1.0), 0.0)
7676

0 commit comments

Comments
 (0)