Skip to content

Commit 02fb71f

Browse files
author
Alexander Kuhnle
committed
NAF added, regularization loss handling improved, various fixes and formatting
1 parent 7020bd6 commit 02fb71f

33 files changed

Lines changed: 708 additions & 364 deletions

tensorforce/agents/__init__.py

Lines changed: 5 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -23,8 +23,8 @@
2323
from tensorforce.agents.ppo_agent import PPOAgent
2424
from tensorforce.agents.dqn_agent import DQNAgent
2525
from tensorforce.agents.dqn_nstep_agent import DQNNstepAgent
26+
from tensorforce.agents.naf_agent import NAFAgent
2627
from tensorforce.agents.dqfd_agent import DQFDAgent
27-
# from tensorforce.agents.naf_agent import NAFAgent
2828
# from tensorforce.agents.categorical_dqn_agent import CategoricalDQNAgent
2929

3030
agents = dict(
@@ -35,8 +35,8 @@
3535
ppo_agent=PPOAgent,
3636
dqn_agent=DQNAgent,
3737
dqn_nstep_agent=DQNNstepAgent,
38-
# naf_agent=NAFAgent,
39-
dqfd_agent=DQFDAgent,
38+
naf_agent=NAFAgent,
39+
dqfd_agent=DQFDAgent
4040
# PPOAgent=PPOAgent,
4141
# CategoricalDQNAgent=CategoricalDQNAgent,
4242
)
@@ -51,7 +51,8 @@
5151
'TRPOAgent',
5252
'PPOAgent',
5353
'DQNAgent',
54-
'DQFDAgent',
5554
'DQNNstepAgent',
55+
'DQFDAgent',
56+
'NAFAgent',
5657
'agents'
5758
]

tensorforce/agents/dqfd_agent.py

Lines changed: 8 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -21,7 +21,7 @@
2121

2222
from tensorforce.agents import MemoryAgent
2323
from tensorforce.core.memories import Replay
24-
from tensorforce.models import DQFDModel
24+
from tensorforce.models import QDemoModel
2525

2626

2727
class DQFDAgent(MemoryAgent):
@@ -126,12 +126,16 @@ def __init__(self, states_spec, actions_spec, network_spec, config):
126126
'demo_batch_size is positive. (Calculated {} based on current' \
127127
' parameters)'.format(self.demo_batch_size)
128128

129-
super(DQFDAgent, self).__init__(states_spec, actions_spec, config)
130-
131129
# This is the demonstration memory that we will fill with observations before starting
132130
# the main training loop
133131
self.demo_memory = Replay(self.demo_memory_capacity, self.states_spec, self.actions_spec)
134132

133+
super(DQFDAgent, self).__init__(
134+
states_spec=states_spec,
135+
actions_spec=actions_spec,
136+
config=config
137+
)
138+
135139
def observe(self, reward, terminal):
136140
"""
137141
Adds observations, updates via sampling from memories according to update rate.
@@ -194,7 +198,7 @@ def set_demonstrations(self, batch):
194198
)
195199

196200
def initialize_model(self, states_spec, actions_spec, config):
197-
return DQFDModel(
201+
return QDemoModel(
198202
states_spec=states_spec,
199203
actions_spec=actions_spec,
200204
network_spec=self.network_spec,

tensorforce/agents/naf_agent.py

Lines changed: 118 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,118 @@
1+
# Copyright 2017 reinforce.io. All Rights Reserved.
2+
#
3+
# Licensed under the Apache License, Version 2.0 (the "License");
4+
# you may not use this file except in compliance with the License.
5+
# You may obtain a copy of the License at
6+
#
7+
# http://www.apache.org/licenses/LICENSE-2.0
8+
#
9+
# Unless required by applicable law or agreed to in writing, software
10+
# distributed under the License is distributed on an "AS IS" BASIS,
11+
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12+
# See the License for the specific language governing permissions and
13+
# limitations under the License.
14+
# ==============================================================================
15+
16+
from __future__ import absolute_import
17+
from __future__ import print_function
18+
from __future__ import division
19+
20+
from tensorforce.agents import MemoryAgent
21+
from tensorforce.models import QNAFModel
22+
23+
24+
class NAFAgent(MemoryAgent):
25+
"""
26+
NAF: https://arxiv.org/abs/1603.00748
27+
28+
### Configuration options
29+
30+
#### General:
31+
32+
* `scope`: TensorFlow variable scope name (default: 'naf')
33+
34+
#### Hyperparameters:
35+
36+
* `batch_size`: Positive integer (**mandatory**)
37+
* `learning_rate`: positive float (default: 1e-3)
38+
* `discount`: Positive float, at most 1.0 (default: 0.99)
39+
* `normalize_rewards`: Boolean (default: false)
40+
* `entropy_regularization`: None or positive float (default: none)
41+
42+
#### Optimizer:
43+
44+
* `optimizer`: Specification dict (default: Adam with learning rate 1e-3)
45+
46+
#### Pre-/post-processing:
47+
48+
* `state_preprocessing`: None or dict (default: none)
49+
* `exploration`: None or dict (default: none)
50+
* `reward_preprocessing`: None or dict (default: none)
51+
52+
#### Logging:
53+
54+
* `log_level`: Logging level, one of the following values (default: 'info')
55+
+ 'info', 'debug', 'critical', 'warning', 'fatal'
56+
57+
#### TensorFlow Summaries:
58+
* `summary_logdir`: None or summary directory string (default: none)
59+
* `summary_labels`: List of summary labels to be reported, some possible values below (default: 'total-loss')
60+
+ 'total-loss'
61+
+ 'losses'
62+
+ 'variables'
63+
+ 'activations'
64+
+ 'relu'
65+
* `summary_frequency`: Positive integer (default: 1)
66+
"""
67+
68+
default_config = dict(
69+
# Agent
70+
preprocessing=None,
71+
exploration=None,
72+
reward_preprocessing=None,
73+
# MemoryAgent
74+
# missing, not documented!
75+
# Model
76+
optimizer=dict(
77+
type='adam',
78+
learning_rate=1e-3
79+
),
80+
discount=0.99,
81+
normalize_rewards=False,
82+
# DistributionModel
83+
distributions=None, # not documented!!!
84+
entropy_regularization=None,
85+
# QModel
86+
target_sync_frequency=10000, # not documented!!!
87+
target_update_weight=1.0, # not documented!!!
88+
double_dqn=False, # not documented!!!
89+
huber_loss=0.0, # not documented!!!
90+
# Logging
91+
log_level='info',
92+
model_directory=None,
93+
save_frequency=600, # TensorFlow default
94+
summary_labels=['total-loss'],
95+
summary_frequency=120, # TensorFlow default
96+
# TensorFlow distributed configuration
97+
cluster_spec=None,
98+
parameter_server=False,
99+
task_index=0,
100+
device=None,
101+
local_model=False,
102+
replica_model=False,
103+
scope='naf'
104+
)
105+
106+
def __init__(self, states_spec, actions_spec, network_spec, config):
107+
self.network_spec = network_spec
108+
config = config.copy()
109+
config.default(self.__class__.default_config)
110+
super(NAFAgent, self).__init__(states_spec, actions_spec, config)
111+
112+
def initialize_model(self, states_spec, actions_spec, config):
113+
return QNAFModel(
114+
states_spec=states_spec,
115+
actions_spec=actions_spec,
116+
network_spec=self.network_spec,
117+
config=config
118+
)

tensorforce/core/baselines/aggregated_baseline.py

Lines changed: 28 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -59,9 +59,32 @@ def tf_predict(self, states):
5959
prediction = self.linear.apply(x=predictions)
6060
return tf.squeeze(input=prediction, axis=1)
6161

62+
def tf_regularization_loss(self):
63+
if super(AggregatedBaseline, self).tf_regularization_loss() is None:
64+
losses = list()
65+
else:
66+
losses = [super(AggregatedBaseline, self).tf_regularization_loss()]
67+
68+
for baseline in self.baseline.values():
69+
if baseline.regularization_loss() is not None:
70+
losses.append(baseline.regularization_loss())
71+
72+
if self.linear.get_regularization_loss() is not None:
73+
losses.append(self.linear.get_regularization_loss())
74+
75+
if len(losses) > 0:
76+
return tf.add_n(inputs=losses)
77+
else:
78+
return None
79+
6280
def get_variables(self, include_non_trainable=False):
63-
return super(AggregatedBaseline, self).get_variables(include_non_trainable=include_non_trainable) + \
64-
self.linear.get_variables(include_non_trainable=include_non_trainable) + \
65-
[variable for name in sorted(self.baselines) for variable in self.baselines[name].get_variables(
66-
include_non_trainable=include_non_trainable
67-
)]
81+
baseline_variables = super(AggregatedBaseline, self).get_variables(include_non_trainable=include_non_trainable)
82+
83+
baselines_variables = [
84+
variable for name in sorted(self.baselines)
85+
for variable in self.baselines[name].get_variables(include_non_trainable=include_non_trainable)
86+
]
87+
88+
linear_variables = self.linear.get_variables(include_non_trainable=include_non_trainable)
89+
90+
return baseline_variables + baselines_variables + linear_variables

tensorforce/core/baselines/baseline.py

Lines changed: 9 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -81,6 +81,15 @@ def tf_loss(self, states, reward):
8181
prediction = self.predict(states=states)
8282
return tf.nn.l2_loss(t=(prediction - reward))
8383

84+
def tf_regularization_loss(self):
85+
"""
86+
Creates the TensorFlow operations for the baseline regularization loss
87+
88+
Returns:
89+
Regularization loss tensor
90+
"""
91+
return None
92+
8493
def get_variables(self, include_non_trainable=False):
8594
"""
8695
Returns the TensorFlow variables used by the baseline

tensorforce/core/baselines/network_baseline.py

Lines changed: 30 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -49,7 +49,34 @@ def tf_predict(self, states):
4949
prediction = self.linear.apply(x=embedding)
5050
return tf.squeeze(input=prediction, axis=1)
5151

52+
def tf_regularization_loss(self):
53+
"""
54+
Creates the TensorFlow operations for the baseline regularization loss
55+
56+
Returns:
57+
Regularization loss tensor
58+
"""
59+
if super(NetworkBaseline, self).tf_regularization_loss() is None:
60+
losses = list()
61+
else:
62+
losses = [super(NetworkBaseline, self).tf_regularization_loss()]
63+
64+
if self.network.get_regularization_loss() is not None:
65+
losses.append(self.network.get_regularization_loss())
66+
67+
if self.linear.get_regularization_loss() is not None:
68+
losses.append(self.linear.get_regularization_loss())
69+
70+
if len(losses) > 0:
71+
return tf.add_n(inputs=losses)
72+
else:
73+
return None
74+
5275
def get_variables(self, include_non_trainable=False):
53-
return super(NetworkBaseline, self).get_variables(include_non_trainable=include_non_trainable) + \
54-
self.network.get_variables(include_non_trainable=include_non_trainable) + \
55-
self.linear.get_variables(include_non_trainable=include_non_trainable)
76+
baseline_variables = super(NetworkBaseline, self).get_variables(include_non_trainable=include_non_trainable)
77+
78+
network_variables = self.network.get_variables(include_non_trainable=include_non_trainable)
79+
80+
layer_variables = self.linear.get_variables(include_non_trainable=include_non_trainable)
81+
82+
return baseline_variables + network_variables + layer_variables

tensorforce/core/distributions/bernoulli.py

Lines changed: 19 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -102,6 +102,23 @@ def tf_kl_divergence(self, distr_params1, distr_params2):
102102
false_log_prob_ratio = false_logit1 - false_logit2
103103
return probability1 * true_log_prob_ratio + (1.0 - probability1) * false_log_prob_ratio
104104

105+
def tf_regularization_loss(self):
106+
if super(Bernoulli, self).tf_regularization_loss() is None:
107+
losses = list()
108+
else:
109+
losses = [super(Bernoulli, self).tf_regularization_loss()]
110+
111+
if self.logit.regularization_loss() is not None:
112+
losses.append(self.logit.regularization_loss())
113+
114+
if len(losses) > 0:
115+
return tf.add_n(inputs=losses)
116+
else:
117+
return None
118+
105119
def get_variables(self, include_non_trainable=False):
106-
return super(Bernoulli, self).get_variables(include_non_trainable=include_non_trainable) + \
107-
self.logit.get_variables(include_non_trainable=include_non_trainable)
120+
distribution_variables = super(Bernoulli, self).get_variables(include_non_trainable=include_non_trainable)
121+
122+
logit_variables = self.logit.get_variables(include_non_trainable=include_non_trainable)
123+
124+
return distribution_variables + logit_variables

tensorforce/core/distributions/beta.py

Lines changed: 24 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -105,7 +105,28 @@ def tf_kl_divergence(self, distr_params1, distr_params2):
105105
return log_norm2 - log_norm1 - tf.digamma(x=beta1) * (beta2 - beta1) - \
106106
tf.digamma(x=alpha1) * (alpha2 - alpha1) + tf.digamma(x=alpha_beta1) * (alpha_beta2 - alpha_beta1)
107107

108+
def tf_regularization_loss(self):
109+
if super(Beta, self).tf_regularization_loss() is None:
110+
losses = list()
111+
else:
112+
losses = [super(Beta, self).tf_regularization_loss()]
113+
114+
if self.alpha.regularization_loss() is not None:
115+
losses.append(self.alpha.regularization_loss())
116+
117+
if self.beta.regularization_loss() is not None:
118+
losses.append(self.beta.regularization_loss())
119+
120+
if len(losses) > 0:
121+
return tf.add_n(inputs=losses)
122+
else:
123+
return None
124+
108125
def get_variables(self, include_non_trainable=False):
109-
return super(Beta, self).get_variables(include_non_trainable=include_non_trainable) + \
110-
self.alpha.get_variables(include_non_trainable=include_non_trainable) + \
111-
self.beta.get_variables(include_non_trainable=include_non_trainable)
126+
distribution_variables = super(Beta, self).get_variables(include_non_trainable=include_non_trainable)
127+
128+
alpha_variables = self.alpha.get_variables(include_non_trainable=include_non_trainable)
129+
130+
beta_variables = self.beta.get_variables(include_non_trainable=include_non_trainable)
131+
132+
return distribution_variables + alpha_variables + beta_variables

tensorforce/core/distributions/categorical.py

Lines changed: 19 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -108,6 +108,23 @@ def tf_kl_divergence(self, distr_params1, distr_params2):
108108
log_prob_ratio = logits1 - logits2
109109
return tf.reduce_sum(input_tensor=(probabilities1 * log_prob_ratio), axis=-1)
110110

111+
def tf_regularization_loss(self):
112+
if super(Categorical, self).tf_regularization_loss() is None:
113+
losses = list()
114+
else:
115+
losses = [super(Categorical, self).tf_regularization_loss()]
116+
117+
if self.logits.regularization_loss() is not None:
118+
losses.append(self.logits.regularization_loss())
119+
120+
if len(losses) > 0:
121+
return tf.add_n(inputs=losses)
122+
else:
123+
return None
124+
111125
def get_variables(self, include_non_trainable=False):
112-
return super(Categorical, self).get_variables(include_non_trainable=include_non_trainable) + \
113-
self.logits.get_variables(include_non_trainable=include_non_trainable)
126+
distribution_variables = super(Categorical, self).get_variables(include_non_trainable=include_non_trainable)
127+
128+
logits_variables = self.logits.get_variables(include_non_trainable=include_non_trainable)
129+
130+
return distribution_variables + logits_variables

tensorforce/core/distributions/distribution.py

Lines changed: 14 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -72,6 +72,11 @@ def custom_getter(getter, name, registered=False, **kwargs):
7272
func_=self.tf_kl_divergence,
7373
custom_getter_=custom_getter
7474
)
75+
self.regularization_loss = tf.make_template(
76+
name_='regularization-loss',
77+
func_=self.tf_regularization_loss,
78+
custom_getter_=custom_getter
79+
)
7580

7681
def tf_parameters(self, x):
7782
"""
@@ -136,6 +141,15 @@ def tf_kl_divergence(self, distr_params1, distr_params2):
136141
"""
137142
raise NotImplementedError
138143

144+
def tf_regularization_loss(self):
145+
"""
146+
Creates the TensorFlow operations for the distribution regularization loss
147+
148+
Returns:
149+
Regularization loss tensor
150+
"""
151+
return None
152+
139153
def get_variables(self, include_non_trainable=False):
140154
"""
141155
Returns the TensorFlow variables used by the distribution

0 commit comments

Comments
 (0)