Skip to content

Commit 02fb71f

Browse files
author
Alexander Kuhnle
committed
NAF added, regularization loss handling improved, various fixes and formatting
1 parent 7020bd6 commit 02fb71f

33 files changed

Lines changed: 708 additions & 364 deletions

tensorforce/agents/__init__.py

Lines changed: 5 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -23,8 +23,8 @@
2323
from tensorforce.agents.ppo_agent import PPOAgent
2424
from tensorforce.agents.dqn_agent import DQNAgent
2525
from tensorforce.agents.dqn_nstep_agent import DQNNstepAgent
26+
from tensorforce.agents.naf_agent import NAFAgent
2627
from tensorforce.agents.dqfd_agent import DQFDAgent
27-
# from tensorforce.agents.naf_agent import NAFAgent
2828
# from tensorforce.agents.categorical_dqn_agent import CategoricalDQNAgent
2929

3030
agents = dict(
@@ -35,8 +35,8 @@
3535
ppo_agent=PPOAgent,
3636
dqn_agent=DQNAgent,
3737
dqn_nstep_agent=DQNNstepAgent,
38-
# naf_agent=NAFAgent,
39-
dqfd_agent=DQFDAgent,
38+
naf_agent=NAFAgent,
39+
dqfd_agent=DQFDAgent
4040
# PPOAgent=PPOAgent,
4141
# CategoricalDQNAgent=CategoricalDQNAgent,
4242
)
@@ -51,7 +51,8 @@
5151
'TRPOAgent',
5252
'PPOAgent',
5353
'DQNAgent',
54-
'DQFDAgent',
5554
'DQNNstepAgent',
55+
'DQFDAgent',
56+
'NAFAgent',
5657
'agents'
5758
]

tensorforce/agents/dqfd_agent.py

Lines changed: 8 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -21,7 +21,7 @@
2121

2222
from tensorforce.agents import MemoryAgent
2323
from tensorforce.core.memories import Replay
24-
from tensorforce.models import DQFDModel
24+
from tensorforce.models import QDemoModel
2525

2626

2727
class DQFDAgent(MemoryAgent):
@@ -126,12 +126,16 @@ def __init__(self, states_spec, actions_spec, network_spec, config):
126126
'demo_batch_size is positive. (Calculated {} based on current' \
127127
' parameters)'.format(self.demo_batch_size)
128128

129-
super(DQFDAgent, self).__init__(states_spec, actions_spec, config)
130-
131129
# This is the demonstration memory that we will fill with observations before starting
132130
# the main training loop
133131
self.demo_memory = Replay(self.demo_memory_capacity, self.states_spec, self.actions_spec)
134132

133+
super(DQFDAgent, self).__init__(
134+
states_spec=states_spec,
135+
actions_spec=actions_spec,
136+
config=config
137+
)
138+
135139
def observe(self, reward, terminal):
136140
"""
137141
Adds observations, updates via sampling from memories according to update rate.
@@ -194,7 +198,7 @@ def set_demonstrations(self, batch):
194198
)
195199

196200
def initialize_model(self, states_spec, actions_spec, config):
197-
return DQFDModel(
201+
return QDemoModel(
198202
states_spec=states_spec,
199203
actions_spec=actions_spec,
200204
network_spec=self.network_spec,

tensorforce/agents/naf_agent.py

Lines changed: 118 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,118 @@
1+
# Copyright 2017 reinforce.io. All Rights Reserved.
2+
#
3+
# Licensed under the Apache License, Version 2.0 (the "License");
4+
# you may not use this file except in compliance with the License.
5+
# You may obtain a copy of the License at
6+
#
7+
# http://www.apache.org/licenses/LICENSE-2.0
8+
#
9+
# Unless required by applicable law or agreed to in writing, software
10+
# distributed under the License is distributed on an "AS IS" BASIS,
11+
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12+
# See the License for the specific language governing permissions and
13+
# limitations under the License.
14+
# ==============================================================================
15+
16+
from __future__ import absolute_import
17+
from __future__ import print_function
18+
from __future__ import division
19+
20+
from tensorforce.agents import MemoryAgent
21+
from tensorforce.models import QNAFModel
22+
23+
24+
class NAFAgent(MemoryAgent):
25+
"""
26+
NAF: https://arxiv.org/abs/1603.00748
27+
28+
### Configuration options
29+
30+
#### General:
31+
32+
* `scope`: TensorFlow variable scope name (default: 'naf')
33+
34+
#### Hyperparameters:
35+
36+
* `batch_size`: Positive integer (**mandatory**)
37+
* `learning_rate`: positive float (default: 1e-3)
38+
* `discount`: Positive float, at most 1.0 (default: 0.99)
39+
* `normalize_rewards`: Boolean (default: false)
40+
* `entropy_regularization`: None or positive float (default: none)
41+
42+
#### Optimizer:
43+
44+
* `optimizer`: Specification dict (default: Adam with learning rate 1e-3)
45+
46+
#### Pre-/post-processing:
47+
48+
* `state_preprocessing`: None or dict (default: none)
49+
* `exploration`: None or dict (default: none)
50+
* `reward_preprocessing`: None or dict (default: none)
51+
52+
#### Logging:
53+
54+
* `log_level`: Logging level, one of the following values (default: 'info')
55+
+ 'info', 'debug', 'critical', 'warning', 'fatal'
56+
57+
#### TensorFlow Summaries:
58+
* `summary_logdir`: None or summary directory string (default: none)
59+
* `summary_labels`: List of summary labels to be reported, some possible values below (default: 'total-loss')
60+
+ 'total-loss'
61+
+ 'losses'
62+
+ 'variables'
63+
+ 'activations'
64+
+ 'relu'
65+
* `summary_frequency`: Positive integer (default: 1)
66+
"""
67+
68+
default_config = dict(
69+
# Agent
70+
preprocessing=None,
71+
exploration=None,
72+
reward_preprocessing=None,
73+
# MemoryAgent
74+
# missing, not documented!
75+
# Model
76+
optimizer=dict(
77+
type='adam',
78+
learning_rate=1e-3
79+
),
80+
discount=0.99,
81+
normalize_rewards=False,
82+
# DistributionModel
83+
distributions=None, # not documented!!!
84+
entropy_regularization=None,
85+
# QModel
86+
target_sync_frequency=10000, # not documented!!!
87+
target_update_weight=1.0, # not documented!!!
88+
double_dqn=False, # not documented!!!
89+
huber_loss=0.0, # not documented!!!
90+
# Logging
91+
log_level='info',
92+
model_directory=None,
93+
save_frequency=600, # TensorFlow default
94+
summary_labels=['total-loss'],
95+
summary_frequency=120, # TensorFlow default
96+
# TensorFlow distributed configuration
97+
cluster_spec=None,
98+
parameter_server=False,
99+
task_index=0,
100+
device=None,
101+
local_model=False,
102+
replica_model=False,
103+
scope='naf'
104+
)
105+
106+
def __init__(self, states_spec, actions_spec, network_spec, config):
107+
self.network_spec = network_spec
108+
config = config.copy()
109+
config.default(self.__class__.default_config)
110+
super(NAFAgent, self).__init__(states_spec, actions_spec, config)
111+
112+
def initialize_model(self, states_spec, actions_spec, config):
113+
return QNAFModel(
114+
states_spec=states_spec,
115+
actions_spec=actions_spec,
116+
network_spec=self.network_spec,
117+
config=config
118+
)

tensorforce/core/baselines/aggregated_baseline.py

Lines changed: 28 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -59,9 +59,32 @@ def tf_predict(self, states):
5959
prediction = self.linear.apply(x=predictions)
6060
return tf.squeeze(input=prediction, axis=1)
6161

62+
def tf_regularization_loss(self):
63+
if super(AggregatedBaseline, self).tf_regularization_loss() is None:
64+
losses = list()
65+
else:
66+
losses = [super(AggregatedBaseline, self).tf_regularization_loss()]
67+
68+
for baseline in self.baseline.values():
69+
if baseline.regularization_loss() is not None:
70+
losses.append(baseline.regularization_loss())
71+
72+
if self.linear.get_regularization_loss() is not None:
73+
losses.append(self.linear.get_regularization_loss())
74+
75+
if len(losses) > 0:
76+
return tf.add_n(inputs=losses)
77+
else:
78+
return None
79+
6280
def get_variables(self, include_non_trainable=False):
63-
return super(AggregatedBaseline, self).get_variables(include_non_trainable=include_non_trainable) + \
64-
self.linear.get_variables(include_non_trainable=include_non_trainable) + \
65-
[variable for name in sorted(self.baselines) for variable in self.baselines[name].get_variables(
66-
include_non_trainable=include_non_trainable
67-
)]
81+
baseline_variables = super(AggregatedBaseline, self).get_variables(include_non_trainable=include_non_trainable)
82+
83+
baselines_variables = [
84+
variable for name in sorted(self.baselines)
85+
for variable in self.baselines[name].get_variables(include_non_trainable=include_non_trainable)
86+
]
87+
88+
linear_variables = self.linear.get_variables(include_non_trainable=include_non_trainable)
89+
90+
return baseline_variables + baselines_variables + linear_variables

tensorforce/core/baselines/baseline.py

Lines changed: 9 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -81,6 +81,15 @@ def tf_loss(self, states, reward):
8181
prediction = self.predict(states=states)
8282
return tf.nn.l2_loss(t=(prediction - reward))
8383

84+
def tf_regularization_loss(self):
85+
"""
86+
Creates the TensorFlow operations for the baseline regularization loss
87+
88+
Returns:
89+
Regularization loss tensor
90+
"""
91+
return None
92+
8493
def get_variables(self, include_non_trainable=False):
8594
"""
8695
Returns the TensorFlow variables used by the baseline

tensorforce/core/baselines/network_baseline.py

Lines changed: 30 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -49,7 +49,34 @@ def tf_predict(self, states):
4949
prediction = self.linear.apply(x=embedding)
5050
return tf.squeeze(input=prediction, axis=1)
5151

52+
def tf_regularization_loss(self):
53+
"""
54+
Creates the TensorFlow operations for the baseline regularization loss
55+
56+
Returns:
57+
Regularization loss tensor
58+
"""
59+
if super(NetworkBaseline, self).tf_regularization_loss() is None:
60+
losses = list()
61+
else:
62+
losses = [super(NetworkBaseline, self).tf_regularization_loss()]
63+
64+
if self.network.get_regularization_loss() is not None:
65+
losses.append(self.network.get_regularization_loss())
66+
67+
if self.linear.get_regularization_loss() is not None:
68+
losses.append(self.linear.get_regularization_loss())
69+
70+
if len(losses) > 0:
71+
return tf.add_n(inputs=losses)
72+
else:
73+
return None
74+
5275
def get_variables(self, include_non_trainable=False):
53-
return super(NetworkBaseline, self).get_variables(include_non_trainable=include_non_trainable) + \
54-
self.network.get_variables(include_non_trainable=include_non_trainable) + \
55-
self.linear.get_variables(include_non_trainable=include_non_trainable)
76+
baseline_variables = super(NetworkBaseline, self).get_variables(include_non_trainable=include_non_trainable)
77+
78+
network_variables = self.network.get_variables(include_non_trainable=include_non_trainable)
79+
80+
layer_variables = self.linear.get_variables(include_non_trainable=include_non_trainable)
81+
82+
return baseline_variables + network_variables + layer_variables

tensorforce/core/distributions/bernoulli.py

Lines changed: 19 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -102,6 +102,23 @@ def tf_kl_divergence(self, distr_params1, distr_params2):
102102
false_log_prob_ratio = false_logit1 - false_logit2
103103
return probability1 * true_log_prob_ratio + (1.0 - probability1) * false_log_prob_ratio
104104

105+
def tf_regularization_loss(self):
106+
if super(Bernoulli, self).tf_regularization_loss() is None:
107+
losses = list()
108+
else:
109+
losses = [super(Bernoulli, self).tf_regularization_loss()]
110+
111+
if self.logit.regularization_loss() is not None:
112+
losses.append(self.logit.regularization_loss())
113+
114+
if len(losses) > 0:
115+
return tf.add_n(inputs=losses)
116+
else:
117+
return None
118+
105119
def get_variables(self, include_non_trainable=False):
106-
return super(Bernoulli, self).get_variables(include_non_trainable=include_non_trainable) + \
107-
self.logit.get_variables(include_non_trainable=include_non_trainable)
120+
distribution_variables = super(Bernoulli, self).get_variables(include_non_trainable=include_non_trainable)
121+
122+
logit_variables = self.logit.get_variables(include_non_trainable=include_non_trainable)
123+
124+
return distribution_variables + logit_variables

tensorforce/core/distributions/beta.py

Lines changed: 24 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -105,7 +105,28 @@ def tf_kl_divergence(self, distr_params1, distr_params2):
105105
return log_norm2 - log_norm1 - tf.digamma(x=beta1) * (beta2 - beta1) - \
106106
tf.digamma(x=alpha1) * (alpha2 - alpha1) + tf.digamma(x=alpha_beta1) * (alpha_beta2 - alpha_beta1)
107107

108+
def tf_regularization_loss(self):
109+
if super(Beta, self).tf_regularization_loss() is None:
110+
losses = list()
111+
else:
112+
losses = [super(Beta, self).tf_regularization_loss()]
113+
114+
if self.alpha.regularization_loss() is not None:
115+
losses.append(self.alpha.regularization_loss())
116+
117+
if self.beta.regularization_loss() is not None:
118+
losses.append(self.beta.regularization_loss())
119+
120+
if len(losses) > 0:
121+
return tf.add_n(inputs=losses)
122+
else:
123+
return None
124+
108125
def get_variables(self, include_non_trainable=False):
109-
return super(Beta, self).get_variables(include_non_trainable=include_non_trainable) + \
110-
self.alpha.get_variables(include_non_trainable=include_non_trainable) + \
111-
self.beta.get_variables(include_non_trainable=include_non_trainable)
126+
distribution_variables = super(Beta, self).get_variables(include_non_trainable=include_non_trainable)
127+
128+
alpha_variables = self.alpha.get_variables(include_non_trainable=include_non_trainable)
129+
130+
beta_variables = self.beta.get_variables(include_non_trainable=include_non_trainable)
131+
132+
return distribution_variables + alpha_variables + beta_variables

tensorforce/core/distributions/categorical.py

Lines changed: 19 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -108,6 +108,23 @@ def tf_kl_divergence(self, distr_params1, distr_params2):
108108
log_prob_ratio = logits1 - logits2
109109
return tf.reduce_sum(input_tensor=(probabilities1 * log_prob_ratio), axis=-1)
110110

111+
def tf_regularization_loss(self):
112+
if super(Categorical, self).tf_regularization_loss() is None:
113+
losses = list()
114+
else:
115+
losses = [super(Categorical, self).tf_regularization_loss()]
116+
117+
if self.logits.regularization_loss() is not None:
118+
losses.append(self.logits.regularization_loss())
119+
120+
if len(losses) > 0:
121+
return tf.add_n(inputs=losses)
122+
else:
123+
return None
124+
111125
def get_variables(self, include_non_trainable=False):
112-
return super(Categorical, self).get_variables(include_non_trainable=include_non_trainable) + \
113-
self.logits.get_variables(include_non_trainable=include_non_trainable)
126+
distribution_variables = super(Categorical, self).get_variables(include_non_trainable=include_non_trainable)
127+
128+
logits_variables = self.logits.get_variables(include_non_trainable=include_non_trainable)
129+
130+
return distribution_variables + logits_variables

tensorforce/core/distributions/distribution.py

Lines changed: 14 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -72,6 +72,11 @@ def custom_getter(getter, name, registered=False, **kwargs):
7272
func_=self.tf_kl_divergence,
7373
custom_getter_=custom_getter
7474
)
75+
self.regularization_loss = tf.make_template(
76+
name_='regularization-loss',
77+
func_=self.tf_regularization_loss,
78+
custom_getter_=custom_getter
79+
)
7580

7681
def tf_parameters(self, x):
7782
"""
@@ -136,6 +141,15 @@ def tf_kl_divergence(self, distr_params1, distr_params2):
136141
"""
137142
raise NotImplementedError
138143

144+
def tf_regularization_loss(self):
145+
"""
146+
Creates the TensorFlow operations for the distribution regularization loss
147+
148+
Returns:
149+
Regularization loss tensor
150+
"""
151+
return None
152+
139153
def get_variables(self, include_non_trainable=False):
140154
"""
141155
Returns the TensorFlow variables used by the distribution

0 commit comments

Comments
 (0)