# Copyright 2017 reinforce.io. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# ==============================================================================

from __future__ import absolute_import
from __future__ import division
from __future__ import print_function

import tensorflow as tf

from tensorforce import util
from tensorforce.models import Model


class RandomModel(Model):
    """
    Utility class to return random actions of a desired shape and with given bounds.

    The sampled actions do not depend on the content of the states; the states are only
    inspected to determine how many actions to produce per call. Observing a timestep
    is a no-op.
    """

    def __init__(
        self,
        states,
        actions,
        scope,
        device,
        saver,
        summarizer,
        distributed,
        batching_capacity
    ):
        """
        Forwards all arguments unchanged to the `Model` constructor and passes `None` for
        `variable_noise`, `states_preprocessing`, `actions_exploration` and
        `reward_preprocessing`.

        The meaning of the individual arguments is defined by `Model`.
        """
        super(RandomModel, self).__init__(
            states=states,
            actions=actions,
            scope=scope,
            device=device,
            saver=saver,
            summarizer=summarizer,
            distributed=distributed,
            batching_capacity=batching_capacity,
            variable_noise=None,
            states_preprocessing=None,
            actions_exploration=None,
            reward_preprocessing=None
        )

    def tf_actions_and_internals(self, states, internals, deterministic):
        """
        Creates the TensorFlow operations which sample one random action tensor for every
        entry of `self.actions_spec`.

        Sampling per action type:
            - 'bool': uniform sample compared against 0.5.
            - 'int': uniform integer with exclusive upper bound `num_actions`.
            - 'float': uniform between `min_value` and `max_value` if `min_value` is
              specified, otherwise a sample from `tf.random_normal` with default parameters.

        Args:
            states: Dict of state tensors. Only the size of the first dimension of one
                (arbitrary) entry is read -- presumably the batch dimension.
            internals: Collection of internal state tensors, has to be empty.
            deterministic: Unused, actions are always sampled randomly.

        Returns:
            Tuple of (dict mapping action names to sampled action tensors, empty dict of
            internal states).

        Raises:
            ValueError: If an action specification has a type other than 'bool', 'int'
                or 'float'.
        """
        assert len(internals) == 0

        # The leading dimension is the same for every action, so the shape op is created
        # once instead of once per action.
        batch_size = tf.shape(input=next(iter(states.values())))[0]

        actions = dict()
        for name, action in self.actions_spec.items():
            # tuple() is a no-op for tuple shapes and additionally accepts list shapes.
            shape = (batch_size,) + tuple(action['shape'])
            action_type = action['type']

            if action_type == 'bool':
                actions[name] = (tf.random_uniform(shape=shape) < 0.5)

            elif action_type == 'int':
                actions[name] = tf.random_uniform(
                    shape=shape,
                    maxval=action['num_actions'],
                    dtype=util.tf_dtype('int')
                )

            elif action_type == 'float':
                if 'min_value' in action:
                    actions[name] = tf.random_uniform(
                        shape=shape,
                        minval=action['min_value'],
                        maxval=action['max_value']
                    )

                else:
                    actions[name] = tf.random_normal(shape=shape)

            else:
                # Fail loudly here rather than returning a dict which silently lacks
                # this action.
                raise ValueError(
                    "Invalid type '{}' for action '{}', expected 'bool', 'int' or 'float'.".format(
                        action_type, name
                    )
                )

        return actions, dict()

    def tf_observe_timestep(self, states, internals, actions, terminal, reward):
        """
        Returns a no-op, as all arguments are ignored when observing a timestep.
        """
        return tf.no_op()