dm_control: Import of refs/pull/121/head

hartikainen · alimuldal · commit dd5a7925f94f · 2019-12-06T23:59:21.000Z
Closes #121 PiperOrigin-RevId: 284276886 Change-Id: I3ac8abb8314f0cc8a15d740b516f06ec0f2ba1f2
diff --git a/dm_control/suite/wrappers/action_scale.py b/dm_control/suite/wrappers/action_scale.py
@@ -0,0 +1,107 @@
+# Copyright 2019 The dm_control Authors.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#    http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or  implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+# ============================================================================
+
+"""Wrapper that scales actions to a specific range."""
+
+from __future__ import absolute_import
+from __future__ import division
+from __future__ import print_function
+
+import dm_env
+from dm_env import specs
+import numpy as np
+
+_ACTION_SPEC_MUST_BE_BOUNDED_ARRAY = (
+    "`env.action_spec()` must return a single `BoundedArray`, got: {}.")
+_MUST_BE_FINITE = "All values in `{name}` must be finite, got: {bounds}."
+_MUST_BROADCAST = (
+    "`{name}` must be broadcastable to shape {shape}, got: {bounds}.")
+
+
+class Wrapper(dm_env.Environment):
+  """Wraps a control environment to rescale actions to a specific range.
+
+  Actions passed to `step` are mapped linearly so that `minimum` corresponds
+  to the lower bound of the wrapped environment's action spec and `maximum`
+  to its upper bound. Actions are not clipped: values outside of
+  `[minimum, maximum]` are extrapolated by the same linear map.
+  """
+  __slots__ = ("_action_spec", "_env", "_transform")
+
+  def __init__(self, env, minimum, maximum):
+    """Initializes a new action scale Wrapper.
+
+    Args:
+      env: Instance of `dm_env.Environment` to wrap. Its `action_spec` must
+        consist of a single `BoundedArray` with all-finite bounds.
+      minimum: Scalar or array-like specifying element-wise lower bounds
+        (inclusive) for the `action_spec` of the wrapped environment. Must be
+        finite and broadcastable to the shape of the `action_spec`.
+      maximum: Scalar or array-like specifying element-wise upper bounds
+        (inclusive) for the `action_spec` of the wrapped environment. Must be
+        finite and broadcastable to the shape of the `action_spec`.
+
+    Raises:
+      ValueError: If `env.action_spec()` is not a single `BoundedArray`.
+      ValueError: If `env.action_spec()` has non-finite bounds.
+      ValueError: If `minimum` or `maximum` contain non-finite values.
+      ValueError: If `minimum` or `maximum` are not broadcastable to
+        `env.action_spec().shape`.
+      ValueError: If any element of `minimum` is equal to the corresponding
+        element of `maximum`, since such a range cannot be rescaled.
+    """
+    action_spec = env.action_spec()
+    if not isinstance(action_spec, specs.BoundedArray):
+      raise ValueError(_ACTION_SPEC_MUST_BE_BOUNDED_ARRAY.format(action_spec))
+
+    minimum = np.array(minimum)
+    maximum = np.array(maximum)
+    shape = action_spec.shape
+    orig_minimum = action_spec.minimum
+    orig_maximum = action_spec.maximum
+    orig_dtype = action_spec.dtype
+
+    def validate(bounds, name):
+      """Raises `ValueError` unless `bounds` is finite and fits `shape`."""
+      if not np.all(np.isfinite(bounds)):
+        raise ValueError(_MUST_BE_FINITE.format(name=name, bounds=bounds))
+      try:
+        np.broadcast_to(bounds, shape)
+      except ValueError:
+        raise ValueError(_MUST_BROADCAST.format(
+            name=name, bounds=bounds, shape=shape))
+
+    validate(minimum, "minimum")
+    validate(maximum, "maximum")
+    validate(orig_minimum, "env.action_spec().minimum")
+    validate(orig_maximum, "env.action_spec().maximum")
+
+    # A zero-width target range cannot be rescaled: the division below would
+    # silently produce inf/nan scale factors and hence invalid actions.
+    if np.any(maximum == minimum):
+      raise ValueError(
+          "`minimum` and `maximum` must differ in every element, got: "
+          "minimum={}, maximum={}.".format(minimum, maximum))
+
+    scale = (orig_maximum - orig_minimum) / (maximum - minimum)
+
+    def transform(action):
+      """Linearly maps `action` into the wrapped environment's range."""
+      new_action = orig_minimum + scale * (action - minimum)
+      # Hand the wrapped environment the dtype declared by its own spec.
+      return new_action.astype(orig_dtype, copy=False)
+
+    # The exposed spec must be able to represent both the new bounds and
+    # values of the original dtype.
+    dtype = np.result_type(minimum, maximum, orig_dtype)
+    self._action_spec = action_spec.replace(
+        minimum=minimum, maximum=maximum, dtype=dtype)
+    self._env = env
+    self._transform = transform
+
+  def step(self, action):
+    """Rescales `action` and forwards it to the wrapped environment."""
+    return self._env.step(self._transform(action))
+
+  def reset(self):
+    """Resets the wrapped environment and returns its result."""
+    return self._env.reset()
+
+  def observation_spec(self):
+    """Returns the observation spec of the wrapped environment."""
+    return self._env.observation_spec()
+
+  def action_spec(self):
+    """Returns the action spec bounded by `minimum` and `maximum`."""
+    return self._action_spec
+
+  def __getattr__(self, name):
+    """Delegates lookup of any other attribute to the wrapped environment."""
+    # `__getattr__` only runs after normal lookup has failed. If that happens
+    # for one of our own slots (e.g. on an instance created via `__new__`
+    # whose `__init__` has not run), reading `self._env` below would re-enter
+    # this method and recurse without bound, so fail cleanly instead.
+    if name in Wrapper.__slots__:
+      raise AttributeError(name)
+    return getattr(self._env, name)
diff --git a/dm_control/suite/wrappers/action_scale_test.py b/dm_control/suite/wrappers/action_scale_test.py
@@ -0,0 +1,163 @@
+# Copyright 2019 The dm_control Authors.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#    http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or  implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+# ============================================================================
+
+"""Tests for the action scale wrapper."""
+
+from __future__ import absolute_import
+from __future__ import division
+from __future__ import print_function
+
+# Internal dependencies.
+from absl.testing import absltest
+from absl.testing import parameterized
+from dm_control.rl import control
+from dm_control.suite.wrappers import action_scale
+from dm_env import specs
+import mock
+import numpy as np
+
+
+def make_action_spec(lower=(-1.,), upper=(1.,)):
+  """Returns a float `BoundedArray` spec with the given bounds.
+
+  `lower` and `upper` are broadcast against each other; the broadcast shape
+  becomes the shape of the returned spec.
+  """
+  spec_minimum, spec_maximum = np.broadcast_arrays(lower, upper)
+  spec_kwargs = dict(
+      shape=spec_minimum.shape,
+      dtype=float,
+      minimum=spec_minimum,
+      maximum=spec_maximum,
+  )
+  return specs.BoundedArray(**spec_kwargs)
+
+
+def make_mock_env(action_spec):
+  """Returns a mock `control.Environment` reporting the given `action_spec`."""
+  env = mock.Mock(spec=control.Environment)
+  env.action_spec.return_value = action_spec
+  return env
+
+
+class ActionScaleTest(parameterized.TestCase):
+  """Tests for `action_scale.Wrapper` against a mocked environment."""
+
+  def assertStepCalledOnceWithCorrectAction(self, env, expected_action):
+    """Asserts `env.step` was called exactly once with `expected_action`."""
+    # NB: `assert_called_once_with()` doesn't support numpy arrays.
+    env.step.assert_called_once()
+    actual_action = env.step.call_args_list[0][0][0]
+    np.testing.assert_array_equal(expected_action, actual_action)
+
+  @parameterized.parameters(
+      {
+          'minimum': np.r_[-1., -1.],
+          'maximum': np.r_[1., 1.],
+          'scaled_minimum': np.r_[-2., -2.],
+          'scaled_maximum': np.r_[2., 2.],
+      },
+      {
+          'minimum': np.r_[-2., -2.],
+          'maximum': np.r_[2., 2.],
+          'scaled_minimum': np.r_[-1., -1.],
+          'scaled_maximum': np.r_[1., 1.],
+      },
+      {
+          'minimum': np.r_[-1., -1.],
+          'maximum': np.r_[1., 1.],
+          'scaled_minimum': np.r_[-2., -2.],
+          'scaled_maximum': np.r_[1., 1.],
+      },
+      {
+          'minimum': np.r_[-1., -1.],
+          'maximum': np.r_[1., 1.],
+          'scaled_minimum': np.r_[-1., -1.],
+          'scaled_maximum': np.r_[2., 2.],
+      },
+  )
+  def test_step(self, minimum, maximum, scaled_minimum, scaled_maximum):
+    """Scaled bounds must be mapped onto the original action bounds."""
+    action_spec = make_action_spec(lower=minimum, upper=maximum)
+    env = make_mock_env(action_spec=action_spec)
+    wrapped_env = action_scale.Wrapper(
+        env, minimum=scaled_minimum, maximum=scaled_maximum)
+
+    time_step = wrapped_env.step(scaled_minimum)
+    self.assertStepCalledOnceWithCorrectAction(env, minimum)
+    self.assertIs(time_step, env.step(minimum))
+
+    # Forget the calls above so the "called once" check can be reused.
+    env.reset_mock()
+
+    time_step = wrapped_env.step(scaled_maximum)
+    self.assertStepCalledOnceWithCorrectAction(env, maximum)
+    self.assertIs(time_step, env.step(maximum))
+
+  @parameterized.parameters(
+      {
+          'minimum': np.r_[-1., -1.],
+          'maximum': np.r_[1., 1.],
+      },
+      {
+          'minimum': np.r_[0, 1],
+          'maximum': np.r_[2, 3],
+      },
+  )
+  def test_correct_action_spec(self, minimum, maximum):
+    """The wrapper's action spec must expose the requested bounds."""
+    original_action_spec = make_action_spec(
+        lower=np.r_[-2., -2.], upper=np.r_[2., 2.])
+    env = make_mock_env(action_spec=original_action_spec)
+    wrapped_env = action_scale.Wrapper(env, minimum=minimum, maximum=maximum)
+    new_action_spec = wrapped_env.action_spec()
+    np.testing.assert_array_equal(new_action_spec.minimum, minimum)
+    np.testing.assert_array_equal(new_action_spec.maximum, maximum)
+
+  @parameterized.parameters('reset', 'observation_spec', 'control_timestep')
+  def test_method_delegated_to_underlying_env(self, method_name):
+    """Methods other than `step` must be forwarded to the wrapped env."""
+    env = make_mock_env(action_spec=make_action_spec())
+    wrapped_env = action_scale.Wrapper(env, minimum=0, maximum=1)
+    env_method = getattr(env, method_name)
+    wrapper_method = getattr(wrapped_env, method_name)
+    out = wrapper_method()
+    env_method.assert_called_once_with()
+    self.assertIs(out, env_method())
+
+  def test_invalid_action_spec_type(self):
+    """An action spec that is not a single `BoundedArray` is rejected."""
+    action_spec = [make_action_spec()] * 2
+    env = make_mock_env(action_spec=action_spec)
+    with self.assertRaisesWithLiteralMatch(
+        ValueError,
+        action_scale._ACTION_SPEC_MUST_BE_BOUNDED_ARRAY.format(action_spec)):
+      action_scale.Wrapper(env, minimum=0, maximum=1)
+
+  @parameterized.parameters(
+      {'name': 'minimum', 'bounds': np.r_[np.nan]},
+      {'name': 'minimum', 'bounds': np.r_[-np.inf]},
+      {'name': 'maximum', 'bounds': np.r_[np.inf]},
+  )
+  def test_non_finite_bounds(self, name, bounds):
+    """NaN or infinite values in the requested bounds are rejected."""
+    kwargs = {'minimum': np.r_[-1.], 'maximum': np.r_[1.]}
+    kwargs[name] = bounds
+    env = make_mock_env(action_spec=make_action_spec())
+    with self.assertRaisesWithLiteralMatch(
+        ValueError,
+        action_scale._MUST_BE_FINITE.format(name=name, bounds=bounds)):
+      action_scale.Wrapper(env, **kwargs)
+
+  @parameterized.parameters(
+      {'name': 'minimum', 'bounds': np.r_[1., 2., 3.]},
+      # NB: `np.r_[[1.], [2.], [3.]]` concatenates into the same 1-D array as
+      # above, so the (3, 1) column is constructed explicitly instead.
+      {'name': 'minimum', 'bounds': np.array([[1.], [2.], [3.]])},
+  )
+  def test_invalid_bounds_shape(self, name, bounds):
+    """Bounds that cannot broadcast to the action shape are rejected."""
+    shape = (2,)
+    kwargs = {'minimum': np.zeros(shape), 'maximum': np.ones(shape)}
+    kwargs[name] = bounds
+    action_spec = make_action_spec(lower=[-1, -1], upper=[2, 3])
+    env = make_mock_env(action_spec=action_spec)
+    with self.assertRaisesWithLiteralMatch(
+        ValueError,
+        action_scale._MUST_BROADCAST.format(
+            name=name, bounds=bounds, shape=shape)):
+      action_scale.Wrapper(env, **kwargs)
+
+# Run the tests through absl when this module is executed as a script.
+if __name__ == '__main__':
+  absltest.main()