Introduce a multiturn variant of the soccer task.

liusiqi43 · copybara-github · commit 537877871f4f · 2021-05-12T05:21:47.000-07:00
PiperOrigin-RevId: 373348349
Change-Id: Ie2a58c09149fe160b9c6c902064ba3180d8cbaaf
diff --git a/dm_control/locomotion/soccer/__init__.py b/dm_control/locomotion/soccer/__init__.py
@@ -30,6 +30,7 @@
 from dm_control.locomotion.soccer.pitch import RandomizedPitch
 from dm_control.locomotion.soccer.soccer_ball import regulation_soccer_ball
 from dm_control.locomotion.soccer.soccer_ball import SoccerBall
+from dm_control.locomotion.soccer.task import MultiturnTask
 from dm_control.locomotion.soccer.task import Task
 from dm_control.locomotion.soccer.team import Player
 from dm_control.locomotion.soccer.team import Team
diff --git a/dm_control/locomotion/soccer/task.py b/dm_control/locomotion/soccer/task.py
@@ -216,3 +216,52 @@ def before_step(self, physics, actions, random_state):
   def action_spec(self, physics):
     """Return multi-agent action_spec."""
     return [player.walker.action_spec for player in self.players]
+
+
+class MultiturnTask(Task):
+  """Continuous game play through scoring events until timeout."""
+
+  def __init__(self,
+               players,
+               arena,
+               ball=None,
+               initializer=None,
+               observables=None,
+               disable_walker_contacts=False,
+               nconmax_per_player=200,
+               njmax_per_player=200,
+               control_timestep=0.025,
+               tracking_cameras=()):
+    """See base class."""
+    super().__init__(
+        players,
+        arena,
+        ball=ball,
+        initializer=initializer,
+        observables=observables,
+        disable_walker_contacts=disable_walker_contacts,
+        nconmax_per_player=nconmax_per_player,
+        njmax_per_player=njmax_per_player,
+        control_timestep=control_timestep,
+        tracking_cameras=tracking_cameras)
+
+    # If `True`, reset ball entity trackers before the next step.
+    self._should_reset = False
+
+  def should_terminate_episode(self, physics):
+    return False  # The task itself never requests termination.
+
+  def get_discount(self, physics):
+    return np.ones((), np.float32)  # Constant scalar discount of 1.
+
+  def before_step(self, physics, actions, random_state):
+    super().before_step(physics, actions, random_state)
+    if self._should_reset:  # Set by `after_step` when a goal was detected.
+      self.ball.initialize_entity_trackers()
+      self._should_reset = False
+
+  def after_step(self, physics, random_state):
+    super().after_step(physics, random_state)
+    if self.arena.detected_goal():
+      self._initializer(self, physics, random_state)  # Re-run the initializer.
+      self._should_reset = True
diff --git a/dm_control/locomotion/soccer/task_test.py b/dm_control/locomotion/soccer/task_test.py
@@ -558,5 +558,62 @@ def test_ball_velocity(self):
     ball_velocity = env.physics.bind(ball_root_joint).qvel
     np.testing.assert_array_equal(ball_velocity, 0.)
 
+
+class _ScoringInitializer(soccer.Initializer):
+  """Initialize the ball so that the home team repeatedly scores goals."""
+
+  def __init__(self):
+    self._num_calls = 0
+
+  @property
+  def num_calls(self):
+    return self._num_calls
+
+  def __call__(self, task, physics, random_state):
+    # Place `ball` at x=2 (y=0, z=1.5) with a large positive x-velocity.
+    task.ball.set_pose(physics, [2.0, 0.0, 1.5])
+    task.ball.set_velocity(
+        physics, velocity=[100.0, 0.0, 0.0], angular_velocity=0.)
+    for i, player in enumerate(task.players):
+      player.walker.reinitialize_pose(physics, random_state)
+      (_, _, z), quat = player.walker.get_pose(physics)
+      player.walker.set_pose(physics, [-i * 5, 0.0, z], quat)
+      player.walker.set_velocity(physics, velocity=0., angular_velocity=0.)
+
+    self._num_calls += 1
+
+
+class MultiturnTaskTest(parameterized.TestCase):
+
+  def test_multiple_goals(self):
+    initializer = _ScoringInitializer()
+    time_limit = 1.0
+    control_timestep = 0.025
+    env = composer.Environment(
+        task=soccer.MultiturnTask(
+            players=_home_team(1) + _away_team(1),
+            arena=soccer.Pitch((20, 15), field_box=True),  # disable throw-in.
+            initializer=initializer,
+            control_timestep=control_timestep),
+        time_limit=time_limit)
+
+    timestep = env.reset()
+    num_steps = 0
+    rewards = [np.zeros(s.shape, s.dtype) for s in env.reward_spec()]
+    while not timestep.last():
+      timestep = env.step([spec.generate_value() for spec in env.action_spec()])
+      for reward, r_t in zip(rewards, timestep.reward):
+        reward += r_t  # In-place, so the arrays in `rewards` accumulate.
+      num_steps += 1
+    self.assertEqual(num_steps, time_limit / control_timestep)  # 40 steps.
+
+    num_scores = initializer.num_calls - 1  # Exclude the initialization call.
+    self.assertEqual(num_scores, 6)
+    self.assertEqual(rewards, [
+        np.full((), num_scores, np.float32),
+        np.full((), -num_scores, np.float32)
+    ])
+
+
 if __name__ == "__main__":
   absltest.main()
diff --git a/setup.py b/setup.py
@@ -177,7 +177,7 @@ def is_excluded(s):
 
 setup(
     name='dm_control',
-    version='0.0.372528912',
+    version='0.0.373348349',
     description='Continuous control environments and MuJoCo Python bindings.',
     author='DeepMind',
     license='Apache License, Version 2.0',