feat: Remove ACT in TRM

ValerianRey · ValerianRey · commit a6df540b1ba3 · 2025-12-22T16:57:09.000+01:00
diff --git a/src/recursion/models/recursive_reasoning/trm.py b/src/recursion/models/recursive_reasoning/trm.py
@@ -356,39 +356,6 @@ def forward(
 
             halted = is_last_step
 
-            # if training, and ACT is enabled
-            if self.training and (self.config.halt_max_steps > 1):
-
-                # Halt signal
-                # NOTE: During evaluation, always use max steps, this is to guarantee the same halting steps inside a batch for batching purposes
-
-                if self.config.no_ACT_continue:
-                    halted = halted | (q_halt_logits > 0)
-                else:
-                    halted = halted | (q_halt_logits > q_continue_logits)
-
-                # Exploration
-                min_halt_steps = (
-                    torch.rand_like(q_halt_logits) < self.config.halt_exploration_prob
-                ) * torch.randint_like(new_steps, low=2, high=self.config.halt_max_steps + 1)
-                halted = halted & (new_steps >= min_halt_steps)
-
-                if not self.config.no_ACT_continue:
-                    # Compute target Q
-                    # NOTE: No replay buffer and target networks for computing target Q-value.
-                    # As batch_size is large, there're many parallel envs.
-                    # Similar concept as PQN https://arxiv.org/abs/2407.04811
-                    _, _, (next_q_halt_logits, next_q_continue_logits), _, _ = self.inner(
-                        new_inner_carry, new_current_data
-                    )
-                    outputs["target_q_continue"] = torch.sigmoid(
-                        torch.where(
-                            is_last_step,
-                            next_q_halt_logits,
-                            torch.maximum(next_q_halt_logits, next_q_continue_logits),
-                        )
-                    )
-
         return (
             TinyRecursiveReasoningModel_ACTV1Carry(
                 new_inner_carry, new_steps, halted, new_current_data