From 2abf37cf0656725ef4227d81be388ed47b8b072a Mon Sep 17 00:00:00 2001
From: maximpavliv <maxim.pavliv@gmail.com>
Date: Mon, 21 Jul 2025 14:10:06 +0200
Subject: [PATCH 01/34] Trim superanimal_humanbody.yaml default project config

---
 .../modelzoo/project_configs/superanimal_humanbody.yaml   | 8 ++++----
 1 file changed, 4 insertions(+), 4 deletions(-)

diff --git a/deeplabcut/modelzoo/project_configs/superanimal_humanbody.yaml b/deeplabcut/modelzoo/project_configs/superanimal_humanbody.yaml
index 5b6e05a76d..d1e665c17f 100644
--- a/deeplabcut/modelzoo/project_configs/superanimal_humanbody.yaml
+++ b/deeplabcut/modelzoo/project_configs/superanimal_humanbody.yaml
@@ -65,10 +65,10 @@ alphavalue:
 colormap: rainbow
 
 # Training,Evaluation and Analysis configuration
-TrainingFraction: [0.95]
-iteration: 0
-default_net_type: rtmpose_x
-default_augmenter: 
+TrainingFraction:
+iteration:
+default_net_type:
+default_augmenter:
 snapshotindex:
 detector_snapshotindex: -1
 batch_size: 1

From a1a6be126ad56e038807f835dd5f6f79c62308b8 Mon Sep 17 00:00:00 2001
From: maximpavliv <maxim.pavliv@gmail.com>
Date: Mon, 21 Jul 2025 14:12:26 +0200
Subject: [PATCH 02/34] Trim superanimal_humanbody_colors

---
 deeplabcut/modelzoo/utils.py | 29 -----------------------------
 1 file changed, 29 deletions(-)

diff --git a/deeplabcut/modelzoo/utils.py b/deeplabcut/modelzoo/utils.py
index 99f9be61d8..99b4c7b7f6 100644
--- a/deeplabcut/modelzoo/utils.py
+++ b/deeplabcut/modelzoo/utils.py
@@ -383,35 +383,6 @@ def get_superanimal_colormaps():
                 [220, 255, 0],
                 [180, 255, 0],
                 [140, 255, 0],
-                [100, 255, 0],
-                [60, 255, 0],
-                [20, 255, 0],
-                [0, 255, 0],
-                [0, 255, 40],
-                [0, 255, 80],
-                [0, 255, 120],
-                [0, 255, 160],
-                [0, 255, 200],
-                [0, 255, 240],
-                [0, 220, 255],
-                [0, 180, 255],
-                [0, 140, 255],
-                [0, 100, 255],
-                [0, 60, 255],
-                [0, 20, 255],
-                [0, 0, 255],
-                [40, 0, 255],
-                [80, 0, 255],
-                [120, 0, 255],
-                [160, 0, 255],
-                [200, 0, 255],
-                [240, 0, 255],
-                [255, 0, 240],
-                [255, 0, 200],
-                [255, 0, 160],
-                [255, 0, 120],
-                [255, 0, 80],
-                [255, 0, 40],
             ]
         )
         / 255

From dfbce1d61fa24ac735ca7760ccf3c9a5244e34c2 Mon Sep 17 00:00:00 2001
From: maximpavliv <maxim.pavliv@gmail.com>
Date: Mon, 21 Jul 2025 14:18:11 +0200
Subject: [PATCH 03/34] Correct get_checkpoint_epoch

---
 deeplabcut/modelzoo/video_inference.py | 5 ++---
 1 file changed, 2 insertions(+), 3 deletions(-)

diff --git a/deeplabcut/modelzoo/video_inference.py b/deeplabcut/modelzoo/video_inference.py
index 5b597275b2..46c4fdbfd8 100644
--- a/deeplabcut/modelzoo/video_inference.py
+++ b/deeplabcut/modelzoo/video_inference.py
@@ -45,9 +45,8 @@ def get_checkpoint_epoch(checkpoint_path):
     Returns:
         int: Current epoch number, or 0 if not found
     """
-    # Use CUDA if available, otherwise use CPU
-    device = "cuda" if torch.cuda.is_available() else "cpu"
-    checkpoint = torch.load(checkpoint_path, map_location=device)
+    # For reading metadata, it is recommended to load onto the CPU
+    checkpoint = torch.load(checkpoint_path, map_location="cpu")
     if "metadata" in checkpoint and "epoch" in checkpoint["metadata"]:
         return checkpoint["metadata"]["epoch"]
     else:

From 1432a73ac5a72fa4ca01d8c8fdfdf7549922cf41 Mon Sep 17 00:00:00 2001
From: maximpavliv <maxim.pavliv@gmail.com>
Date: Mon, 21 Jul 2025 14:26:17 +0200
Subject: [PATCH 04/34] Add rtmpose_x modelzoo model config

---
 .../modelzoo/model_configs/rtmpose_x.yaml     | 159 ++++++++++++++++++
 1 file changed, 159 insertions(+)
 create mode 100644 deeplabcut/modelzoo/model_configs/rtmpose_x.yaml

diff --git a/deeplabcut/modelzoo/model_configs/rtmpose_x.yaml b/deeplabcut/modelzoo/model_configs/rtmpose_x.yaml
new file mode 100644
index 0000000000..9a7df70196
--- /dev/null
+++ b/deeplabcut/modelzoo/model_configs/rtmpose_x.yaml
@@ -0,0 +1,159 @@
+data:
+  colormode: RGB
+  inference:
+    normalize_images: true
+    top_down_crop:
+      width: 288
+      height: 384
+  train:
+    affine:
+      p: 0.5
+      rotation: 30
+      scaling:
+      - 1.0
+      - 1.0
+      translation: 0
+    collate:
+    covering: false
+    gaussian_noise: 12.75
+    hist_eq: false
+    motion_blur: false
+    normalize_images: true
+    top_down_crop:
+      width: 288
+      height: 384
+detector: null
+device: auto
+metadata:
+  project_path: null
+  pose_config_path: rtmpose_x_body7_pytorch_config.yaml
+  bodyparts:
+  - nose
+  - left_eye
+  - right_eye
+  - left_ear
+  - right_ear
+  - left_shoulder
+  - right_shoulder
+  - left_elbow
+  - right_elbow
+  - left_wrist
+  - right_wrist
+  - left_hip
+  - right_hip
+  - left_knee
+  - right_knee
+  - left_ankle
+  - right_ankle
+  unique_bodyparts: []
+  individuals:
+  - idv0
+  - idv1
+  - idv2
+  - idv3
+  - idv4
+  - idv5
+  - idv6
+  - idv7
+  - idv8
+  - idv9
+  with_identity: false
+method: td
+model:
+  backbone:
+    type: CSPNeXt
+    model_name: cspnext_p5
+    freeze_bn_stats: false
+    freeze_bn_weights: false
+    expand_ratio: 0.5
+    deepen_factor: 1.33
+    widen_factor: 1.25
+    channel_attention: true
+    norm_layer: SyncBN
+    activation_fn: SiLU
+  backbone_output_channels: 1280
+  heads:
+    bodypart:
+      type: RTMCCHead
+      weight_init: RTMPose
+      target_generator:
+        type: SimCCGenerator
+        input_size:
+        - 288
+        - 384
+        smoothing_type: gaussian
+        sigma:
+        - 6.0
+        - 6.93
+        simcc_split_ratio: 2.0
+        label_smooth_weight: 0.0
+        normalize: false
+      criterion:
+        x:
+          type: KLDiscreteLoss
+          use_target_weight: true
+          beta: 10.0
+          label_softmax: true
+        y:
+          type: KLDiscreteLoss
+          use_target_weight: true
+          beta: 10.0
+          label_softmax: true
+      predictor:
+        type: SimCCPredictor
+        simcc_split_ratio: 2.0
+      input_size:
+      - 288
+      - 384
+      in_channels: 1280
+      out_channels: 17
+      in_featuremap_size:
+      - 9
+      - 12
+      simcc_split_ratio: 2.0
+      final_layer_kernel_size: 7
+      gau_cfg:
+        hidden_dims: 256
+        s: 128
+        expansion_factor: 2
+        dropout_rate: 0
+        drop_path: 0.0
+        act_fn: SiLU
+        use_rel_bias: false
+        pos_enc: false
+net_type: rtmpose_x
+runner:
+  type: PoseTrainingRunner
+  gpus:
+  key_metric: test.mAP
+  key_metric_asc: true
+  eval_interval: 10
+  optimizer:
+    type: AdamW
+    params:
+      lr: 0.0005
+  scheduler:
+    type: SequentialLR
+    params:
+      schedulers:
+      - type: ConstantLR
+        params:
+          factor: 0.001
+          total_iters: 5
+      - type: CosineAnnealingLR
+        params:
+          T_max: 250
+          eta_min: 1e-05
+      milestones:
+      - 100
+  snapshots:
+    max_snapshots: 5
+    save_epochs: 25
+    save_optimizer_state: false
+train_settings:
+  batch_size: 1
+  dataloader_workers: 0
+  dataloader_pin_memory: false
+  display_iters: 500
+  epochs: 200
+  seed: 42

From a4d74ccdb2e3c7533bd9d5f6f09f3fd91a07ef63 Mon Sep 17 00:00:00 2001
From: maximpavliv <maxim.pavliv@gmail.com>
Date: Mon, 21 Jul 2025 14:30:33 +0200
Subject: [PATCH 05/34] Add FilteredDetector

---
 .../models/detectors/filtered_detector.py     | 40 +++++++++++++++++++
 1 file changed, 40 insertions(+)
 create mode 100644 deeplabcut/pose_estimation_pytorch/models/detectors/filtered_detector.py

diff --git a/deeplabcut/pose_estimation_pytorch/models/detectors/filtered_detector.py b/deeplabcut/pose_estimation_pytorch/models/detectors/filtered_detector.py
new file mode 100644
index 0000000000..e4278dba65
--- /dev/null
+++ b/deeplabcut/pose_estimation_pytorch/models/detectors/filtered_detector.py
@@ -0,0 +1,40 @@
+import torch
+from torch import nn
+
+
+class FilteredDetector(nn.Module):
+    def __init__(self, base_model: nn.Module, class_id: int):
+        """
+        Wrap a torchvision detector to return predictions only for a single class.
+
+        Args:
+            base_model: A torchvision-style object detector.
+            class_id: The integer class ID to keep (e.g., 1 for 'person' in COCO).
+        """
+        super().__init__()
+        self.base_model = base_model
+        self.class_id = class_id
+
+    def forward(self, images: list[torch.Tensor]) -> tuple[dict, list[dict[str, torch.Tensor]]]:
+        """
+        Args:
+            images: list of input images as Tensors
+
+        Returns:
+            Tuple of an always-empty losses dict and the per-image dicts filtered to `class_id`.
+        """
+        with torch.no_grad():
+            outputs = self.base_model(images)
+
+        filtered_outputs = []
+        for output in outputs:
+            mask = output["labels"] == self.class_id
+            filtered_output = {
+                "boxes": output["boxes"][mask],
+                "scores": output["scores"][mask],
+                "labels": output["labels"][mask],
+            }
+            filtered_outputs.append(filtered_output)
+
+        losses = {}  # nothing is computed here besides predictions, so no losses to report
+        return losses, filtered_outputs

From 0cbfe59fd5193dac13d1fb14102bb03f530345c7 Mon Sep 17 00:00:00 2001
From: maximpavliv <maxim.pavliv@gmail.com>
Date: Mon, 21 Jul 2025 14:39:41 +0200
Subject: [PATCH 06/34] Add get_filtered_coco_detector_inference_runner()
 method

---
 .../pose_estimation_pytorch/apis/utils.py     | 123 ++++++++++++++++++
 1 file changed, 123 insertions(+)

diff --git a/deeplabcut/pose_estimation_pytorch/apis/utils.py b/deeplabcut/pose_estimation_pytorch/apis/utils.py
index ea36e92178..6751b25a61 100644
--- a/deeplabcut/pose_estimation_pytorch/apis/utils.py
+++ b/deeplabcut/pose_estimation_pytorch/apis/utils.py
@@ -19,6 +19,15 @@
 import numpy as np
 import pandas as pd
 
+from torchvision.models import detection
+from torchvision.models.detection import (
+    fasterrcnn_resnet50_fpn,
+    fasterrcnn_mobilenet_v3_large_fpn,
+    FasterRCNN_ResNet50_FPN_Weights,
+    FasterRCNN_ResNet50_FPN_V2_Weights,
+    FasterRCNN_MobileNet_V3_Large_FPN_Weights,
+)
+
 from deeplabcut.core.config import read_config_as_dict
 from deeplabcut.core.engine import Engine
 from deeplabcut.pose_estimation_pytorch.data.ctd import CondFromModel
@@ -38,6 +47,7 @@
 )
 from deeplabcut.pose_estimation_pytorch.data.transforms import build_transforms
 from deeplabcut.pose_estimation_pytorch.models import DETECTORS, PoseModel
+from deeplabcut.pose_estimation_pytorch.models.detectors.filtered_detector import FilteredDetector
 from deeplabcut.pose_estimation_pytorch.runners import (
     build_inference_runner,
     CTDTrackingConfig,
@@ -713,6 +723,119 @@ def get_detector_inference_runner(
     return runner
 
 
+TORCHVISION_DETECTORS = {
+    "fasterrcnn_resnet50_fpn": {
+        "fn": fasterrcnn_resnet50_fpn,
+        "weights": FasterRCNN_ResNet50_FPN_Weights.DEFAULT,
+    },
+    "fasterrcnn_resnet50_fpn_v2": {
+        "fn": detection.fasterrcnn_resnet50_fpn_v2,
+        "weights": FasterRCNN_ResNet50_FPN_V2_Weights.DEFAULT,
+    },
+    "fasterrcnn_mobilenet_v3_large_fpn": {
+        "fn": fasterrcnn_mobilenet_v3_large_fpn,
+        "weights": FasterRCNN_MobileNet_V3_Large_FPN_Weights.DEFAULT,
+    },
+}
+
+
+def get_filtered_coco_detector_inference_runner(
+    model_name: str,
+    category_id: int,
+    batch_size: int = 1,
+    device: str | None = None,
+    box_score_thresh: float = 0.6,
+    max_individuals: int | None = None,
+    color_mode: str | None = None,
+    model_config: dict | None = None,
+    transform: A.BaseCompose | None = None,
+) -> DetectorInferenceRunner:
+    """
+    Builds a detector inference runner using a pretrained COCO detector from torchvision.
+
+    This function loads a pretrained object detection model from `torchvision.models.detection`,
+    wraps it in a `FilteredDetector` that keeps only detections for a specified COCO category,
+    and packages it into a `DetectorInferenceRunner` ready for inference.
+
+    You can optionally provide a model configuration dictionary to resolve `device`, `max_individuals`,
+    and `color_mode`. If no `model_config` is given, these must be specified explicitly.
+
+    Args:
+        model_name (str): Name of the torchvision detection model to load. Must be one of
+                          "fasterrcnn_resnet50_fpn", "fasterrcnn_resnet50_fpn_v2" or
+                          "fasterrcnn_mobilenet_v3_large_fpn".
+        category_id (int): The COCO category ID to retain in the detections.
+        batch_size (int, optional): Batch size for inference. Defaults to 1.
+        device (str or None, optional): Device to run the model on (e.g., "cuda", "cpu", or "mps").
+                                        If None, resolved from model_config. "mps" is replaced by "cpu".
+        box_score_thresh (float, optional): Box confidence threshold given to the detector. Defaults to 0.6.
+        max_individuals (int or None, optional): Maximum number of individuals to retain per image.
+                                                 If None, resolved from model_config.
+        color_mode (str or None, optional): Color mode used for preprocessing (e.g., "RGB").
+                                            If None, resolved from model_config.
+        model_config (dict or None, optional): Optional configuration dictionary used to resolve
+                                               `device`, `max_individuals`, and `color_mode`.
+        transform (A.BaseCompose or None, optional): Optional preprocessing pipeline. If None, one is
+                                                     built with `build_transforms({"scale_to_unit_range": True})`.
+
+    Returns:
+        DetectorInferenceRunner: A configured detector inference runner.
+
+    Raises:
+        ValueError: If `model_name` is unsupported, or `model_config` is None and required fields are missing.
+    """
+    if model_name not in TORCHVISION_DETECTORS:
+        raise ValueError(f"Unsupported model: {model_name}")
+
+    if model_config is not None:
+        if device is None:
+            device = resolve_device(model_config)
+        if max_individuals is None:
+            max_individuals = len(model_config["metadata"]["individuals"])
+        if color_mode is None:
+            color_mode = model_config["data"]["colormode"]
+    else:
+        missing = []
+        if device is None:
+            missing.append("device")
+        if max_individuals is None:
+            missing.append("max_individuals")
+        if color_mode is None:
+            missing.append("color_mode")
+        if missing:
+            raise ValueError(
+                f"If `model_config` is not provided, you must explicitly specify: {', '.join(missing)}."
+            )
+    # NOTE(review): MPS is silently swapped for CPU; presumably the torchvision detectors are unreliable on MPS - confirm.
+    if device == "mps":
+        device = "cpu"
+
+    if transform is None:
+        transform = build_transforms({"scale_to_unit_range": True})
+
+    entry = TORCHVISION_DETECTORS[model_name]
+    weights = entry["weights"]
+    detector = entry["fn"](weights=weights, box_score_thresh=box_score_thresh)
+
+    detector.eval().to(device)
+    filtered_detector = FilteredDetector(detector, class_id=category_id).to(device)
+    detector_runner = build_inference_runner(
+        task=Task.DETECT,
+        model=filtered_detector,
+        device=device,
+        snapshot_path=None,
+        batch_size=batch_size,
+        preprocessor=build_bottom_up_preprocessor(
+            color_mode=color_mode,
+            transform=transform,
+        ),
+        postprocessor=build_detector_postprocessor(
+            max_individuals=max_individuals,
+        ),
+    )
+    return detector_runner
+
+
 def get_pose_inference_runner(
     model_config: dict,
     snapshot_path: str | Path,

From 84b230ebe3e3f398a0ddf07e283d50c5729907be Mon Sep 17 00:00:00 2001
From: maximpavliv <maxim.pavliv@gmail.com>
Date: Mon, 21 Jul 2025 14:43:37 +0200
Subject: [PATCH 07/34] Add ScaleToUnitRange transform

---
 deeplabcut/pose_estimation_pytorch/data/transforms.py | 11 +++++++++++
 1 file changed, 11 insertions(+)

diff --git a/deeplabcut/pose_estimation_pytorch/data/transforms.py b/deeplabcut/pose_estimation_pytorch/data/transforms.py
index b11c736498..7a296525c1 100644
--- a/deeplabcut/pose_estimation_pytorch/data/transforms.py
+++ b/deeplabcut/pose_estimation_pytorch/data/transforms.py
@@ -143,6 +143,9 @@ def build_transforms(augmentations: dict) -> A.BaseCompose:
             A.Normalize(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225])
         )
 
+    if augmentations.get("scale_to_unit_range"):
+        transforms.append(ScaleToUnitRange())
+
     return A.Compose(
         transforms,
         keypoint_params=A.KeypointParams(
@@ -669,3 +672,11 @@ def _sample(
             return low + (delta * np.random.random(size))
 
         raise ValueError(f"Unknown sampling: {self.sampling}")
+
+
+class ScaleToUnitRange(A.ImageOnlyTransform):  # image-only transform that divides pixel values by 255
+    def __init__(self, always_apply: bool = True, p: float = 1.0) -> None:
+        super().__init__(always_apply=always_apply, p=p)
+
+    def apply(self, img: np.ndarray, **params) -> np.ndarray:
+        return img.astype(np.float32) / 255.0  # float32 result; maps 0-255 input to [0, 1]

From c4c1318317c6d1d6ad42845dff3766f775ce1bde Mon Sep 17 00:00:00 2001
From: maximpavliv <maxim.pavliv@gmail.com>
Date: Mon, 21 Jul 2025 15:03:34 +0200
Subject: [PATCH 08/34] Superanimal humanbody inference: use filtered detector
 runner

---
 deeplabcut/modelzoo/video_inference.py        | 84 ++++---------------
 .../modelzoo/inference.py                     | 50 ++++++++---
 2 files changed, 56 insertions(+), 78 deletions(-)

diff --git a/deeplabcut/modelzoo/video_inference.py b/deeplabcut/modelzoo/video_inference.py
index 46c4fdbfd8..46ed21e7d3 100644
--- a/deeplabcut/modelzoo/video_inference.py
+++ b/deeplabcut/modelzoo/video_inference.py
@@ -231,8 +231,8 @@ def video_inference_superanimal(
             https://pytorch.org/vision/stable/models/faster_rcnn.html
 
     (Model Explanation) SuperAnimal-Bird:
-    `superanimal_superbird` model aims to work on various bird species. It was developed 
-    during the 2024 DLC AI Residency Program. More info can be 
+    `superanimal_superbird` model aims to work on various bird species. It was developed
+    during the 2024 DLC AI Residency Program. More info can be
     [found here](https://deeplabcut.medium.com/deeplabcut-ai-residency-2024-recap-working-with-the-superanimal-bird-model-and-dlc-3-0-live-e55807ca2c7c)
 
     (Model Explanation) SuperAnimal-HumanBody:
@@ -250,10 +250,12 @@ def video_inference_superanimal(
             keypoints. When selecting this variant, a `detector_name` must be set with
             one of the provided object detectors. This model uses 17 body parts in
             the COCO body7 format.
-    - We provide an object detector (PyTorch engine):
-        - `fasterrcnn_mobilenet_v3_large_fpn`
-            This is a FasterRCNN model with a MobileNet backbone, see
-            https://pytorch.org/vision/stable/models/faster_rcnn.html
+    - The following object detectors can be used:
+        - `fasterrcnn_mobilenet_v3_large_fpn` (default)
+            This is a FasterRCNN model with a MobileNet backbone
+        - `fasterrcnn_resnet50_fpn`
+        - `fasterrcnn_resnet50_fpn_v2`
+        For more info, see https://pytorch.org/vision/stable/models/faster_rcnn.html
 
     Examples (PyTorch Engine)
     --------
@@ -345,62 +347,17 @@ def video_inference_superanimal(
             pseudo_threshold,
         )
     elif framework == "pytorch":
-        if detector_name is None:
+        torchvision_detector_name = None
+        if superanimal_name != "superanimal_humanbody" and detector_name is None:
             raise ValueError(
                 "You have to specify a detector_name when using the Pytorch framework."
             )
-
-        # Special handling for superanimal_humanbody - use dedicated implementation
-        if superanimal_name == "superanimal_humanbody":
-            from deeplabcut.pose_estimation_pytorch.modelzoo.superanimal_humanbody_video_inference import (
-                analyze_videos_superanimal_humanbody,
-            )
-            
-            # Convert videos to list if needed
-            if isinstance(videos, str):
-                videos = [videos]
-            
-            # Set destination folder
-            if dest_folder is None:
-                dest_folder = Path(videos[0]).parent
-            else:
-                dest_folder = Path(dest_folder)
-            
-            if not dest_folder.exists():
-                dest_folder.mkdir(parents=True, exist_ok=True)
-            
-            # Map parameters to the dedicated function
-            # Note: analyze_videos_superanimal_humanbody has its own parameter set
-            # Handle device parameter - convert "auto" to actual device
-            if device == "auto":
-                import torch
-                actual_device = "cuda" if torch.cuda.is_available() else "cpu"
+        elif superanimal_name == "superanimal_humanbody":
+            if detector_name is not None:
+                torchvision_detector_name = detector_name
             else:
-                actual_device = device
-            
-            dedicated_kwargs = {
-                "videotype": videotype,
-                "destfolder": str(dest_folder),
-                "bbox_threshold": bbox_threshold,
-                "pose_threshold": pcutoff,
-                "device": actual_device,
-                "cropping": cropping,
-                "batch_size": batch_size,
-                "detector_batch_size": detector_batch_size,
-            }
-            
-            # Use a dummy config path since the dedicated function loads its own config
-            dummy_config = "superanimal_humanbody"
-            
-            results = analyze_videos_superanimal_humanbody(
-                dummy_config,
-                videos,
-                **dedicated_kwargs,
-            )
-            
-            return results
+                torchvision_detector_name = "fasterrcnn_mobilenet_v3_large_fpn"
 
-        # Standard PyTorch implementation for other models
         from deeplabcut.pose_estimation_pytorch.modelzoo.inference import (
             _video_inference_superanimal,
         )
@@ -411,7 +368,7 @@ def video_inference_superanimal(
             config = load_super_animal_config(
                 super_animal=superanimal_name,
                 model_name=model_name,
-                detector_name=detector_name,
+                detector_name=detector_name if superanimal_name != "superanimal_humanbody" else None,
             )
 
         pose_model_path = customized_pose_checkpoint
@@ -422,23 +379,17 @@ def video_inference_superanimal(
             )
 
         detector_path = customized_detector_checkpoint
-        if detector_path is None:
+        if detector_path is None and superanimal_name != "superanimal_humanbody":
             detector_path = get_super_animal_snapshot_path(
                 dataset=superanimal_name,
                 model_name=detector_name,
             )
 
         dlc_scorer = get_super_animal_scorer(
-            superanimal_name, pose_model_path, detector_path
+            superanimal_name, pose_model_path, detector_path, torchvision_detector_name
         )
 
-        # Add superanimal_name to config metadata for all superanimal models (needed for detector routing)
-        if "metadata" not in config:
-            config["metadata"] = {}
-        config["metadata"]["superanimal_name"] = superanimal_name
-        
         config = update_config(config, max_individuals, device)
-        
         output_suffix = "_before_adapt"
         if video_adapt:
             # the users can pass in many videos. For now, we only use one video for
@@ -638,4 +589,5 @@ def video_inference_superanimal(
             output_suffix=output_suffix,
             plot_bboxes=plot_bboxes,
             bboxes_pcutoff=bbox_threshold,
+            torchvision_detector_name=torchvision_detector_name,
         )
diff --git a/deeplabcut/pose_estimation_pytorch/modelzoo/inference.py b/deeplabcut/pose_estimation_pytorch/modelzoo/inference.py
index 27ee16d6ea..970e977865 100644
--- a/deeplabcut/pose_estimation_pytorch/modelzoo/inference.py
+++ b/deeplabcut/pose_estimation_pytorch/modelzoo/inference.py
@@ -21,7 +21,11 @@
     video_inference,
     VideoIterator,
 )
-from deeplabcut.pose_estimation_pytorch.apis.utils import get_inference_runners
+from deeplabcut.pose_estimation_pytorch.apis.utils import (
+    get_inference_runners,
+    get_pose_inference_runner,
+    get_filtered_coco_detector_inference_runner
+)
 from deeplabcut.pose_estimation_pytorch.modelzoo.utils import (
     raise_warning_if_called_directly,
 )
@@ -60,6 +64,7 @@ def _video_inference_superanimal(
     output_suffix: str = "",
     plot_bboxes: bool = True,
     bboxes_pcutoff: float = 0.9,
+    torchvision_detector_name: str | None = None,
 ) -> dict:
     """
     Perform inference on a video using a superanimal model from the model zoo specified by `superanimal_name`.
@@ -91,6 +96,7 @@ def _video_inference_superanimal(
         output_suffix: The suffix to add to output file names (e.g. _before_adapt)
         plot_bboxes: Whether to plot bounding boxes in the output video
         bboxes_pcutoff: Confidence threshold for bounding box plotting
+        torchvision_detector_name: If using a filtered torchvision detector, the torchvision model name
 
     Returns:
         results: Dictionary with the result pd.DataFrame for each video
@@ -99,16 +105,36 @@ def _video_inference_superanimal(
         Warning: If the function is called directly.
     """
     raise_warning_if_called_directly()
-    pose_runner, detector_runner = get_inference_runners(
-        model_config=model_cfg,
-        snapshot_path=model_snapshot_path,
-        max_individuals=max_individuals,
-        num_bodyparts=len(model_cfg["metadata"]["bodyparts"]),
-        num_unique_bodyparts=0,
-        batch_size=batch_size,
-        detector_batch_size=detector_batch_size,
-        detector_path=detector_snapshot_path,
-    )
+
+    if superanimal_name == "superanimal_humanbody":
+        if torchvision_detector_name is None:
+            torchvision_detector_name = "fasterrcnn_mobilenet_v3_large_fpn"
+        COCO_PERSON = 1  # COCO class ID for person
+        detector_runner = get_filtered_coco_detector_inference_runner(
+            model_name=torchvision_detector_name,
+            category_id=COCO_PERSON,
+            batch_size=detector_batch_size,
+            max_individuals=max_individuals,
+            model_config=model_cfg,
+        )
+        pose_runner = get_pose_inference_runner(
+            model_cfg,
+            snapshot_path=model_snapshot_path,
+            batch_size=batch_size,
+            max_individuals=max_individuals,
+        )
+    else:
+        pose_runner, detector_runner = get_inference_runners(
+            model_config=model_cfg,
+            snapshot_path=model_snapshot_path,
+            max_individuals=max_individuals,
+            num_bodyparts=len(model_cfg["metadata"]["bodyparts"]),
+            num_unique_bodyparts=0,
+            batch_size=batch_size,
+            detector_batch_size=detector_batch_size,
+            detector_path=detector_snapshot_path,
+        )
+
     results = {}
 
     if isinstance(video_paths, str):
@@ -124,7 +150,7 @@ def _video_inference_superanimal(
         print(f"Processing video {video_path}")
 
         dlc_scorer = get_super_animal_scorer(
-            superanimal_name, model_snapshot_path, detector_snapshot_path
+            superanimal_name, model_snapshot_path, detector_snapshot_path, torchvision_detector_name
         )
 
         output_prefix = f"{Path(video_path).stem}_{dlc_scorer}"

From dc511cdc8e2a785311a467a99c3e77a9d99a8928 Mon Sep 17 00:00:00 2001
From: maximpavliv <maxim.pavliv@gmail.com>
Date: Mon, 21 Jul 2025 16:07:18 +0200
Subject: [PATCH 09/34] ModelZoo tab: make humanbody general case

---
 deeplabcut/gui/tabs/modelzoo.py | 121 ++++++++++----------------------
 1 file changed, 37 insertions(+), 84 deletions(-)

diff --git a/deeplabcut/gui/tabs/modelzoo.py b/deeplabcut/gui/tabs/modelzoo.py
index ede5bf0e3e..54e74d7462 100644
--- a/deeplabcut/gui/tabs/modelzoo.py
+++ b/deeplabcut/gui/tabs/modelzoo.py
@@ -414,92 +414,45 @@ def run_video_adaptation(self):
         self.run_button.setStyleSheet("background-color: #9E9E9E; color: white; font-weight: bold;")  # Gray when disabled
         self.root._progress_bar.show()
         try:
-            # Use dedicated function for superanimal_humanbody
-            if supermodel_name == "superanimal_humanbody":
-                # Download config from HuggingFace (needed for the dedicated function)
-                from deeplabcut.pose_estimation_pytorch.modelzoo.utils import get_snapshot_folder_path
-                import huggingface_hub
-                
-                model_files = get_snapshot_folder_path()
-                model_files.mkdir(exist_ok=True)
-                
-                # Download config file from HuggingFace
-                config_path = Path(
-                    huggingface_hub.hf_hub_download(
-                        "DeepLabCut/HumanBody",
-                        "rtmpose-x_simcc-body7_pytorch_config.yaml",
-                        local_dir=model_files,
-                    )
+            # Use the standard inference function for all SuperAnimal models
+            if can_run_in_background:
+                func = partial(
+                    deeplabcut.video_inference_superanimal,
+                    files,
+                    supermodel_name,
+                    dest_folder=self._destfolder,
+                    **kwargs,
                 )
-                
-                # Map GUI parameters to dedicated function parameters
-                dedicated_kwargs = {
-                    "destfolder": self._destfolder,
-                    "bbox_threshold": kwargs.get("bbox_threshold", 0.1),
-                    "pose_threshold": kwargs.get("pseudo_threshold", 0.4),
-                    "device": "cuda" if torch.cuda.is_available() else "cpu",
-                    "detector_name": kwargs.get("detector_name", "fasterrcnn_mobilenet_v3_large_fpn"),
-                }
-                
-                if can_run_in_background:
-                    func = partial(
-                        deeplabcut.analyze_videos_superanimal_humanbody,
-                        config_path,
-                        files,
-                        **dedicated_kwargs,
-                    )
-                    self.worker, self.thread = move_to_separate_thread(func)
-                    self.worker.finished.connect(self.signal_analysis_complete)
-                    self.thread.start()
-                else:
-                    print(f"Calling analyze_videos_superanimal_humanbody with config={config_path}, kwargs={dedicated_kwargs}")
-                    results = deeplabcut.analyze_videos_superanimal_humanbody(
-                        config_path,
-                        files,
-                        **dedicated_kwargs,
-                    )
-                    # Patch: Call signal_analysis_complete for non-background execution
-                    self.signal_analysis_complete()
+                self.worker, self.thread = move_to_separate_thread(func)
+                self.worker.finished.connect(self.signal_analysis_complete)
+                self.thread.start()
             else:
-                # Use standard function for other models
-                if can_run_in_background:
-                    func = partial(
-                        deeplabcut.video_inference_superanimal,
-                        files,
-                        supermodel_name,
-                        dest_folder=self._destfolder,
-                        **kwargs,
-                    )
-                    self.worker, self.thread = move_to_separate_thread(func)
-                    self.worker.finished.connect(self.signal_analysis_complete)
-                    self.thread.start()
-                else:
-                    print(f"Calling video_inference_superanimal with kwargs={kwargs}")
-                    results = deeplabcut.video_inference_superanimal(
-                        files,
-                        supermodel_name,
-                        dest_folder=self._destfolder,
-                        **kwargs,
-                    )
-                # Check for skipped frames and show warning if needed
-                for video_path in files:
-                    try:
-                        df = results[video_path]
-                        n_processed = len(df)
-                        cap = cv2.VideoCapture(video_path)
-                        n_total = int(cap.get(cv2.CAP_PROP_FRAME_COUNT))
-                        cap.release()
-                        if n_processed < n_total:
-                            msg = QtWidgets.QMessageBox()
-                            msg.setIcon(QtWidgets.QMessageBox.Warning)
-                            msg.setText(f"Warning: Only {n_processed} out of {n_total} frames had detections. The output movie and results include only those frames.")
-                            msg.setWindowTitle("Partial Detections")
-                            msg.setMinimumWidth(400)
-                            msg.setStandardButtons(QtWidgets.QMessageBox.Ok)
-                            msg.exec_()
-                    except Exception as e:
-                        print(f"[GUI Warning] Could not check processed frames: {e}")
-                self.signal_analysis_complete()
+                print(f"Calling video_inference_superanimal with kwargs={kwargs}")
+                results = deeplabcut.video_inference_superanimal(
+                    files,
+                    supermodel_name,
+                    dest_folder=self._destfolder,
+                    **kwargs,
+                )
+            # Check for skipped frames and show warning if needed
+            for video_path in files:
+                try:
+                    df = results[video_path]
+                    n_processed = len(df)
+                    cap = cv2.VideoCapture(video_path)
+                    n_total = int(cap.get(cv2.CAP_PROP_FRAME_COUNT))
+                    cap.release()
+                    if n_processed < n_total:
+                        msg = QtWidgets.QMessageBox()
+                        msg.setIcon(QtWidgets.QMessageBox.Warning)
+                        msg.setText(f"Warning: Only {n_processed} out of {n_total} frames had detections. The output movie and results include only those frames.")
+                        msg.setWindowTitle("Partial Detections")
+                        msg.setMinimumWidth(400)
+                        msg.setStandardButtons(QtWidgets.QMessageBox.Ok)
+                        msg.exec_()
+                except Exception as e:
+                    print(f"[GUI Warning] Could not check processed frames: {e}")
+            self.signal_analysis_complete()
         except Exception as e:
             print(f"[Error] {e}")
             self.run_button.setEnabled(True)

From 479cc66c4698c1ae63d9756f1c4b77e5ce482832 Mon Sep 17 00:00:00 2001
From: maximpavliv <maxim.pavliv@gmail.com>
Date: Mon, 21 Jul 2025 16:08:12 +0200
Subject: [PATCH 10/34] get_super_animal_scorer(): add
 torchvision_detector_name arg

---
 deeplabcut/modelzoo/utils.py | 23 +++++++++++++++--------
 1 file changed, 15 insertions(+), 8 deletions(-)

diff --git a/deeplabcut/modelzoo/utils.py b/deeplabcut/modelzoo/utils.py
index 99b4c7b7f6..00de90afb9 100644
--- a/deeplabcut/modelzoo/utils.py
+++ b/deeplabcut/modelzoo/utils.py
@@ -62,17 +62,23 @@ def get_super_animal_project_cfg(super_animal: str) -> dict:
 def get_super_animal_scorer(
     super_animal: str,
     model_snapshot_path: Path,
-    detector_snapshot_path: Path | str | None,
+    detector_snapshot_path: Path | None,
+    torchvision_detector_name: str | None = None,
 ) -> str:
     """
     Args:
         super_animal: The SuperAnimal dataset on which the models were trained
         model_snapshot_path: The path for the SuperAnimal pose model snapshot
-        detector_snapshot_path: The path or name for the SuperAnimal detector, if a detector is being used.
+        detector_snapshot_path: The path for the SuperAnimal detector snapshot, if a
+            detector is being used.
+        torchvision_detector_name: The name of a pretrained COCO detector from torchvision,
+            if such a detector is used instead of a snapshot.
 
     Returns:
         The DLC scorer name to use for the given SuperAnimal models.
     """
+    if detector_snapshot_path is not None and torchvision_detector_name is not None:
+        raise ValueError("Provide only one of `detector_snapshot_path` or `torchvision_detector_name`, not both.")
     super_animal_prefix = super_animal + "_"
     # Always use model name first
     model_name = model_snapshot_path.stem if hasattr(model_snapshot_path, "stem") else str(model_snapshot_path)
@@ -81,12 +87,13 @@ def get_super_animal_scorer(
     dlc_scorer = f"{super_animal_prefix}{model_name}"
 
     # Then add detector name if provided
-    if detector_snapshot_path:
-        if isinstance(detector_snapshot_path, (str, Path)):
-            detector_name = Path(detector_snapshot_path).stem if hasattr(detector_snapshot_path, "stem") else str(detector_snapshot_path)
-            if detector_name.startswith(super_animal_prefix):
-                detector_name = detector_name[len(super_animal_prefix) :]
-            dlc_scorer += f"_{detector_name}"
+    if detector_snapshot_path is not None:
+        detector_name = detector_snapshot_path.stem
+        if detector_name.startswith(super_animal_prefix):
+            detector_name = detector_name[len(super_animal_prefix):]
+        dlc_scorer += f"_{detector_name}_"
+    elif torchvision_detector_name is not None:
+        dlc_scorer += f"_{torchvision_detector_name}_"
 
     return dlc_scorer
 

From 6a14584148a8e29eb378630a91180f90684412de Mon Sep 17 00:00:00 2001
From: maximpavliv <maxim.pavliv@gmail.com>
Date: Mon, 21 Jul 2025 16:09:12 +0200
Subject: [PATCH 11/34] Remove superanimal_humanbody_video_inference.py module

---
 deeplabcut/__init__.py                        |   1 -
 .../modelzoo/__init__.py                      |   4 -
 .../superanimal_humanbody_video_inference.py  | 487 ------------------
 3 files changed, 492 deletions(-)
 delete mode 100644 deeplabcut/pose_estimation_pytorch/modelzoo/superanimal_humanbody_video_inference.py

diff --git a/deeplabcut/__init__.py b/deeplabcut/__init__.py
index f157d06bb0..72dac1e3ce 100644
--- a/deeplabcut/__init__.py
+++ b/deeplabcut/__init__.py
@@ -60,7 +60,6 @@
 )
 
 from deeplabcut.modelzoo.video_inference import video_inference_superanimal
-from deeplabcut.pose_estimation_pytorch.modelzoo.superanimal_humanbody_video_inference import analyze_videos_superanimal_humanbody
 
 from deeplabcut.utils import (
     create_labeled_video,
diff --git a/deeplabcut/pose_estimation_pytorch/modelzoo/__init__.py b/deeplabcut/pose_estimation_pytorch/modelzoo/__init__.py
index 138ac3901c..e8232cd895 100644
--- a/deeplabcut/pose_estimation_pytorch/modelzoo/__init__.py
+++ b/deeplabcut/pose_estimation_pytorch/modelzoo/__init__.py
@@ -16,7 +16,3 @@
     get_super_animal_snapshot_path,
     load_super_animal_config,
 )
-
-from deeplabcut.pose_estimation_pytorch.modelzoo.superanimal_humanbody_video_inference import (
-    analyze_videos_superanimal_humanbody,
-)
diff --git a/deeplabcut/pose_estimation_pytorch/modelzoo/superanimal_humanbody_video_inference.py b/deeplabcut/pose_estimation_pytorch/modelzoo/superanimal_humanbody_video_inference.py
deleted file mode 100644
index e867207b10..0000000000
--- a/deeplabcut/pose_estimation_pytorch/modelzoo/superanimal_humanbody_video_inference.py
+++ /dev/null
@@ -1,487 +0,0 @@
-#!/usr/bin/env python3
-"""
-Dedicated video inference implementation for superanimal_humanbody with torchvision detector.
-This avoids modifying core functions and provides a clean, specific implementation.
-"""
-
-import cv2
-import numpy as np
-from pathlib import Path
-from typing import List, Dict, Any, Union
-import torch
-import torchvision.models.detection as detection
-from PIL import Image
-from tqdm import tqdm
-import json
-import logging
-import yaml
-import pandas as pd
-
-from deeplabcut.pose_estimation_pytorch.apis.videos import VideoIterator
-from deeplabcut.pose_estimation_pytorch.apis.utils import get_inference_runners
-from deeplabcut.pose_estimation_pytorch.modelzoo.inference import _video_inference_superanimal
-from deeplabcut.modelzoo.utils import get_super_animal_scorer, get_superanimal_colormaps
-
-
-def torchvision_detector_inference(images, threshold=0.1, device="cpu"):
-    """
-    Run the exact torchvision detector on a list of images.
-    This is the working implementation that matches the Colab code.
-    
-    Args:
-        images: list of np.ndarray or PIL.Image
-        threshold: float, detection threshold
-        device: str, device to run on
-    Returns:
-        list of dicts with 'bboxes', 'scores', and 'labels'
-    """
-    weights = detection.FasterRCNN_MobileNet_V3_Large_FPN_Weights.DEFAULT
-    detector = detection.fasterrcnn_mobilenet_v3_large_fpn(
-        weights=weights, box_score_thresh=threshold
-    )
-    detector.eval()
-    detector.to(device)
-    preprocess = weights.transforms()
-
-    results = []
-    for image in images:
-        if isinstance(image, np.ndarray):
-            image = Image.fromarray(image).convert("RGB")
-        elif not isinstance(image, Image.Image):
-            image = Image.open(image).convert("RGB")
-        
-        batch = [preprocess(image).to(device)]
-        with torch.no_grad():
-            predictions = detector(batch)[0]
-        
-        bboxes = predictions["boxes"].cpu().numpy()
-        labels = predictions["labels"].cpu().numpy()
-        scores = predictions["scores"].cpu().numpy()
-        
-        # Filter for humans (COCO class 1)
-        human_mask = labels == 1
-        human_bboxes = bboxes[human_mask]
-        human_scores = scores[human_mask]
-        human_labels = labels[human_mask]
-        
-        # Convert to xywh format
-        if len(human_bboxes) > 0:
-            human_bboxes[:, 2] -= human_bboxes[:, 0]  # width = x2 - x1
-            human_bboxes[:, 3] -= human_bboxes[:, 1]  # height = y2 - y1
-        
-        results.append({
-            "bboxes": human_bboxes,
-            "scores": human_scores,
-            "labels": human_labels
-        })
-    
-    return results
-
-
-def video_inference_superanimal_humanbody(
-    video: Union[str, Path, VideoIterator],
-    model_config: dict,
-    model_snapshot_path: Union[str, Path],
-    detector_snapshot_path: Union[str, Path] = None,
-    max_individuals: int = 1,
-    bbox_threshold: float = 0.1,
-    device: str = "cpu",
-    cropping: List[int] = None,
-    dest_folder: str = None,
-    output_suffix: str = "",
-) -> List[Dict[str, np.ndarray]]:
-    """
-    Dedicated video inference for superanimal_humanbody with torchvision detector.
-    
-    This implementation:
-    1. Uses the exact torchvision detector
-    2. Preserves detection scores and labels
-    3. Handles missing detections gracefully
-    4. Doesn't modify core DeepLabCut functions
-    
-    Args:
-        video: Video path or VideoIterator
-        model_config: Model configuration
-        model_snapshot_path: Path to pose model snapshot
-        detector_snapshot_path: Path to detector snapshot (not used, we use Colab-style detector)
-        max_individuals: Maximum number of individuals to detect
-        bbox_threshold: Detection threshold
-        device: Device to run on
-        cropping: Video cropping parameters
-        dest_folder: Output folder
-        output_suffix: Output file suffix
-        
-    Returns:
-        List of predictions for each frame
-    """
-    
-    # Initialize video iterator
-    if not isinstance(video, VideoIterator):
-        video = VideoIterator(str(video), cropping=cropping)
-    elif cropping is not None:
-        video.set_crop(cropping)
-
-    n_frames = video.get_n_frames(robust=False)
-    vid_w, vid_h = video.dimensions
-    
-    print(f"Starting superanimal_humanbody analysis of {video.video_path}")
-    print(
-        f"Video metadata: \n"
-        f"  Overall # of frames:    {n_frames}\n"
-        f"  Duration of video [s]:  {n_frames / max(1, video.fps):.2f}\n"
-        f"  fps:                    {video.fps}\n"
-        f"  resolution:             w={vid_w}, h={vid_h}\n"
-    )
-
-    # Step 1: Run Colab-style torchvision detector
-    print(f"Using torchvision detector with threshold {bbox_threshold}")
-    
-    detector_progress = tqdm(video, desc="Detector")
-    bbox_predictions = []
-    
-    for i, frame in enumerate(detector_progress):
-        result = torchvision_detector_inference(
-            images=[frame], 
-            threshold=bbox_threshold, 
-            device=device
-        )
-        bbox_predictions.extend(result)
-    
-    # Handle missing detections by padding with full-frame bboxes
-    if len(bbox_predictions) < n_frames:
-        print(f"Detector returned {len(bbox_predictions)} predictions for {n_frames} frames. Padding with full-frame bboxes.")
-        for _ in range(n_frames - len(bbox_predictions)):
-            bbox_predictions.append({
-                'bboxes': np.array([[0, 0, vid_w, vid_h]]),
-                'scores': np.array([0.0]),
-                'labels': np.array([1])
-            })
-    elif len(bbox_predictions) > n_frames:
-        print(f"Detector returned more predictions than frames. Truncating to {n_frames}.")
-        bbox_predictions = bbox_predictions[:n_frames]
-    
-    # Rename scores to bbox_scores to match DeepLabCut expectations
-    for pred in bbox_predictions:
-        if 'scores' in pred:
-            pred['bbox_scores'] = pred.pop('scores')
-    
-    video.set_context(bbox_predictions)
-
-    # Step 2: Run pose estimation
-    print(f"Running pose estimation")
-    
-    # Get pose inference runner
-    pose_runner, _ = get_inference_runners(
-        model_config=model_config,
-        snapshot_path=model_snapshot_path,
-        max_individuals=max_individuals,
-        num_bodyparts=len(model_config["metadata"]["bodyparts"]),
-        num_unique_bodyparts=len(model_config["metadata"]["unique_bodyparts"]),
-        device=device,
-        detector_path=None,  # We don't use the detector runner since we already have bboxes
-    )
-    
-    pose_progress = tqdm(video, desc="Pose")
-    predictions = []
-    
-    for i, frame in enumerate(pose_progress):
-        result = pose_runner.inference(images=[frame])
-        predictions.extend(result)
-    
-    # Add detection context back to predictions
-    for i, pred in enumerate(predictions):
-        if i < len(bbox_predictions):
-            pred['bboxes'] = bbox_predictions[i]['bboxes']
-            pred['bbox_scores'] = bbox_predictions[i]['bbox_scores']
-            if 'labels' in bbox_predictions[i]:
-                pred['bbox_labels'] = bbox_predictions[i]['labels']
-    
-    # Log detection statistics
-    frames_with_detections = sum(
-        1 for pred in predictions if (
-            'bboxes' in pred and len(pred['bboxes']) > 0 and 
-            not np.all(pred['bboxes'] == np.array([0, 0, vid_w, vid_h]))
-        )
-    )
-    logging.info(f"Detected individuals in {frames_with_detections} of {n_frames} frames")
-    
-    return predictions
-
-
-def analyze_videos_superanimal_humanbody(
-    config: str,
-    videos: Union[str, List[str]],
-    videotype: str = None,
-    shuffle: int = 1,
-    trainingsetindex: int = 0,
-    save_as_csv: bool = False,
-    in_random_order: bool = False,
-    snapshot_index: Union[int, str] = None,
-    detector_snapshot_index: Union[int, str] = None,
-    device: str = None,
-    destfolder: str = None,
-    batch_size: int = None,
-    detector_batch_size: int = None,
-    dynamic: tuple = (False, 0.5, 10),
-    ctd_conditions: dict = None,
-    ctd_tracking: bool = False,
-    top_down_dynamic: dict = None,
-    modelprefix: str = "",
-    use_shelve: bool = False,
-    robust_nframes: bool = False,
-    transform = None,
-    auto_track: bool = True,
-    n_tracks: int = None,
-    animal_names: List[str] = None,
-    calibrate: bool = False,
-    identity_only: bool = None,
-    overwrite: bool = False,
-    cropping: List[int] = None,
-    save_as_df: bool = False,
-    bbox_threshold: float = 0.1,
-    pose_threshold: float = 0.4,  # Add pose threshold parameter
-    model_snapshot_path: str = None,
-    detector_name: str = "fasterrcnn_mobilenet_v3_large_fpn",
-) -> str:
-    """
-    Wrapper function that uses the dedicated superanimal_humanbody implementation.
-    
-    This function mimics the interface of the standard analyze_videos function
-    but uses our dedicated implementation for superanimal_humanbody.
-    """
-    
-    # Load model configuration using the standard function (which handles detector config)
-    from deeplabcut.pose_estimation_pytorch.modelzoo.utils import load_super_animal_config
-    
-    # Use the standard function to get the complete config with detector
-    model_config = load_super_animal_config(
-        super_animal="superanimal_humanbody",
-        model_name="rtmpose_x",
-        detector_name=detector_name,
-        max_individuals=10,  # Default value
-        device=device
-    )
-    
-    # Use provided model snapshot path or get it from dlclibrary
-    if model_snapshot_path is None:
-        from deeplabcut.pose_estimation_pytorch.modelzoo.utils import get_super_animal_snapshot_path
-        
-        # Get the model snapshot path using dlclibrary
-        model_snapshot_path = get_super_animal_snapshot_path(
-            dataset="superanimal_humanbody",
-            model_name="rtmpose_x",
-            download=True
-        )
-    
-    # Convert videos to list
-    if isinstance(videos, str):
-        videos = [videos]
-    
-    # Set destination folder
-    if destfolder is None:
-        destfolder = Path(videos[0]).parent
-    else:
-        destfolder = Path(destfolder)
-    
-    if not destfolder.exists():
-        destfolder.mkdir(parents=True, exist_ok=True)
-    
-    results = {}
-    
-    for video_path in videos:
-        print(f"Processing video {video_path}")
-        video_name = Path(video_path).stem
-        # Use detector_name in scorer and output file names
-        dlc_scorer = get_super_animal_scorer(
-            "superanimal_humanbody", model_snapshot_path, detector_name
-        )
-        output_prefix = f"{video_name}_{dlc_scorer}"
-        output_json = destfolder / f"{output_prefix}_before_adapt.json"
-
-        if output_json.exists():
-            print(f"Predictions already exist for {video_path}, skipping inference.")
-            # Load predictions from existing JSON file
-            with open(output_json, "r") as f:
-                predictions = json.load(f)
-            results[video_path] = predictions
-        else:
-            # Run our dedicated inference
-            predictions = video_inference_superanimal_humanbody(
-                video=video_path,
-                model_config=model_config,
-                model_snapshot_path=model_snapshot_path,
-                max_individuals=len(model_config["metadata"]["individuals"]),
-                bbox_threshold=bbox_threshold,
-                device=device,
-                cropping=cropping,
-                dest_folder=str(destfolder),
-            )
-            with open(output_json, "w") as f:
-                json.dump(predictions, f, cls=NumpyEncoder, indent=2)
-            print(f"Results saved to {output_json}")
-            results[video_path] = predictions
-
-        # Always create labeled video, regardless of whether predictions already existed
-        # Create labeled video just like other superanimal_* models
-        # Note: This always runs regardless of whether predictions were loaded or newly created
-        try:
-            from deeplabcut.pose_estimation_pytorch.apis.videos import create_df_from_prediction
-            from deeplabcut.utils.make_labeled_video import create_video
-            
-            # Convert our predictions to the format expected by create_df_from_prediction
-            def convert_predictions_format(predictions, model_config):
-                """Convert our prediction format to the format expected by create_df_from_prediction."""
-                bodyparts = model_config['metadata']['bodyparts']
-                individuals = model_config['metadata'].get('individuals', ['individual_0'])
-                
-                converted_predictions = []
-                for frame_pred in predictions:
-                    # Create the expected numpy array: (num_individuals, num_bodyparts, 3)
-                    num_individuals = len(individuals)
-                    num_bodyparts = len(bodyparts)
-                    
-                    # Initialize with NaN values
-                    bodyparts_array = np.full((num_individuals, num_bodyparts, 3), np.nan)
-                    
-                    # Handle different prediction formats
-                    if 'bodyparts' in frame_pred:
-                        if isinstance(frame_pred['bodyparts'], list):
-                            # Handle list format (from JSON loading)
-                            for i, individual_preds in enumerate(frame_pred['bodyparts']):
-                                if i < num_individuals and isinstance(individual_preds, list):
-                                    for j, pred in enumerate(individual_preds):
-                                        if j < num_bodyparts and len(pred) >= 3:
-                                            bodyparts_array[i, j] = [pred[0], pred[1], pred[2]]
-                        elif isinstance(frame_pred['bodyparts'], np.ndarray):
-                            # Handle numpy array format (from fresh predictions after postprocessing)
-                            poses = frame_pred['bodyparts']
-                            if poses.shape[1] == num_bodyparts:
-                                # poses shape: (num_individuals, num_bodyparts, 3)
-                                num_detected = min(poses.shape[0], num_individuals)
-                                bodyparts_array[:num_detected] = poses[:num_detected]
-                    elif 'bodypart' in frame_pred and 'poses' in frame_pred['bodypart']:
-                        # Handle pose runner format (fresh predictions before postprocessing)
-                        poses = frame_pred['bodypart']['poses']
-                        if isinstance(poses, np.ndarray) and poses.shape[1] == num_bodyparts:
-                            # poses shape: (num_individuals, num_bodyparts, 3)
-                            num_detected = min(poses.shape[0], num_individuals)
-                            bodyparts_array[:num_detected] = poses[:num_detected]
-                    
-                    # Create the converted prediction
-                    converted_pred = {
-                        'bodyparts': bodyparts_array
-                    }
-                    
-                    # Add bbox info if available
-                    if 'bboxes' in frame_pred:
-                        converted_pred['bboxes'] = frame_pred['bboxes']
-                    if 'bbox_scores' in frame_pred:
-                        converted_pred['bbox_scores'] = frame_pred['bbox_scores']
-                    
-                    converted_predictions.append(converted_pred)
-                
-                return converted_predictions
-            
-            # Convert predictions to the expected format
-            converted_predictions = convert_predictions_format(predictions, model_config)
-            
-            # Get the proper scorer name
-            dlc_scorer = get_super_animal_scorer(
-                "superanimal_humanbody", model_snapshot_path, detector_name
-            )
-            
-            output_path = destfolder
-            output_h5 = output_path / f"{output_prefix}.h5"
-            
-            # Convert predictions to DataFrame format
-            df = create_df_from_prediction(
-                predictions=converted_predictions,
-                dlc_scorer=dlc_scorer,
-                multi_animal=True,
-                model_cfg=model_config,
-                output_path=output_path,
-                output_prefix=output_prefix,
-            )
-            
-            # Save HDF5 file
-            df.to_hdf(output_h5, key='df_with_missing', mode='w')
-            print(f"Created HDF5 file: {output_h5}")
-            
-            # Create labeled video using the same approach as other superanimal models
-            output_video = output_path / f"{output_prefix}_labeled.mp4"
-            
-            # Get colormap for humanbody
-            superanimal_colormaps = get_superanimal_colormaps()
-            colormap = superanimal_colormaps.get("superanimal_humanbody", "rainbow")
-            
-            # Load skeleton from the superanimal_humanbody.yaml config
-            skeleton_edges = None
-            try:
-                import yaml
-                import os
-                # Get the correct path to the config file using DeepLabCut's path resolution
-                from deeplabcut.utils.auxiliaryfunctions import get_deeplabcut_path
-                dlc_root_path = get_deeplabcut_path()
-                config_path = os.path.join(dlc_root_path, "modelzoo", "project_configs", "superanimal_humanbody.yaml")
-                with open(config_path, 'r') as f:
-                    config = yaml.safe_load(f)
-                skeleton_indices = config.get('skeleton', None)
-                if skeleton_indices:
-                    # Convert skeleton indices to bodypart names
-                    bodyparts = model_config['metadata']['bodyparts']
-                    skeleton_edges = []
-                    for idx1, idx2 in skeleton_indices:
-                        # Fix 1-based indexing (subtract 1 to convert to 0-based)
-                        idx1_0based = idx1 - 1 if idx1 > 0 else idx1
-                        idx2_0based = idx2 - 1 if idx2 > 0 else idx2
-                        if idx1_0based < len(bodyparts) and idx2_0based < len(bodyparts) and idx1_0based >= 0 and idx2_0based >= 0:
-                            skeleton_edges.append((bodyparts[idx1_0based], bodyparts[idx2_0based]))
-                        else:
-                            print(f"Warning: Skeleton indices {idx1}->{idx1_0based}, {idx2}->{idx2_0based} out of range for {len(bodyparts)} bodyparts")
-                    print(f"Loaded skeleton with {len(skeleton_edges)} connections")
-                else:
-                    print("No skeleton found in config, skeleton plotting will be disabled")
-            except Exception as e:
-                print(f"Could not load skeleton from config: {e}")
-                skeleton_edges = None
-            
-            # Get bbox info for video creation
-            bbox_keys_in_predictions = {"bboxes", "bbox_scores"}
-            bboxes_list = [
-                {key: value for key, value in p.items() if key in bbox_keys_in_predictions}
-                for p in predictions
-            ]
-            
-            # Get cropping info
-            bbox = cropping if cropping is not None else (0, 1920, 0, 1080)  # Default bbox
-            
-            print(f"Creating labeled video for {video_path}...")
-            create_video(
-                video_path,
-                output_h5,
-                pcutoff=pose_threshold,
-                fps=30,  # Default fps
-                bbox=bbox,
-                cmap=colormap,
-                output_path=str(output_video),
-                plot_bboxes=True,
-                bboxes_list=bboxes_list,
-                bboxes_pcutoff=bbox_threshold,
-                skeleton_edges=skeleton_edges,  # Add skeleton support
-            )
-            print(f"Labeled video created: {output_video}")
-            
-        except Exception as e:
-            print(f"[Warning] Could not create labeled video for {video_path}: {e}")
-            import traceback
-            traceback.print_exc()
-    
-    return str(destfolder)
-
-
-class NumpyEncoder(json.JSONEncoder):
-    """JSON encoder that handles numpy arrays"""
-    def default(self, obj):
-        if isinstance(obj, np.ndarray):
-            return obj.tolist()
-        return super().default(obj) 
\ No newline at end of file

From 677481228433dd27f50a193004edbd562bbfc93c Mon Sep 17 00:00:00 2001
From: maximpavliv <maxim.pavliv@gmail.com>
Date: Mon, 21 Jul 2025 17:22:04 +0200
Subject: [PATCH 12/34] Regularize get_super_animal_model_config_path()

---
 deeplabcut/pose_estimation_pytorch/modelzoo/utils.py | 7 +------
 1 file changed, 1 insertion(+), 6 deletions(-)

diff --git a/deeplabcut/pose_estimation_pytorch/modelzoo/utils.py b/deeplabcut/pose_estimation_pytorch/modelzoo/utils.py
index 2c1cbf66b7..ec941e49f7 100644
--- a/deeplabcut/pose_estimation_pytorch/modelzoo/utils.py
+++ b/deeplabcut/pose_estimation_pytorch/modelzoo/utils.py
@@ -40,20 +40,15 @@ def get_snapshot_folder_path() -> Path:
     return Path(auxiliaryfunctions.get_deeplabcut_path()) / "modelzoo" / "checkpoints"
 
 
-def get_super_animal_model_config_path(model_name: str, super_animal: str = None) -> Path:
+def get_super_animal_model_config_path(model_name: str) -> Path:
     """Gets the path to the configuration file for a SuperAnimal model.
 
     Args:
         model_name: The name of the model for which to get the path.
-        super_animal: The name of the SuperAnimal (used for specific model configs).
 
     Returns:
         The path to the config file for a SuperAnimal model.
     """
-    # Special case for superanimal_humanbody with rtmpose_x
-    if model_name == "rtmpose_x" and super_animal == "superanimal_humanbody":
-        return get_model_configs_folder_path() / "superanimal_humanbody_rtmpose_x.yaml"
-    
     return get_model_configs_folder_path() / f"{model_name}.yaml"
 
 

From 424ac92941fbdf30ff61c432a2b6575fc3836ca6 Mon Sep 17 00:00:00 2001
From: maximpavliv <maxim.pavliv@gmail.com>
Date: Mon, 21 Jul 2025 17:27:09 +0200
Subject: [PATCH 13/34] Regularize load_super_animal_config()

---
 .../pose_estimation_pytorch/modelzoo/utils.py | 50 +++----------------
 1 file changed, 8 insertions(+), 42 deletions(-)

diff --git a/deeplabcut/pose_estimation_pytorch/modelzoo/utils.py b/deeplabcut/pose_estimation_pytorch/modelzoo/utils.py
index ec941e49f7..e455b972cb 100644
--- a/deeplabcut/pose_estimation_pytorch/modelzoo/utils.py
+++ b/deeplabcut/pose_estimation_pytorch/modelzoo/utils.py
@@ -15,7 +15,6 @@
 
 import torch
 from dlclibrary import download_huggingface_model
-import huggingface_hub
 
 import deeplabcut.pose_estimation_pytorch.config.utils as config_utils
 from deeplabcut.core.config import read_config_as_dict
@@ -108,52 +107,19 @@ def load_super_animal_config(
     project_cfg_path = get_super_animal_project_config_path(super_animal=super_animal)
     project_config = read_config_as_dict(project_cfg_path)
 
-    # Special handling for superanimal_humanbody with rtmpose_x - download config from HuggingFace
-    if super_animal == "superanimal_humanbody" and model_name == "rtmpose_x":
-        # Download config from HuggingFace
-        model_files = get_snapshot_folder_path()
-        model_files.mkdir(exist_ok=True)
-        
-        path_model_config = Path(
-            huggingface_hub.hf_hub_download(
-                "DeepLabCut/HumanBody",
-                "rtmpose-x_simcc-body7_pytorch_config.yaml",
-                local_dir=model_files,
-            )
-        )
-        model_config = read_config_as_dict(path_model_config)
-    else:
-        # Use local config file for other models
-        model_cfg_path = get_super_animal_model_config_path(model_name=model_name, super_animal=super_animal)
-        model_config = read_config_as_dict(model_cfg_path)
-    
-    model_config = add_metadata(project_config, model_config, model_cfg_path if 'model_cfg_path' in locals() else path_model_config)
+    model_cfg_path = get_super_animal_model_config_path(model_name=model_name)
+    model_config = read_config_as_dict(model_cfg_path)
+    model_config = add_metadata(project_config, model_config, model_cfg_path)
+    model_config = update_config(model_config, max_individuals, device)
 
-    if detector_name is None:
+    if detector_name is None and super_animal != "superanimal_humanbody":
         model_config["method"] = "BU"
     else:
-        # Check if this is a torchvision detector (not in dlclibrary)
-        if super_animal == "superanimal_humanbody" and detector_name == "fasterrcnn_mobilenet_v3_large_fpn":
-            # Use torchvision detector - set method to TD and load detector config
-            model_config["method"] = "TD"
-            detector_cfg_path = get_super_animal_model_config_path(model_name=detector_name, super_animal=super_animal)
+        model_config["method"] = "TD"
+        if super_animal != "superanimal_humanbody":
+            detector_cfg_path = get_super_animal_model_config_path(model_name=detector_name)
             detector_cfg = read_config_as_dict(detector_cfg_path)
             model_config["detector"] = detector_cfg
-        else:
-            # Load detector config from dlclibrary
-            detector_cfg_path = get_super_animal_model_config_path(model_name=detector_name, super_animal=super_animal)
-            detector_cfg = read_config_as_dict(detector_cfg_path)
-            model_config["method"] = "TD"
-            model_config["detector"] = detector_cfg
-    
-    # Update config after detector is added (if any)
-    model_config = update_config(model_config, max_individuals, device)
-    
-    # Add superanimal_name to metadata for all superanimal models (needed for detector routing)
-    if "metadata" not in model_config:
-        model_config["metadata"] = {}
-    model_config["metadata"]["superanimal_name"] = super_animal
-    
     return model_config
 
 

From 04438f844ea1b53e2b23a50e25866158ad606a18 Mon Sep 17 00:00:00 2001
From: maximpavliv <maxim.pavliv@gmail.com>
Date: Mon, 21 Jul 2025 17:28:23 +0200
Subject: [PATCH 14/34] Regularize download_super_animal_snapshot()

---
 .../pose_estimation_pytorch/modelzoo/utils.py | 24 ++++++-------------
 1 file changed, 7 insertions(+), 17 deletions(-)

diff --git a/deeplabcut/pose_estimation_pytorch/modelzoo/utils.py b/deeplabcut/pose_estimation_pytorch/modelzoo/utils.py
index e455b972cb..8b54317d0a 100644
--- a/deeplabcut/pose_estimation_pytorch/modelzoo/utils.py
+++ b/deeplabcut/pose_estimation_pytorch/modelzoo/utils.py
@@ -137,25 +137,15 @@ def download_super_animal_snapshot(dataset: str, model_name: str) -> Path:
         RuntimeError if the model fails to download.
     """
     snapshot_dir = get_snapshot_folder_path()
-    full_model_name = f"{dataset}_{model_name}"
-    model_path = snapshot_dir / f"{full_model_name}.pt"
+    model_name = f"{dataset}_{model_name}"
+    model_filename = f"{model_name}.pt"
+    model_path = snapshot_dir / model_filename
 
-    # Use the full name for dlclibrary lookup (consistent with dlclibrary naming)
-    download_huggingface_model(full_model_name, target_dir=str(snapshot_dir))
-    
-    # Check if the file was downloaded with the expected name
+    download_huggingface_model(model_name, target_dir=str(snapshot_dir), rename_mapping=model_filename)
     if not model_path.exists():
-        # If not, look for the actual downloaded filename and rename it
-        if dataset == "superanimal_humanbody" and model_name == "rtmpose_x":
-            actual_file = snapshot_dir / "rtmpose-x_simcc-body7.pt"
-            if actual_file.exists():
-                actual_file.rename(model_path)
-            else:
-                raise RuntimeError(f"Failed to download {model_name} to {model_path}")
-        else:
-            raise RuntimeError(f"Failed to download {model_name} to {model_path}")
-
-    return snapshot_dir / f"{full_model_name}.pt"
+        raise RuntimeError(f"Failed to download {model_name} to {model_path}")
+
+    return snapshot_dir / f"{model_name}.pt"
 
 
 def get_gpu_memory_map():

From 256977bfa8b7f2b15c3528df8aa0724fd924a7c8 Mon Sep 17 00:00:00 2001
From: maximpavliv <maxim.pavliv@gmail.com>
Date: Mon, 21 Jul 2025 17:29:27 +0200
Subject: [PATCH 15/34] update_config(): superanimal_humanbody - compatible

---
 deeplabcut/pose_estimation_pytorch/modelzoo/utils.py | 5 ++---
 1 file changed, 2 insertions(+), 3 deletions(-)

diff --git a/deeplabcut/pose_estimation_pytorch/modelzoo/utils.py b/deeplabcut/pose_estimation_pytorch/modelzoo/utils.py
index 8b54317d0a..9ff66b4240 100644
--- a/deeplabcut/pose_estimation_pytorch/modelzoo/utils.py
+++ b/deeplabcut/pose_estimation_pytorch/modelzoo/utils.py
@@ -190,8 +190,6 @@ def update_config(config: dict, max_individuals: int, device: str):
     Returns:
         The model configuration for a SuperAnimal-pretrained model.
     """
- 
-    
     config = config_utils.replace_default_values(
         config,
         num_bodyparts=len(config["metadata"]["bodyparts"]),
@@ -201,6 +199,7 @@ def update_config(config: dict, max_individuals: int, device: str):
     config["metadata"]["individuals"] = [f"animal{i}" for i in range(max_individuals)]
 
     config["device"] = device
-    if "detector" in config:
+    if config.get("detector", None) is not None:
         config["detector"]["device"] = device
+
     return config

From 8d4cf202a1bd2bdd65ab0e85d92f9deb08356557 Mon Sep 17 00:00:00 2001
From: maximpavliv <maxim.pavliv@gmail.com>
Date: Mon, 21 Jul 2025 17:49:48 +0200
Subject: [PATCH 16/34] Revert video_inference()

---
 .../pose_estimation_pytorch/apis/videos.py    | 38 +++++--------------
 1 file changed, 9 insertions(+), 29 deletions(-)

diff --git a/deeplabcut/pose_estimation_pytorch/apis/videos.py b/deeplabcut/pose_estimation_pytorch/apis/videos.py
index 33b807fae8..03dd703403 100644
--- a/deeplabcut/pose_estimation_pytorch/apis/videos.py
+++ b/deeplabcut/pose_estimation_pytorch/apis/videos.py
@@ -194,45 +194,25 @@ def video_inference(
 
     if detector_runner is not None:
         print(f"Running detector with batch size {detector_runner.batch_size}")
-        
-        detector_progress = tqdm(video, desc="Detector")
-        bbox_predictions = []
-        for i, frame in enumerate(detector_progress):
-            result = detector_runner.inference(images=[frame])
-            bbox_predictions.extend(result)
-        
-        # PATCH: Ensure bbox_predictions is always length n_frames
-        if len(bbox_predictions) < n_frames:
-            print(f"[PATCH] Detector returned {len(bbox_predictions)} predictions for {n_frames} frames. Padding with empty bboxes.")
-            for _ in range(n_frames - len(bbox_predictions)):
-                bbox_predictions.append({'bboxes': np.zeros((0, 4))})
-        elif len(bbox_predictions) > n_frames:
-            print(f"[PATCH] Detector returned more predictions than frames. Truncating to {n_frames}.")
-            bbox_predictions = bbox_predictions[:n_frames]
+        bbox_predictions = detector_runner.inference(images=tqdm(video))
         video.set_context(bbox_predictions)
 
     print(f"Running pose prediction with batch size {pose_runner.batch_size}")
     if shelf_writer is not None:
         shelf_writer.open()
-    
-    pose_progress = tqdm(video, desc="Pose")
-    predictions = []
-    for i, frame in enumerate(pose_progress):
-        result = pose_runner.inference(images=[frame])
-        predictions.extend(result)
-    
+
+    predictions = pose_runner.inference(images=tqdm(video), shelf_writer=shelf_writer)
     if shelf_writer is not None:
         shelf_writer.close()
 
     if shelf_writer is None and len(predictions) != n_frames:
-        frames_with_detections = sum(
-            1 for pred in predictions if (
-                ('bodyparts' in pred and pred['bodyparts'].shape[0] > 0) or
-                ('bboxes' in pred and len(pred['bboxes']) > 0)
-            )
-        )
+        tip_url = "https://deeplabcut.github.io/DeepLabCut/docs/recipes/io.html"
+        header = "#tips-on-video-re-encoding-and-preprocessing"
         logging.warning(
-            f"Only {frames_with_detections} of {n_frames} frames had detections!"
+            f"The video metadata indicates that there {n_frames} in the video, but "
+            f"only {len(predictions)} were able to be processed. This can happen if "
+            "the video is corrupted. You can try to fix the issue by re-encoding your "
+            f"video (tips on how to do that: {tip_url}{header})"
         )
 
     return predictions

From edf4f9ddf7c00c22cfb01418f479cd58abc39a29 Mon Sep 17 00:00:00 2001
From: maximpavliv <maxim.pavliv@gmail.com>
Date: Mon, 21 Jul 2025 17:50:11 +0200
Subject: [PATCH 17/34] Revert create_df_from_prediction()

---
 .../pose_estimation_pytorch/apis/videos.py    | 54 +------------------
 1 file changed, 1 insertion(+), 53 deletions(-)

diff --git a/deeplabcut/pose_estimation_pytorch/apis/videos.py b/deeplabcut/pose_estimation_pytorch/apis/videos.py
index 03dd703403..96cc879ecb 100644
--- a/deeplabcut/pose_estimation_pytorch/apis/videos.py
+++ b/deeplabcut/pose_estimation_pytorch/apis/videos.py
@@ -783,59 +783,7 @@ def create_df_from_prediction(
     output_prefix: str | Path,
     save_as_csv: bool = False,
 ) -> pd.DataFrame:
-    # Check if any predictions were made
-    if not predictions:
-        raise ValueError(
-            "No objects were detected in the video. This can happen if:\n"
-            "1. The video doesn't contain the type of objects the model was trained to detect\n"
-            "2. The objects are too small, blurry, or occluded\n"
-            "3. The detector confidence threshold is too high\n"
-            "4. The video quality is poor\n\n"
-            "Try:\n"
-            "- Using a different video with clearer objects\n"
-            "- Adjusting the detector confidence threshold\n"
-            "- Checking if the model is appropriate for your use case"
-        )
-    
-    # Check if any predictions contain valid detections (non-empty bboxes)
-    valid_predictions = []
-    for pred in predictions:
-        if "bboxes" in pred and len(pred["bboxes"]) > 0:
-            valid_predictions.append(pred)
-        elif "bodyparts" in pred and pred["bodyparts"].shape[0] > 0:
-            valid_predictions.append(pred)
-    
-    if not valid_predictions:
-        raise ValueError(
-            "No objects were detected in the video. This can happen if:\n"
-            "1. The video doesn't contain the type of objects the model was trained to detect\n"
-            "2. The objects are too small, blurry, or occluded\n"
-            "3. The detector confidence threshold is too high\n"
-            "4. The video quality is poor\n\n"
-            "Try:\n"
-            "- Using a different video with clearer objects\n"
-            "- Adjusting the detector confidence threshold\n"
-            "- Checking if the model is appropriate for your use case"
-        )
-    
-    # Ensure all predictions have the same shape by padding with zeros if needed
-    max_individuals = max(p["bodyparts"].shape[0] for p in predictions) if predictions else 0
-    num_bodyparts = predictions[0]["bodyparts"].shape[1] if predictions else 0
-    
-    # Pad all predictions to have the same number of individuals
-    padded_predictions = []
-    for p in predictions:
-        current_individuals = p["bodyparts"].shape[0]
-        if current_individuals < max_individuals:
-            # Pad with zeros for missing individuals
-            padding = np.zeros((max_individuals - current_individuals, num_bodyparts, 3))
-            padded_bodyparts = np.concatenate([p["bodyparts"][..., :3], padding], axis=0)
-        else:
-            padded_bodyparts = p["bodyparts"][..., :3]
-        padded_predictions.append(padded_bodyparts)
-    
-    pred_bodyparts = np.stack(padded_predictions)
-    
+    pred_bodyparts = np.stack([p["bodyparts"][..., :3] for p in predictions])
     pred_unique_bodyparts = None
     if len(predictions) > 0 and "unique_bodyparts" in predictions[0]:
         pred_unique_bodyparts = np.stack([p["unique_bodyparts"] for p in predictions])

From cded5817dde33eb89b1f8a9571115549ad436400 Mon Sep 17 00:00:00 2001
From: maximpavliv <maxim.pavliv@gmail.com>
Date: Mon, 21 Jul 2025 18:56:25 +0200
Subject: [PATCH 18/34] Restore CTDInferenceRunner

---
 deeplabcut/pose_estimation_pytorch/runners/inference.py | 9 +++++----
 1 file changed, 5 insertions(+), 4 deletions(-)

diff --git a/deeplabcut/pose_estimation_pytorch/runners/inference.py b/deeplabcut/pose_estimation_pytorch/runners/inference.py
index 7c38468117..be851cc44d 100644
--- a/deeplabcut/pose_estimation_pytorch/runners/inference.py
+++ b/deeplabcut/pose_estimation_pytorch/runners/inference.py
@@ -590,12 +590,13 @@ def predict(
         raw_predictions = self.model.get_predictions(outputs)
         predictions = [
             {
-                "detection": {
-                    "bboxes": item["boxes"].cpu().numpy().reshape(-1, 4),
-                    "bbox_scores": item["scores"].cpu().numpy().reshape(-1),
+                head: {
+                    pred_name: pred[b].cpu().numpy()
+                    for pred_name, pred in head_outputs.items()
                 }
+                for head, head_outputs in raw_predictions.items()
             }
-            for item in raw_predictions
+            for b in range(len(inputs))
         ]
 
         return predictions

From 7eaf923a015384ae895e4404ad766a8e90e68309 Mon Sep 17 00:00:00 2001
From: maximpavliv <maxim.pavliv@gmail.com>
Date: Mon, 21 Jul 2025 19:05:15 +0200
Subject: [PATCH 19/34] Remove TorchvisionDetectorInferenceRunner

---
 .../runners/__init__.py                       |  1 -
 .../runners/inference.py                      | 71 +------------------
 2 files changed, 1 insertion(+), 71 deletions(-)

diff --git a/deeplabcut/pose_estimation_pytorch/runners/__init__.py b/deeplabcut/pose_estimation_pytorch/runners/__init__.py
index 29ab9724c6..6d3aa5fac7 100644
--- a/deeplabcut/pose_estimation_pytorch/runners/__init__.py
+++ b/deeplabcut/pose_estimation_pytorch/runners/__init__.py
@@ -24,7 +24,6 @@
 from deeplabcut.pose_estimation_pytorch.runners.inference import (
     build_inference_runner,
     DetectorInferenceRunner,
-    TorchvisionDetectorInferenceRunner,
     InferenceRunner,
     PoseInferenceRunner,
 )
diff --git a/deeplabcut/pose_estimation_pytorch/runners/inference.py b/deeplabcut/pose_estimation_pytorch/runners/inference.py
index be851cc44d..bdb01a2f4f 100644
--- a/deeplabcut/pose_estimation_pytorch/runners/inference.py
+++ b/deeplabcut/pose_estimation_pytorch/runners/inference.py
@@ -20,7 +20,6 @@
 import numpy as np
 import torch
 import torch.nn as nn
-import torchvision
 
 import deeplabcut.pose_estimation_pytorch.post_processing.nms as nms
 import deeplabcut.pose_estimation_pytorch.runners.ctd as ctd
@@ -889,69 +888,7 @@ def inference(self, images) -> list[dict[str, np.ndarray]]:
             return super().inference(images)
 
 
-class TorchvisionDetectorInferenceRunner(DetectorInferenceRunner):
-    """Runner for torchvision detector inference that bypasses standard preprocessing"""
-    
-    def __init__(self, model: BaseDetector, **kwargs):
-        """
-        Args:
-            model: The torchvision detector to use for inference.
-            **kwargs: Inference runner kwargs.
-        """
-        super().__init__(model, **kwargs)
-        
-    def predict(
-        self, inputs: torch.Tensor, **kwargs
-    ) -> list[dict[str, dict[str, np.ndarray]]]:
-        """Makes predictions from a model input and output
-
-        Args:
-            inputs: the inputs to the model, of shape (batch_size, ...)
-
-        Returns:
-            predictions for each of the 'batch_size' inputs, made by each head
-        """
-        if self.device and "cuda" in str(self.device):
-            with torch.autocast(device_type=str(self.device)):
-                _, raw_predictions = self.model(inputs.to(self.device))
-        else:
-            _, raw_predictions = self.model(inputs.to(self.device))
-        
-        predictions = []
-        for item in raw_predictions:
-            if isinstance(item, dict) and "boxes" in item:
-                predictions.append({
-                    "detection": {
-                        "bboxes": item["boxes"].cpu().numpy().reshape(-1, 4),
-                        "bbox_scores": item["scores"].cpu().numpy().reshape(-1),
-                    }
-                })
-            else:
-                # Handle unexpected output format
-                predictions.append({
-                    "detection": {
-                        "bboxes": np.zeros((0, 4)),
-                        "bbox_scores": np.zeros(0),
-                    }
-                })
-        
         return predictions
-        
-    def inference(self, images) -> list[dict[str, np.ndarray]]:
-        """Run inference using the torchvision detector's inference method
-        
-        Args:
-            images: List of image paths, PIL Images, or numpy arrays
-            
-        Returns:
-            List of detection results with bboxes in xywh format
-        """
-        # Always use the detector's own inference method for torchvision detectors
-        if hasattr(self.model, 'inference'):
-            return self.model.inference(images)
-        else:
-            # This should never happen for torchvision detectors
-            raise RuntimeError("TorchvisionDetectorInferenceRunner requires model to have inference method")
 
 
 def build_inference_runner(
@@ -1019,13 +956,7 @@ def build_inference_runner(
                 f"The DynamicCropper can only be used for pose estimation; not object "
                 f"detection. Please turn off dynamic cropping."
             )
-        
-        # Simple check: if superanimal_humanbody, use torchvision inference
-        # Otherwise, use standard inference
-        if hasattr(model, 'superanimal_name') and model.superanimal_name == "superanimal_humanbody":
-            return TorchvisionDetectorInferenceRunner(**kwargs)
-        else:
-            return DetectorInferenceRunner(**kwargs)
+        return DetectorInferenceRunner(**kwargs)
 
     if task != Task.BOTTOM_UP:
         if dynamic is not None and not isinstance(dynamic, TopDownDynamicCropper):

From feb34d13f7ffc4280a6f2618e962be66f601acc5 Mon Sep 17 00:00:00 2001
From: maximpavliv <maxim.pavliv@gmail.com>
Date: Mon, 21 Jul 2025 19:05:52 +0200
Subject: [PATCH 20/34] Revert DetectorInferenceRunner

---
 .../runners/inference.py                      | 47 ++++---------------
 1 file changed, 9 insertions(+), 38 deletions(-)

diff --git a/deeplabcut/pose_estimation_pytorch/runners/inference.py b/deeplabcut/pose_estimation_pytorch/runners/inference.py
index bdb01a2f4f..e4a51deaf9 100644
--- a/deeplabcut/pose_estimation_pytorch/runners/inference.py
+++ b/deeplabcut/pose_estimation_pytorch/runners/inference.py
@@ -850,44 +850,15 @@ def predict(
                 _, raw_predictions = self.model(inputs.to(self.device))
         else:
             _, raw_predictions = self.model(inputs.to(self.device))
-        
-        predictions = []
-        for item in raw_predictions:
-            if isinstance(item, dict) and "boxes" in item and "scores" in item:
-                predictions.append({
-                    "detection": {
-                        "bboxes": item["boxes"].cpu().numpy().reshape(-1, 4),
-                        "bbox_scores": item["scores"].cpu().numpy().reshape(-1),
-                    }
-                })
-            else:
-                # Handle unexpected output format
-                predictions.append({
-                    "detection": {
-                        "bboxes": np.zeros((0, 4)),
-                        "bbox_scores": np.zeros(0),
-                    }
-                })
-        
-        return predictions
-    
-    def inference(self, images) -> list[dict[str, np.ndarray]]:
-        """Run inference using the detector's own inference method if available
-        
-        Args:
-            images: List of image paths, PIL Images, or numpy arrays
-            
-        Returns:
-            List of detection results with bboxes in xywh format
-        """
-        # Use the detector's own inference method if it exists
-        if hasattr(self.model, 'inference'):
-            return self.model.inference(images)
-        else:
-            # Fall back to standard inference pipeline
-            return super().inference(images)
-
-
+        predictions = [
+            {
+                "detection": {
+                    "bboxes": item["boxes"].cpu().numpy().reshape(-1, 4),
+                    "scores": item["scores"].cpu().numpy().reshape(-1),
+                }
+            }
+            for item in raw_predictions
+        ]
         return predictions
 
 

From 711a47eb7d9e7ab8e7aa4812c68fd5f74f313074 Mon Sep 17 00:00:00 2001
From: maximpavliv <maxim.pavliv@gmail.com>
Date: Mon, 21 Jul 2025 19:52:21 +0200
Subject: [PATCH 21/34] superanimal_analyze_images() - make humanbody
 compatible

---
 .../apis/analyze_images.py                    | 64 +++++++++++++------
 1 file changed, 45 insertions(+), 19 deletions(-)

diff --git a/deeplabcut/pose_estimation_pytorch/apis/analyze_images.py b/deeplabcut/pose_estimation_pytorch/apis/analyze_images.py
index 0880923177..aca51296df 100644
--- a/deeplabcut/pose_estimation_pytorch/apis/analyze_images.py
+++ b/deeplabcut/pose_estimation_pytorch/apis/analyze_images.py
@@ -35,7 +35,7 @@
     get_pose_inference_runner,
     get_scorer_name,
     get_scorer_uid,
-    parse_snapshot_index_for_analysis,
+    parse_snapshot_index_for_analysis, get_filtered_coco_detector_inference_runner,
 )
 from deeplabcut.pose_estimation_pytorch.modelzoo.utils import update_config
 from deeplabcut.pose_estimation_pytorch.task import Task
@@ -159,19 +159,30 @@ def superanimal_analyze_images(
     else:
         snapshot_path = Path(customized_pose_checkpoint)
 
-    if customized_detector_checkpoint is None:
+    detector_path = customized_detector_checkpoint
+    if detector_path is None and superanimal_name != "superanimal_humanbody":
         detector_path = modelzoo.get_super_animal_snapshot_path(
             dataset=superanimal_name,
             model_name=detector_name,
         )
-    else:
-        detector_path = Path(customized_detector_checkpoint)
+
+    filtered_detector_config = None
+    if superanimal_name == "superanimal_humanbody":
+        if detector_name is not None:
+            torchvision_detector_name = detector_name
+        else:
+            torchvision_detector_name = "fasterrcnn_mobilenet_v3_large_fpn"
+        COCO_PERSON = 1  # COCO class ID for person
+        filtered_detector_config = {
+            "torchvision_detector_name": torchvision_detector_name,
+            "category_id": COCO_PERSON,
+        }
 
     if customized_model_config is None:
         config = modelzoo.load_super_animal_config(
             super_animal=superanimal_name,
             model_name=model_name,
-            detector_name=detector_name,
+            detector_name=detector_name if superanimal_name != "superanimal_humanbody" else None,
         )
     elif isinstance(customized_model_config, (str, Path)):
         config = config_utils.read_config_as_dict(customized_model_config)
@@ -180,7 +191,7 @@ def superanimal_analyze_images(
 
     config = update_config(config, max_individuals, device)
     config["metadata"]["individuals"] = [f"animal{i}" for i in range(max_individuals)]
-    if "detector" in config:
+    if config.get("detector") is not None:
         config["detector"]["model"]["box_score_thresh"] = bbox_threshold
 
     predictions = analyze_image_folder(
@@ -191,6 +202,7 @@ def superanimal_analyze_images(
         max_individuals=max_individuals,
         device=device,
         progress_bar=progress_bar,
+        filtered_detector_config=filtered_detector_config,
     )
 
     skeleton_bodyparts = config.get("skeleton", [])
@@ -394,6 +406,7 @@ def analyze_image_folder(
     device: str | None = None,
     max_individuals: int | None = None,
     progress_bar: bool = True,
+    filtered_detector_config: dict | None = None,
 ) -> dict[str, dict[str, np.ndarray | np.ndarray]]:
     """Runs pose inference on a folder of images and returns the predictions
 
@@ -411,6 +424,8 @@ def analyze_image_folder(
         max_individuals: The maximum number of individuals to detect in each image. Set
             to the number of individuals in the project if None.
         progress_bar: Whether to display a progress bar when running inference.
+        filtered_detector_config: If using a filtered torchvision detector instead of a saved detector snapshot,
+            specify the filtered detector configuration
 
     Returns:
         A dictionary mapping each image filename to the different types of predictions
@@ -423,15 +438,11 @@ def analyze_image_folder(
         model_cfg = config_utils.read_config_as_dict(model_cfg)
 
     pose_task = Task(model_cfg["method"])
-    if pose_task == Task.TOP_DOWN and detector_path is None:
-        detector_variant = model_cfg.get("detector", {}).get("model", {}).get("variant", "")
-        # Allow torchvision detectors to be loaded without a checkpoint
-        if detector_variant not in ["fasterrcnn_mobilenet_v3_large_fpn", "fasterrcnn_resnet50_fpn_v2"]:
-            raise ValueError(
-                "A detector path must be specified for image analysis using top-down models"
-                f" Please specify the `detector_path` parameter."
-            )
-        # else: will be handled by TorchvisionDetectorAdaptor
+    if pose_task == Task.TOP_DOWN and detector_path is None and filtered_detector_config is None:
+        raise ValueError(
+            "A detector path or filtered_detector_config must be specified for image analysis using top-down models"
+            f" Please specify the `detector_path` parameter or the `filtered_detector_config` parameter."
+        )
 
     if max_individuals is None:
         max_individuals = len(model_cfg["metadata"]["individuals"])
@@ -452,6 +463,8 @@ def analyze_image_folder(
 
     image_paths = parse_images_and_image_folders(images, image_suffixes)
     pose_inputs = image_paths
+
+    detector_runner = None
     if detector_path is not None:
         logging.info(f"Running object detection with {detector_path}")
         detector_runner = get_detector_inference_runner(
@@ -460,14 +473,27 @@ def analyze_image_folder(
             device=device,
             max_individuals=max_individuals,
         )
+    elif filtered_detector_config is not None:
+        model_name = filtered_detector_config["torchvision_detector_name"]
+        category_id = filtered_detector_config["category_id"]
+
+        logging.info(f"Running object detection with filtered torchvision detector '{model_name}', category_id={category_id}")
+        detector_runner = get_filtered_coco_detector_inference_runner(
+            model_name=model_name,
+            category_id=category_id,
+            batch_size=1,
+            device=device,
+            max_individuals=max_individuals,
+            color_mode=model_cfg["data"]["colormode"],
+            model_config=model_cfg,
+        )
 
-        detector_image_paths = image_paths
-        if progress_bar:
-            detector_image_paths = tqdm(detector_image_paths)
+    if detector_runner is not None:
+        detector_image_paths = tqdm(image_paths) if progress_bar else image_paths
         bbox_predictions = detector_runner.inference(images=detector_image_paths)
         pose_inputs = list(zip(image_paths, bbox_predictions))
 
-    logging.info(f"Running pose estimation with {detector_path}")
+    logging.info(f"Running pose estimation with {snapshot_path}")
 
     if progress_bar:
         pose_inputs = tqdm(pose_inputs)

From 28e4dd046c7effb2916bc33ea14fbac369d00dc3 Mon Sep 17 00:00:00 2001
From: maximpavliv <maxim.pavliv@gmail.com>
Date: Mon, 21 Jul 2025 20:07:19 +0200
Subject: [PATCH 22/34] Revert build_predictions_dataframe()

---
 .../pose_estimation_pytorch/apis/utils.py     | 19 -------------------
 1 file changed, 19 deletions(-)

diff --git a/deeplabcut/pose_estimation_pytorch/apis/utils.py b/deeplabcut/pose_estimation_pytorch/apis/utils.py
index 6751b25a61..4f0361396c 100644
--- a/deeplabcut/pose_estimation_pytorch/apis/utils.py
+++ b/deeplabcut/pose_estimation_pytorch/apis/utils.py
@@ -418,25 +418,6 @@ def build_predictions_dataframe(
     """
     image_names = []
     prediction_data = []
-    
-    # Check if this is a humanbody model by looking at the first prediction
-    if predictions:
-        first_pred = next(iter(predictions.values()))
-        if "bodyparts" in first_pred:
-            actual_num_individuals = first_pred["bodyparts"].shape[0]
-            expected_num_individuals = len(parameters.individuals)
-            
-            # For humanbody models, if the actual number of individuals differs from expected,
-            # we need to adjust the parameters to match the actual predictions
-            if actual_num_individuals != expected_num_individuals:
-                # Create adjusted parameters with the actual number of individuals
-                adjusted_individuals = [f"individual_{i}" for i in range(actual_num_individuals)]
-                parameters = PoseDatasetParameters(
-                    bodyparts=parameters.bodyparts,
-                    unique_bpts=parameters.unique_bpts,
-                    individuals=adjusted_individuals,
-                )
-    
     for image_name, image_predictions in predictions.items():
         image_data = image_predictions["bodyparts"][..., :3].reshape(-1)
         if "unique_bodyparts" in image_predictions:

From 193f935688080d852d959580fc17864b7bca3c73 Mon Sep 17 00:00:00 2001
From: maximpavliv <maxim.pavliv@gmail.com>
Date: Mon, 21 Jul 2025 20:07:48 +0200
Subject: [PATCH 23/34] Revert get_inference_runners()

---
 .../pose_estimation_pytorch/apis/utils.py     | 39 ++++---------------
 1 file changed, 7 insertions(+), 32 deletions(-)

diff --git a/deeplabcut/pose_estimation_pytorch/apis/utils.py b/deeplabcut/pose_estimation_pytorch/apis/utils.py
index 4f0361396c..958146142a 100644
--- a/deeplabcut/pose_estimation_pytorch/apis/utils.py
+++ b/deeplabcut/pose_estimation_pytorch/apis/utils.py
@@ -579,45 +579,20 @@ def get_inference_runners(
         if device == "mps":
             detector_device = "cpu"
 
-        # Get superanimal name for filtering logic
-        superanimal_name = model_config.get("metadata", {}).get("superanimal_name", "")
-
-        if detector_path is not None or "detector" in model_config:
-            if detector_path is not None:
-                detector_path = str(detector_path)
+        if detector_path is not None:
+            detector_path = str(detector_path)
             if detector_transform is None:
                 detector_transform = build_transforms(
                     model_config["detector"]["data"]["inference"]
                 )
 
-            print(f"DEBUG: Creating detector for superanimal_name: '{superanimal_name}'")
-            if superanimal_name == "superanimal_humanbody":
-                # Only for superanimal_humanbody, use torchvision detector
-                from deeplabcut.pose_estimation_pytorch.models.detectors.torchvision import TorchvisionDetectorAdaptor
-                detector_config = model_config["detector"]["model"].copy()
-                expected_fields = {
-                    "model", "weights", "num_classes", "freeze_bn_stats", "freeze_bn_weights", 
-                    "box_score_thresh", "model_kwargs", "model_name", "superanimal_name"
-                }
-                unexpected_fields = [k for k in detector_config.keys() if k not in expected_fields]
-                for field in unexpected_fields:
-                    detector_config.pop(field, None)
-                if detector_path is not None:
-                    detector_config["weights"] = None
-                detector_model = TorchvisionDetectorAdaptor(**detector_config)
-                detector_model.superanimal_name = superanimal_name
-                print(f"DEBUG: Created TorchvisionDetectorAdaptor for {superanimal_name}")
-            else:
-                # For all other superanimal models, use the original logic (pre-humanbody integration)
-                detector_config = model_config["detector"]["model"].copy()
-                pretrained = False if detector_path is not None else True
-                detector_model = DETECTORS.build(detector_config, pretrained=pretrained)
-                detector_model.superanimal_name = superanimal_name
-                print(f"DEBUG: Created custom detector from DETECTORS registry for {superanimal_name}")
-                print(f"DEBUG: Custom detector type: {type(detector_model)}")
+            detector_config = model_config["detector"]["model"]
+            if "pretrained" in detector_config:
+                detector_config["pretrained"] = False
+
             detector_runner = build_inference_runner(
                 task=Task.DETECT,
-                model=detector_model,
+                model=DETECTORS.build(detector_config),
                 device=detector_device,
                 snapshot_path=detector_path,
                 batch_size=detector_batch_size,

From 613b2ac5ba8b75dce188f42ce49b90457959030a Mon Sep 17 00:00:00 2001
From: maximpavliv <maxim.pavliv@gmail.com>
Date: Mon, 21 Jul 2025 20:42:32 +0200
Subject: [PATCH 24/34] Revert detectors/fasterRCNN.py

---
 .../models/detectors/fasterRCNN.py              | 17 ++++++++---------
 1 file changed, 8 insertions(+), 9 deletions(-)

diff --git a/deeplabcut/pose_estimation_pytorch/models/detectors/fasterRCNN.py b/deeplabcut/pose_estimation_pytorch/models/detectors/fasterRCNN.py
index 8079682cb5..edfdbe8a23 100644
--- a/deeplabcut/pose_estimation_pytorch/models/detectors/fasterRCNN.py
+++ b/deeplabcut/pose_estimation_pytorch/models/detectors/fasterRCNN.py
@@ -59,17 +59,16 @@ def __init__(
 
         super().__init__(
             model=variant,
-            weights=None,  # Always pass None to ensure num_classes=2 is used
-            num_classes=2,  # Always use 2 classes for superanimal models
+            weights=("COCO_V1" if pretrained else None),
+            num_classes=None,
             freeze_bn_stats=freeze_bn_stats,
             freeze_bn_weights=freeze_bn_weights,
             box_score_thresh=box_score_thresh,
         )
 
-        if not pretrained:
-            num_classes = 2
-            in_features = self.model.roi_heads.box_predictor.cls_score.in_features
-            self.model.roi_heads.box_predictor = detection.faster_rcnn.FastRCNNPredictor(
-                in_features, num_classes
-            )
-            
+        # Modify the base predictor to output the correct number of classes
+        num_classes = 2
+        in_features = self.model.roi_heads.box_predictor.cls_score.in_features
+        self.model.roi_heads.box_predictor = detection.faster_rcnn.FastRCNNPredictor(
+            in_features, num_classes
+        )

From d848741b7ae73c64475ba588a5109399408cf10d Mon Sep 17 00:00:00 2001
From: maximpavliv <maxim.pavliv@gmail.com>
Date: Mon, 21 Jul 2025 20:43:10 +0200
Subject: [PATCH 25/34] Revert detectors/torchvision.py

---
 .../models/detectors/torchvision.py           | 245 +-----------------
 1 file changed, 11 insertions(+), 234 deletions(-)

diff --git a/deeplabcut/pose_estimation_pytorch/models/detectors/torchvision.py b/deeplabcut/pose_estimation_pytorch/models/detectors/torchvision.py
index eb91bdbdf3..6c700377f7 100644
--- a/deeplabcut/pose_estimation_pytorch/models/detectors/torchvision.py
+++ b/deeplabcut/pose_estimation_pytorch/models/detectors/torchvision.py
@@ -13,9 +13,6 @@
 
 import torch
 import torchvision.models.detection as detection
-import numpy as np
-from PIL import Image
-import torchvision
 
 from deeplabcut.pose_estimation_pytorch.models.detectors.base import (
     BaseDetector,
@@ -51,58 +48,38 @@ class TorchvisionDetectorAdaptor(BaseDetector):
         freeze_bn_weights: Whether to freeze weights for BatchNorm layers.
         box_score_thresh: during inference, only return proposals with a classification
             score greater than box_score_thresh
-        model_name: Optional name of the model
-        superanimal_name: Optional name of the superanimal model
     """
 
     def __init__(
         self,
-        model: str = "fasterrcnn_resnet50_fpn_v2",
+        model: str,
         weights: str | None = None,
         num_classes: int | None = 2,
         freeze_bn_stats: bool = False,
         freeze_bn_weights: bool = False,
         box_score_thresh: float = 0.01,
         model_kwargs: dict | None = None,
-        model_name: str | None = None,
-        superanimal_name: str | None = None,
     ) -> None:
         super().__init__(
             freeze_bn_stats=freeze_bn_stats,
             freeze_bn_weights=freeze_bn_weights,
             pretrained=weights is not None,
         )
-        self.model_name = model_name
-        self.superanimal_name = superanimal_name
 
+        # Load the model
         model_fn = getattr(detection, model)
         if model_kwargs is None:
             model_kwargs = {}
 
-        # Get the proper weights class
-        if weights == "COCO_V1" or weights is None:
-            if model == "fasterrcnn_mobilenet_v3_large_fpn":
-                weights = detection.FasterRCNN_MobileNet_V3_Large_FPN_Weights.DEFAULT
-            elif model == "fasterrcnn_resnet50_fpn_v2" and self.superanimal_name == "superanimal_humanbody":
-                weights = detection.FasterRCNN_ResNet50_FPN_V2_Weights.DEFAULT
-            else:
-                weights = None
-
-        if weights is not None:
-            self.model = model_fn(
-                weights=weights,
-                box_score_thresh=box_score_thresh,
-                **model_kwargs,
-            )
-        else:
-            self.model = model_fn(
-                weights=weights,
-                box_score_thresh=box_score_thresh,
-                num_classes=num_classes,
-                **model_kwargs,
-            )
+        self.model = model_fn(
+            weights=weights,
+            box_score_thresh=box_score_thresh,
+            num_classes=num_classes,
+            **model_kwargs,
+        )
 
-        self.transforms = weights.transforms() if weights is not None else None
+        # See source:  https://stackoverflow.com/a/65347721
+        self.model.eager_outputs = lambda losses, detections: (losses, detections)
 
     def forward(
         self, x: torch.Tensor, targets: list[dict[str, torch.Tensor]] | None = None
@@ -118,152 +95,7 @@ def forward(
             losses: {'loss_name': loss_value}
             detections: for each of the b images, {"boxes": bounding_boxes}
         """
-        result = self.model(x, targets)
-        
-        # Handle different return formats from torchvision models
-        if isinstance(result, tuple):
-            if len(result) == 2:
-                # Standard format: (losses, predictions)
-                return result
-            elif len(result) > 2:
-                # Some models return additional values, take first two
-                return result[0], result[1]
-            else:
-                # Single value, assume it's predictions
-                # Return zero loss tensor for training compatibility
-                device = x.device
-                dummy_loss = torch.tensor(0.0, device=device, requires_grad=True)
-                return {"total_loss": dummy_loss}, result[0]
-        else:
-            # Single value, assume it's predictions
-            # Return zero loss tensor for training compatibility
-            device = x.device
-            dummy_loss = torch.tensor(0.0, device=device, requires_grad=True)
-            return {"total_loss": dummy_loss}, result
-
-    def inference(self, images) -> list[dict[str, np.ndarray]]:
-        """
-        Run inference on images using the torchvision detector
-        
-        Args:
-            images: List of PIL Images or numpy arrays
-            
-        Returns:
-            List of detection results, each containing "bboxes" in xywh format
-        """
-
-
-        self.model.eval()
-        device = next(self.model.parameters()).device
-        
-        results = []
-        
-        with torch.no_grad():
-            for i, image in enumerate(images):
-                # Convert to PIL Image if needed
-                if isinstance(image, np.ndarray):
-                    image = Image.fromarray(image).convert("RGB")
-                elif not isinstance(image, Image.Image):
-                    image = Image.open(image).convert("RGB")
-                
-                # Apply proper preprocessing
-                if self.transforms is not None:
-                    batch = [self.transforms(image).to(device)]
-                else:
-                    # For SSD models, we need specific preprocessing
-                    if hasattr(self.model, 'roi_heads'):
-                        # FasterRCNN preprocessing
-                        import torchvision.transforms as transforms
-                        preprocess = transforms.Compose([
-                            transforms.ToTensor(),
-                            transforms.Normalize(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225])
-                        ])
-                    else:
-                        # SSD preprocessing - resize to 320x320
-                        import torchvision.transforms as transforms
-                        preprocess = transforms.Compose([
-                            transforms.Resize((320, 320)),
-                            transforms.ToTensor(),
-                            transforms.Normalize(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225])
-                        ])
-                    batch = [preprocess(image).to(device)]
-                
-                # Run detection - call model directly without going through forward method
-                # Check if model has roi_heads (FasterRCNN) or not (SSD)
-                if hasattr(self.model, 'roi_heads'):
-                    pass  # Model has roi_heads (FasterRCNN)
-                else:
-                    pass  # Model is SSD - no roi_heads attribute
-                
-                # Call the underlying torchvision model directly for inference
-                predictions = self.model(batch)
-                
-                # Handle the output format - during inference, should be list of dicts
-                if isinstance(predictions, (list, tuple)) and len(predictions) > 0:
-                    prediction = predictions[0]  # First image
-                else:
-                    prediction = predictions
-                
-                # Check if predictions are empty due to threshold
-                if isinstance(prediction, dict) and len(prediction) > 0:
-                    if 'scores' in prediction:
-                        print(f"DEBUG: Max score: {prediction['scores'].max() if len(prediction['scores']) > 0 else 'No scores'}")
-                        # Check if model has roi_heads (FasterRCNN) or not (SSD)
-                        # Skip threshold check for SSD models that don't have roi_heads
-                
-                if not isinstance(prediction, dict) or "boxes" not in prediction:
-                    # Unexpected output, return empty
-                    results.append({
-                        "bboxes": np.zeros((0, 4)),
-                        "bbox_scores": np.zeros(0)
-                    })
-                    continue
-
-                bboxes = prediction["boxes"].cpu().numpy()
-                labels = prediction["labels"].cpu().numpy()
-                scores = prediction["scores"].cpu().numpy()
-
-                # Handle empty detections
-                if len(bboxes) == 0:
-                    detected_bboxes = np.zeros((0, 4))
-                    detected_scores = np.zeros(0)
-                else:
-                    # For humanbody models, filter for humans (COCO class 1)
-                    # For quadruped and other models, return all detections
-                    if self.superanimal_name == 'superanimal_humanbody':
-                        detection_mask = labels == 1
-                    else:
-                        detection_mask = np.ones(len(bboxes), dtype=bool)
-                    detected_bboxes = bboxes[detection_mask]
-                    detected_scores = scores[detection_mask]
-                    detected_labels = labels[detection_mask]
-
-                # Convert to xywh format
-                if len(detected_bboxes) > 0:
-                    # Convert from (x1, y1, x2, y2) to (x, y, w, h)
-                    detected_bboxes[:, 2] -= detected_bboxes[:, 0]  # width = x2 - x1
-                    detected_bboxes[:, 3] -= detected_bboxes[:, 1]  # height = y2 - y1
-                    
-                    # Sort by confidence and keep top detections
-                    sorted_indices = np.argsort(detected_scores)[::-1]
-                    detected_bboxes = detected_bboxes[sorted_indices]
-                    detected_scores = detected_scores[sorted_indices]
-                    
-                    # Limit to reasonable number of detections
-                    max_detections = 10
-                    if len(detected_bboxes) > max_detections:
-                        detected_bboxes = detected_bboxes[:max_detections]
-                        detected_scores = detected_scores[:max_detections]
-                else:
-                    detected_bboxes = np.zeros((0, 4))
-                    detected_scores = np.zeros(0)
-
-                results.append({
-                    "bboxes": detected_bboxes,
-                    "bbox_scores": detected_scores
-                })
-        
-        return results
+        return self.model(x, targets)
 
     def get_target(self, labels: dict) -> list[dict[str, torch.Tensor]]:
         """
@@ -328,58 +160,3 @@ def get_target(self, labels: dict) -> list[dict[str, torch.Tensor]]:
             )
 
         return res
-
-def torchvision_detector_inference(images, threshold=0.1, device="cpu"): 
-    """
-    Run the Colab-style torchvision detector on a list of images.
-    Args:
-        images: list of np.ndarray or PIL.Image
-        threshold: float, detection threshold
-        device: str, device to run on
-    Returns:
-        list of dicts with 'bboxes' and 'scores'
-    """
-    import torchvision.models.detection as detection
-    from PIL import Image
-    import numpy as np
-    import torch
-
-    # Use the exact working logic from colab_style_detector_test.py
-    weights = detection.FasterRCNN_MobileNet_V3_Large_FPN_Weights.DEFAULT
-    detector = detection.fasterrcnn_mobilenet_v3_large_fpn(
-        weights=weights, box_score_thresh=threshold
-    )
-    detector.eval()
-    detector.to(device)
-    preprocess = weights.transforms()
-
-    results = []
-    for image in images:
-        if isinstance(image, np.ndarray):
-            image = Image.fromarray(image).convert("RGB")
-        elif not isinstance(image, Image.Image):
-            image = Image.open(image).convert("RGB")
-        
-        batch = [preprocess(image).to(device)]
-        with torch.no_grad():
-            predictions = detector(batch)[0]
-        
-        bboxes = predictions["boxes"].cpu().numpy()
-        labels = predictions["labels"].cpu().numpy()
-        scores = predictions["scores"].cpu().numpy()
-        
-        # Filter for humans (COCO class 1)
-        human_mask = labels == 1
-        human_bboxes = bboxes[human_mask]
-        human_scores = scores[human_mask]
-        
-        # Convert to xywh format
-        if len(human_bboxes) > 0:
-            human_bboxes[:, 2] -= human_bboxes[:, 0]
-            human_bboxes[:, 3] -= human_bboxes[:, 1]
-        
-        results.append({
-            "bboxes": human_bboxes,
-            "scores": human_scores
-        })
-    return results

From bba86890a4ed252e48801702f2ab062492ae6668 Mon Sep 17 00:00:00 2001
From: maximpavliv <maxim.pavliv@gmail.com>
Date: Mon, 21 Jul 2025 21:05:22 +0200
Subject: [PATCH 26/34] Revert base Runner

---
 .../pose_estimation_pytorch/runners/base.py   | 38 +------------------
 1 file changed, 1 insertion(+), 37 deletions(-)

diff --git a/deeplabcut/pose_estimation_pytorch/runners/base.py b/deeplabcut/pose_estimation_pytorch/runners/base.py
index c5fae450c2..ee8dbd6d1a 100644
--- a/deeplabcut/pose_estimation_pytorch/runners/base.py
+++ b/deeplabcut/pose_estimation_pytorch/runners/base.py
@@ -114,43 +114,7 @@ def load_snapshot(
             The content of the snapshot file.
         """
         snapshot = attempt_snapshot_load(snapshot_path, device, weights_only)
-        
-        # Handle the case where snapshot keys have 'model.' prefix
-        snapshot_weights = snapshot["model"]
-        model_state_dict = model.state_dict()
-        
-        # Diagnostic: Always add 'model.' prefix for superanimal_topviewmouse detectors
-        is_topviewmouse = hasattr(model, 'superanimal_name') and getattr(model, 'superanimal_name', None) == 'superanimal_topviewmouse'
-        is_detector = 'FasterRCNN' in str(type(model)) or 'SSDLite' in str(type(model))
-        if is_topviewmouse and is_detector:
-            print(f"DEBUG: Forcing prefix ADD for superanimal_topviewmouse detector!")
-            cleaned_weights = {}
-            for key, value in snapshot_weights.items():
-                if not key.startswith('model.'):
-                    cleaned_key = 'model.' + key  # Add 'model.' prefix
-                    cleaned_weights[cleaned_key] = value
-                else:
-                    cleaned_weights[key] = value
-            print(f"DEBUG: Loading cleaned weights with {len(cleaned_weights)} keys")
-            model.load_state_dict(cleaned_weights)
-        elif (any(key.startswith('model.') for key in snapshot_weights.keys()) and 
-            not any(key.startswith('model.') for key in model_state_dict.keys())):
-            print(f"DEBUG: Detected 'model.' prefix mismatch, cleaning keys...")
-            # Strip the 'model.' prefix from snapshot keys
-            cleaned_weights = {}
-            for key, value in snapshot_weights.items():
-                if key.startswith('model.'):
-                    cleaned_key = key[6:]  # Remove 'model.' prefix
-                    cleaned_weights[cleaned_key] = value
-                else:
-                    cleaned_weights[key] = value
-            print(f"DEBUG: Loading cleaned weights with {len(cleaned_weights)} keys")
-            model.load_state_dict(cleaned_weights)
-        else:
-            print(f"DEBUG: No prefix mismatch, loading original weights")
-            # Use original snapshot weights
-            model.load_state_dict(snapshot["model"])
-        
+        model.load_state_dict(snapshot["model"])
         return snapshot
 
 

From 25fa08dfc43dd69db2ee0d94996341bb8ebdd056 Mon Sep 17 00:00:00 2001
From: maximpavliv <maxim.pavliv@gmail.com>
Date: Thu, 31 Jul 2025 13:27:05 +0200
Subject: [PATCH 27/34] Fix superanimal_humanbody unit test

---
 test_superanimal_humanbody.py                 | 71 -------------------
 ...test_filtered_detector_inference_runner.py | 54 ++++++++++++++
 2 files changed, 54 insertions(+), 71 deletions(-)
 delete mode 100644 test_superanimal_humanbody.py
 create mode 100644 tests/pose_estimation_pytorch/runners/test_filtered_detector_inference_runner.py

diff --git a/test_superanimal_humanbody.py b/test_superanimal_humanbody.py
deleted file mode 100644
index d5b9d8af75..0000000000
--- a/test_superanimal_humanbody.py
+++ /dev/null
@@ -1,71 +0,0 @@
-#!/usr/bin/env python3
-"""
-Test script for superanimal_humanbody with torchvision detector
-"""
-
-import torch
-import torchvision.models.detection as detection
-from deeplabcut.pose_estimation_pytorch.modelzoo import load_super_animal_config
-
-def test_torchvision_detector():
-    """Test that the torchvision detector works with superanimal_humanbody"""
-    
-    # Load the superanimal_humanbody config
-    config = load_super_animal_config(
-        super_animal="superanimal_humanbody",
-        model_name="rtmpose_x",
-        detector_name="fasterrcnn_mobilenet_v3_large_fpn",
-    )
-    
-    print("Config loaded successfully!")
-    print(f"Model method: {config['method']}")
-    print(f"Detector variant: {config['detector']['model']['variant']}")
-    
-    # Check if the detector is configured to use torchvision
-    detector_config = config['detector']['model']
-    print(f"Detector config: {detector_config}")
-    
-    # Test loading the torchvision detector directly
-    print("\nTesting torchvision detector loading...")
-    weights = detection.FasterRCNN_MobileNet_V3_Large_FPN_Weights.DEFAULT
-    detector = detection.fasterrcnn_mobilenet_v3_large_fpn(
-        weights=weights, box_score_thresh=0.6,
-    )
-    detector.eval()
-    print("Torchvision detector loaded successfully!")
-    
-    # Test that the detector config matches what we expect for torchvision
-    print("\nTesting detector config compatibility...")
-    expected_variant = "fasterrcnn_mobilenet_v3_large_fpn"
-    actual_variant = detector_config.get("variant", "")
-    
-    if actual_variant == expected_variant:
-        print(f"✅ Detector variant matches expected: {expected_variant}")
-    else:
-        print(f"❌ Detector variant mismatch. Expected: {expected_variant}, Got: {actual_variant}")
-        return False
-    
-    # Test that the config has the correct structure for torchvision detector
-    if "type" in detector_config and detector_config["type"] == "FasterRCNN":
-        print("✅ Detector type is correctly set to FasterRCNN")
-    else:
-        print("❌ Detector type is not correctly set")
-        return False
-    
-    # Test that the config allows for torchvision weights (no pretrained field or pretrained=False)
-    if "pretrained" not in detector_config or detector_config.get("pretrained") is False:
-        print("✅ Detector config allows torchvision weights")
-    else:
-        print("❌ Detector config has pretrained=True, which may conflict with torchvision weights")
-        return False
-    
-    print("\n✅ All tests passed! The torchvision detector integration is working correctly.")
-    return True
-
-if __name__ == "__main__":
-    print("Testing superanimal_humanbody with torchvision detector...")
-    success = test_torchvision_detector()
-    if success:
-        print("\n✅ Test passed! The torchvision detector works with superanimal_humanbody")
-    else:
-        print("\n❌ Test failed! There's an issue with the torchvision detector integration") 
\ No newline at end of file
diff --git a/tests/pose_estimation_pytorch/runners/test_filtered_detector_inference_runner.py b/tests/pose_estimation_pytorch/runners/test_filtered_detector_inference_runner.py
new file mode 100644
index 0000000000..8fa3242850
--- /dev/null
+++ b/tests/pose_estimation_pytorch/runners/test_filtered_detector_inference_runner.py
@@ -0,0 +1,54 @@
+#!/usr/bin/env python3
+"""
+Test script for superanimal_humanbody with torchvision detector
+"""
+
+from deeplabcut.pose_estimation_pytorch.apis.utils import TORCHVISION_DETECTORS, \
+    get_filtered_coco_detector_inference_runner
+from deeplabcut.pose_estimation_pytorch.models.detectors.filtered_detector import FilteredDetector
+from deeplabcut.pose_estimation_pytorch.modelzoo import load_super_animal_config
+
+def test_torchvision_detector():
+    """Test that the torchvision detector works with superanimal_humanbody"""
+    for detector_name in TORCHVISION_DETECTORS:
+
+        # Load the superanimal_humanbody config
+        superanimal_config = load_super_animal_config(
+            super_animal="superanimal_humanbody",
+            model_name="rtmpose_x",
+            detector_name=detector_name,
+        )
+        print("Config loaded successfully!")
+
+        # Test loading the torchvision detector directly
+        print("\nTesting torchvision detector loading...")
+        entry = TORCHVISION_DETECTORS[detector_name]
+        weights = entry["weights"]
+        coco_detector = entry["fn"](weights=weights, box_score_thresh=0.6)
+        coco_detector.eval()
+        print("Torchvision detector loaded successfully!")
+
+        # Test loading the FilteredDetector
+        COCO_PERSON = 1  # COCO class ID for person
+        person_detector = FilteredDetector(coco_detector, class_id=COCO_PERSON)
+        person_detector.eval()
+        print("Filtered detector loaded successfully!")
+
+        _ = get_filtered_coco_detector_inference_runner(
+            model_name=detector_name,
+            category_id=COCO_PERSON,
+            batch_size=1,
+            model_config=superanimal_config,
+        )
+        print("Filtered detector runner created successfully!")
+
+    print("\n✅ All tests passed! The torchvision detector integration is working correctly.")
+    return True
+
+if __name__ == "__main__":
+    print("Testing superanimal_humanbody with torchvision detector...")
+    success = test_torchvision_detector()
+    if success:
+        print("\n✅ Test passed! The torchvision detector works with superanimal_humanbody")
+    else:
+        print("\n❌ Test failed! There's an issue with the torchvision detector integration") 
\ No newline at end of file

From 425484b5d24c4b2365e10add9b315e998571db77 Mon Sep 17 00:00:00 2001
From: maximpavliv <maxim.pavliv@gmail.com>
Date: Thu, 31 Jul 2025 13:27:33 +0200
Subject: [PATCH 28/34] Disable video adaptation for superanimal_humanbody

---
 deeplabcut/modelzoo/video_inference.py | 7 +++++++
 1 file changed, 7 insertions(+)

diff --git a/deeplabcut/modelzoo/video_inference.py b/deeplabcut/modelzoo/video_inference.py
index 46ed21e7d3..339772ca6d 100644
--- a/deeplabcut/modelzoo/video_inference.py
+++ b/deeplabcut/modelzoo/video_inference.py
@@ -390,7 +390,13 @@ def video_inference_superanimal(
         )
 
         config = update_config(config, max_individuals, device)
+
         output_suffix = "_before_adapt"
+
+        if superanimal_name == "superanimal_humanbody" and video_adapt:
+            print(f"Video adaptation currently not supported for {superanimal_name}. Setting it to false.")
+            video_adapt = False
+
         if video_adapt:
             # the users can pass in many videos. For now, we only use one video for
             # video adaptation. As reported in Ye et al. 2024, one video should be
@@ -414,6 +420,7 @@ def video_inference_superanimal(
                 output_suffix=output_suffix,
                 plot_bboxes=plot_bboxes,
                 bboxes_pcutoff=bbox_threshold,
+                torchvision_detector_name=torchvision_detector_name,
             )
 
             # we prepare the pseudo dataset in the same folder of the target video

From 4b04013819ac8f07107b961dd228e6dd7f83a673 Mon Sep 17 00:00:00 2001
From: maximpavliv <maxim.pavliv@gmail.com>
Date: Thu, 31 Jul 2025 13:28:00 +0200
Subject: [PATCH 29/34] Fix testscript_superanimal_inference.py

---
 examples/testscript_superanimal_inference.py | 11 -----------
 1 file changed, 11 deletions(-)

diff --git a/examples/testscript_superanimal_inference.py b/examples/testscript_superanimal_inference.py
index ee32b3b02a..c0a042e08a 100644
--- a/examples/testscript_superanimal_inference.py
+++ b/examples/testscript_superanimal_inference.py
@@ -47,14 +47,3 @@
         videotype=".avi",
         scale_list=scale_list,
     )
-
-    print("testing superanimal_humanbody")
-    superanimal_name = "superanimal_humanbody"
-    deeplabcut.video_inference_superanimal(
-        video,
-        superanimal_name,
-        model_name="rtmpose_x",
-        detector_name="fasterrcnn_mobilenet_v3_large_fpn",
-        videotype=".avi",
-        scale_list=scale_list,
-    )

From eea470e7f1a2e2f98616d5ed685e2daa3f988ff6 Mon Sep 17 00:00:00 2001
From: maximpavliv <maxim.pavliv@gmail.com>
Date: Thu, 31 Jul 2025 13:28:18 +0200
Subject: [PATCH 30/34] Remove debug print

---
 examples/utils.py | 1 -
 1 file changed, 1 deletion(-)

diff --git a/examples/utils.py b/examples/utils.py
index 6b735a1c40..3877b335b4 100644
--- a/examples/utils.py
+++ b/examples/utils.py
@@ -256,7 +256,6 @@ def generate_video_from_images(image_dir: Path, output_video: Path) -> None:
 
 def create_fake_project(path: Path, params: SyntheticProjectParameters) -> None:
     if path.exists():
-        print(f"[DEBUG] Path exists: {path} (is_dir={path.is_dir()}, is_file={path.is_file()})")
         raise ValueError(f"Cannot create a fake project at an existing path")
 
     scorer = "synthetic"

From 3d47a36e438321a2ef8ef1200c3e440d64bd4673 Mon Sep 17 00:00:00 2001
From: maximpavliv <maxim.pavliv@gmail.com>
Date: Thu, 31 Jul 2025 13:45:23 +0200
Subject: [PATCH 31/34] Black formatting

---
 deeplabcut/modelzoo/utils.py                  |  12 +-
 deeplabcut/modelzoo/video_inference.py        |  10 +-
 .../apis/analyze_images.py                    |  25 ++--
 .../pose_estimation_pytorch/apis/utils.py     |   6 +-
 .../pose_estimation_pytorch/apis/videos.py    | 109 +++++++++++++-----
 .../data/transforms.py                        |   4 +-
 .../models/detectors/filtered_detector.py     |   2 +-
 .../modelzoo/inference.py                     |   7 +-
 .../pose_estimation_pytorch/modelzoo/utils.py |   8 +-
 .../pose_estimation_pytorch/runners/base.py   |   6 +-
 examples/utils.py                             |   1 +
 ...test_filtered_detector_inference_runner.py |  24 +++-
 12 files changed, 155 insertions(+), 59 deletions(-)

diff --git a/deeplabcut/modelzoo/utils.py b/deeplabcut/modelzoo/utils.py
index 00de90afb9..066c5f6dbc 100644
--- a/deeplabcut/modelzoo/utils.py
+++ b/deeplabcut/modelzoo/utils.py
@@ -78,10 +78,16 @@ def get_super_animal_scorer(
         The DLC scorer name to use for the given SuperAnimal models.
     """
     if detector_snapshot_path is not None and torchvision_detector_name is not None:
-        raise ValueError("Provide only one of `detector_snapshot_path` or `torchvision_detector_name`, not both.")
+        raise ValueError(
+            "Provide only one of `detector_snapshot_path` or `torchvision_detector_name`, not both."
+        )
     super_animal_prefix = super_animal + "_"
     # Always use model name first
-    model_name = model_snapshot_path.stem if hasattr(model_snapshot_path, "stem") else str(model_snapshot_path)
+    model_name = (
+        model_snapshot_path.stem
+        if hasattr(model_snapshot_path, "stem")
+        else str(model_snapshot_path)
+    )
     if model_name.startswith(super_animal_prefix):
         model_name = model_name[len(super_animal_prefix) :]
     dlc_scorer = f"{super_animal_prefix}{model_name}"
@@ -90,7 +96,7 @@ def get_super_animal_scorer(
     if detector_snapshot_path is not None:
         detector_name = detector_snapshot_path.stem
         if detector_name.startswith(super_animal_prefix):
-            detector_name = detector_name[len(super_animal_prefix):]
+            detector_name = detector_name[len(super_animal_prefix) :]
         dlc_scorer += f"_{detector_name}_"
     elif torchvision_detector_name is not None:
         dlc_scorer += f"_{torchvision_detector_name}_"
diff --git a/deeplabcut/modelzoo/video_inference.py b/deeplabcut/modelzoo/video_inference.py
index 339772ca6d..e2dfaca256 100644
--- a/deeplabcut/modelzoo/video_inference.py
+++ b/deeplabcut/modelzoo/video_inference.py
@@ -368,7 +368,11 @@ def video_inference_superanimal(
             config = load_super_animal_config(
                 super_animal=superanimal_name,
                 model_name=model_name,
-                detector_name=detector_name if superanimal_name != "superanimal_humanbody" else None,
+                detector_name=(
+                    detector_name
+                    if superanimal_name != "superanimal_humanbody"
+                    else None
+                ),
             )
 
         pose_model_path = customized_pose_checkpoint
@@ -394,7 +398,9 @@ def video_inference_superanimal(
         output_suffix = "_before_adapt"
 
         if superanimal_name == "superanimal_humanbody" and video_adapt:
-            print(f"Video adaptation currently not supported for {superanimal_name}. Setting it to false.")
+            print(
+                f"Video adaptation currently not supported for {superanimal_name}. Setting it to false."
+            )
             video_adapt = False
 
         if video_adapt:
diff --git a/deeplabcut/pose_estimation_pytorch/apis/analyze_images.py b/deeplabcut/pose_estimation_pytorch/apis/analyze_images.py
index aca51296df..05668cf8ba 100644
--- a/deeplabcut/pose_estimation_pytorch/apis/analyze_images.py
+++ b/deeplabcut/pose_estimation_pytorch/apis/analyze_images.py
@@ -35,7 +35,8 @@
     get_pose_inference_runner,
     get_scorer_name,
     get_scorer_uid,
-    parse_snapshot_index_for_analysis, get_filtered_coco_detector_inference_runner,
+    parse_snapshot_index_for_analysis,
+    get_filtered_coco_detector_inference_runner,
 )
 from deeplabcut.pose_estimation_pytorch.modelzoo.utils import update_config
 from deeplabcut.pose_estimation_pytorch.task import Task
@@ -103,7 +104,7 @@ def superanimal_analyze_images(
             The device to use to run image analysis.
 
         pose_threshold: float, default=0.4
-            The cutoff score when plotting pose predictions. To note, this is called 
+            The cutoff score when plotting pose predictions. To note, this is called
             pcutoff in other parts of the code. Must be in (0, 1).
 
         bbox_threshold: float, default=0.1
@@ -182,7 +183,9 @@ def superanimal_analyze_images(
         config = modelzoo.load_super_animal_config(
             super_animal=superanimal_name,
             model_name=model_name,
-            detector_name=detector_name if superanimal_name != "superanimal_humanbody" else None,
+            detector_name=(
+                detector_name if superanimal_name != "superanimal_humanbody" else None
+            ),
         )
     elif isinstance(customized_model_config, (str, Path)):
         config = config_utils.read_config_as_dict(customized_model_config)
@@ -211,9 +214,7 @@ def superanimal_analyze_images(
         skeleton = []
         bodyparts = config["metadata"]["bodyparts"]
         for bpt_0, bpt_1 in skeleton_bodyparts:
-            skeleton.append(
-                (bodyparts.index(bpt_0), bodyparts.index(bpt_1))
-            )
+            skeleton.append((bodyparts.index(bpt_0), bodyparts.index(bpt_1)))
 
     visualization.create_labeled_images(
         predictions=predictions,
@@ -438,7 +439,11 @@ def analyze_image_folder(
         model_cfg = config_utils.read_config_as_dict(model_cfg)
 
     pose_task = Task(model_cfg["method"])
-    if pose_task == Task.TOP_DOWN and detector_path is None and filtered_detector_config is None:
+    if (
+        pose_task == Task.TOP_DOWN
+        and detector_path is None
+        and filtered_detector_config is None
+    ):
         raise ValueError(
             "A detector path or filtered_detector_config must be specified for image analysis using top-down models"
             f" Please specify the `detector_path` parameter or the `filtered_detector_config` parameter."
@@ -459,7 +464,7 @@ def analyze_image_folder(
 
     image_suffixes = ".png", ".jpg", ".jpeg"
     if frame_type is not None:
-        image_suffixes = (frame_type, )
+        image_suffixes = (frame_type,)
 
     image_paths = parse_images_and_image_folders(images, image_suffixes)
     pose_inputs = image_paths
@@ -477,7 +482,9 @@ def analyze_image_folder(
         model_name = filtered_detector_config["torchvision_detector_name"]
         category_id = filtered_detector_config["category_id"]
 
-        logging.info(f"Running object detection with filtered torchvision detector '{model_name}', category_id={category_id}")
+        logging.info(
+            f"Running object detection with filtered torchvision detector '{model_name}', category_id={category_id}"
+        )
         detector_runner = get_filtered_coco_detector_inference_runner(
             model_name=model_name,
             category_id=category_id,
diff --git a/deeplabcut/pose_estimation_pytorch/apis/utils.py b/deeplabcut/pose_estimation_pytorch/apis/utils.py
index 958146142a..58314ccd79 100644
--- a/deeplabcut/pose_estimation_pytorch/apis/utils.py
+++ b/deeplabcut/pose_estimation_pytorch/apis/utils.py
@@ -47,7 +47,9 @@
 )
 from deeplabcut.pose_estimation_pytorch.data.transforms import build_transforms
 from deeplabcut.pose_estimation_pytorch.models import DETECTORS, PoseModel
-from deeplabcut.pose_estimation_pytorch.models.detectors.filtered_detector import FilteredDetector
+from deeplabcut.pose_estimation_pytorch.models.detectors.filtered_detector import (
+    FilteredDetector,
+)
 from deeplabcut.pose_estimation_pytorch.runners import (
     build_inference_runner,
     CTDTrackingConfig,
@@ -693,6 +695,8 @@ def get_detector_inference_runner(
         "weights": FasterRCNN_MobileNet_V3_Large_FPN_Weights.DEFAULT,
     },
 }
+
+
 def get_filtered_coco_detector_inference_runner(
     model_name: str,
     category_id: int,
diff --git a/deeplabcut/pose_estimation_pytorch/apis/videos.py b/deeplabcut/pose_estimation_pytorch/apis/videos.py
index 96cc879ecb..29d6d55225 100644
--- a/deeplabcut/pose_estimation_pytorch/apis/videos.py
+++ b/deeplabcut/pose_estimation_pytorch/apis/videos.py
@@ -396,7 +396,10 @@ def analyze_videos(
     pose_cfg = auxiliaryfunctions.read_plainconfig(pose_cfg_path)
 
     snapshot_index, detector_snapshot_index = utils.parse_snapshot_index_for_analysis(
-        loader.project_cfg, loader.model_cfg, snapshot_index, detector_snapshot_index,
+        loader.project_cfg,
+        loader.model_cfg,
+        snapshot_index,
+        detector_snapshot_index,
     )
 
     if cropping is None and loader.project_cfg.get("cropping", False):
@@ -456,10 +459,12 @@ def analyze_videos(
     except (ValueError, IndexError) as e:
         print(f"Error loading snapshot with index {snapshot_index}: {e}")
         print("Attempting to find available snapshots...")
-        
+
         # Try to get all available snapshots
         try:
-            all_snapshots = utils.get_model_snapshots("all", loader.model_folder, loader.pose_task)
+            all_snapshots = utils.get_model_snapshots(
+                "all", loader.model_folder, loader.pose_task
+            )
             if all_snapshots:
                 # Try to find a "best" snapshot first
                 best_snapshots = [s for s in all_snapshots if s.best]
@@ -469,21 +474,31 @@ def analyze_videos(
                 else:
                     # Use the last available snapshot
                     snapshot = all_snapshots[-1]
-                    print(f"No best snapshot found, using last available: {snapshot.path}")
+                    print(
+                        f"No best snapshot found, using last available: {snapshot.path}"
+                    )
             else:
                 raise FileNotFoundError(f"No snapshots found in {loader.model_folder}")
         except Exception as fallback_error:
-            raise FileNotFoundError(f"Failed to load any snapshots from {loader.model_folder}. Original error: {e}. Fallback error: {fallback_error}")
+            raise FileNotFoundError(
+                f"Failed to load any snapshots from {loader.model_folder}. Original error: {e}. Fallback error: {fallback_error}"
+            )
 
     # Additional validation for best snapshots
     if "best" in str(snapshot.path) and not snapshot.path.exists():
-        print(f"Warning: Best snapshot path {snapshot.path} does not exist. Checking for alternative snapshots...")
+        print(
+            f"Warning: Best snapshot path {snapshot.path} does not exist. Checking for alternative snapshots..."
+        )
         # Try to find any available snapshot
         try:
-            all_snapshots = utils.get_model_snapshots("all", loader.model_folder, loader.pose_task)
+            all_snapshots = utils.get_model_snapshots(
+                "all", loader.model_folder, loader.pose_task
+            )
             if all_snapshots:
                 # Try to find a different best snapshot
-                best_snapshots = [s for s in all_snapshots if s.best and s.path.exists()]
+                best_snapshots = [
+                    s for s in all_snapshots if s.best and s.path.exists()
+                ]
                 if best_snapshots:
                     snapshot = best_snapshots[0]
                     print(f"Using alternative best snapshot: {snapshot.path}")
@@ -499,7 +514,7 @@ def analyze_videos(
     # Verify the snapshot file exists
     if not snapshot.path.exists():
         raise FileNotFoundError(f"Snapshot file not found: {snapshot.path}")
-    
+
     print(f"Successfully loaded snapshot: {snapshot.path}")
 
     # Load the BU model for the conditions provider
@@ -512,7 +527,8 @@ def analyze_videos(
             )
         elif isinstance(ctd_conditions, dict):
             cond_provider = get_condition_provider(
-                condition_cfg=ctd_conditions, config=config,
+                condition_cfg=ctd_conditions,
+                config=config,
             )
         else:
             cond_provider = ctd_conditions
@@ -551,50 +567,83 @@ def analyze_videos(
                 detector_snapshot_index, loader.model_folder, Task.DETECT
             )[0]
         except (ValueError, IndexError) as e:
-            print(f"Error loading detector snapshot with index {detector_snapshot_index}: {e}")
+            print(
+                f"Error loading detector snapshot with index {detector_snapshot_index}: {e}"
+            )
             print("Attempting to find available detector snapshots...")
-            
+
             # Try to get all available detector snapshots
             try:
-                all_detector_snapshots = utils.get_model_snapshots("all", loader.model_folder, Task.DETECT)
+                all_detector_snapshots = utils.get_model_snapshots(
+                    "all", loader.model_folder, Task.DETECT
+                )
                 if all_detector_snapshots:
                     # Try to find a "best" detector snapshot first
-                    best_detector_snapshots = [s for s in all_detector_snapshots if s.best]
+                    best_detector_snapshots = [
+                        s for s in all_detector_snapshots if s.best
+                    ]
                     if best_detector_snapshots:
                         detector_snapshot = best_detector_snapshots[0]
-                        print(f"Found and using best detector snapshot: {detector_snapshot.path}")
+                        print(
+                            f"Found and using best detector snapshot: {detector_snapshot.path}"
+                        )
                     else:
                         # Use the last available detector snapshot
                         detector_snapshot = all_detector_snapshots[-1]
-                        print(f"No best detector snapshot found, using last available: {detector_snapshot.path}")
+                        print(
+                            f"No best detector snapshot found, using last available: {detector_snapshot.path}"
+                        )
                 else:
-                    raise FileNotFoundError(f"No detector snapshots found in {loader.model_folder}")
+                    raise FileNotFoundError(
+                        f"No detector snapshots found in {loader.model_folder}"
+                    )
             except Exception as fallback_error:
-                raise FileNotFoundError(f"Failed to load any detector snapshots from {loader.model_folder}. Original error: {e}. Fallback error: {fallback_error}")
+                raise FileNotFoundError(
+                    f"Failed to load any detector snapshots from {loader.model_folder}. Original error: {e}. Fallback error: {fallback_error}"
+                )
 
         # Additional validation for detector snapshots
-        if "best" in str(detector_snapshot.path) and not detector_snapshot.path.exists():
-            print(f"Warning: Best detector snapshot path {detector_snapshot.path} does not exist. Checking for alternative detector snapshots...")
+        if (
+            "best" in str(detector_snapshot.path)
+            and not detector_snapshot.path.exists()
+        ):
+            print(
+                f"Warning: Best detector snapshot path {detector_snapshot.path} does not exist. Checking for alternative detector snapshots..."
+            )
             try:
-                all_detector_snapshots = utils.get_model_snapshots("all", loader.model_folder, Task.DETECT)
+                all_detector_snapshots = utils.get_model_snapshots(
+                    "all", loader.model_folder, Task.DETECT
+                )
                 if all_detector_snapshots:
                     # Try to find a different best detector snapshot
-                    best_detector_snapshots = [s for s in all_detector_snapshots if s.best and s.path.exists()]
+                    best_detector_snapshots = [
+                        s for s in all_detector_snapshots if s.best and s.path.exists()
+                    ]
                     if best_detector_snapshots:
                         detector_snapshot = best_detector_snapshots[0]
-                        print(f"Using alternative best detector snapshot: {detector_snapshot.path}")
+                        print(
+                            f"Using alternative best detector snapshot: {detector_snapshot.path}"
+                        )
                     else:
                         # Use the last available detector snapshot
                         detector_snapshot = all_detector_snapshots[-1]
-                        print(f"Using alternative detector snapshot: {detector_snapshot.path}")
+                        print(
+                            f"Using alternative detector snapshot: {detector_snapshot.path}"
+                        )
                 else:
-                    raise FileNotFoundError(f"No detector snapshots found in {loader.model_folder}")
+                    raise FileNotFoundError(
+                        f"No detector snapshots found in {loader.model_folder}"
+                    )
             except Exception as e:
-                raise FileNotFoundError(f"Failed to find alternative detector snapshots: {e}")
+                raise FileNotFoundError(
+                    f"Failed to find alternative detector snapshots: {e}"
+                )
 
         # Verify the detector snapshot file exists
         if not detector_snapshot.path.exists():
-            raise FileNotFoundError(f"Detector snapshot file not found: {detector_snapshot.path}")
+            raise FileNotFoundError(
+                f"Detector snapshot file not found: {detector_snapshot.path}"
+            )
 
         print(f"  -> Using detector {detector_snapshot.path}")
         detector_runner = utils.get_detector_inference_runner(
@@ -610,7 +659,7 @@ def analyze_videos(
     # Reading video and init variables
     videos = utils.list_videos_in_folder(videos, videotype, shuffle=in_random_order)
     h5_files_created = False  # Track if any .h5 files were created
-    
+
     for video in videos:
         if destfolder is None:
             output_path = video.parent
@@ -706,7 +755,9 @@ def analyze_videos(
                     for i in range(num_frames):
                         frame_data = full_data.get("frame" + str(i).zfill(str_width))
                         if frame_data is None:
-                            pose = np.full((len(individuals), len(bodyparts), 3), np.nan)
+                            pose = np.full(
+                                (len(individuals), len(bodyparts), 3), np.nan
+                            )
                             ctd_predictions.append(dict(bodyparts=pose))
                             continue
 
diff --git a/deeplabcut/pose_estimation_pytorch/data/transforms.py b/deeplabcut/pose_estimation_pytorch/data/transforms.py
index 7a296525c1..cb321a0aca 100644
--- a/deeplabcut/pose_estimation_pytorch/data/transforms.py
+++ b/deeplabcut/pose_estimation_pytorch/data/transforms.py
@@ -127,7 +127,7 @@ def build_transforms(augmentations: dict) -> A.BaseCompose:
             noise = 0.05 * 255
         transforms.append(
             A.GaussNoise(
-                var_limit=(0, noise ** 2),
+                var_limit=(0, noise**2),
                 mean=0,
                 per_channel=True,
                 # Albumentations doesn't support per_channel = 0.5
@@ -475,7 +475,7 @@ def __init__(
             p,
         )
         self._neighbor_dist = 3
-        self._neighbor_dist_square = self._neighbor_dist ** 2
+        self._neighbor_dist_square = self._neighbor_dist**2
 
     def apply_to_keypoints(
         self, keypoints: Sequence[float], random_state: int | None = None, **params
diff --git a/deeplabcut/pose_estimation_pytorch/models/detectors/filtered_detector.py b/deeplabcut/pose_estimation_pytorch/models/detectors/filtered_detector.py
index e4278dba65..7ea4da46a5 100644
--- a/deeplabcut/pose_estimation_pytorch/models/detectors/filtered_detector.py
+++ b/deeplabcut/pose_estimation_pytorch/models/detectors/filtered_detector.py
@@ -37,4 +37,4 @@ def forward(self, images: list[torch.Tensor]) -> list[dict[str, torch.Tensor]]:
             filtered_outputs.append(filtered_output)
 
         losses = {}
-        return losses, filtered_outputs
\ No newline at end of file
+        return losses, filtered_outputs
diff --git a/deeplabcut/pose_estimation_pytorch/modelzoo/inference.py b/deeplabcut/pose_estimation_pytorch/modelzoo/inference.py
index 970e977865..e7a8c64909 100644
--- a/deeplabcut/pose_estimation_pytorch/modelzoo/inference.py
+++ b/deeplabcut/pose_estimation_pytorch/modelzoo/inference.py
@@ -24,7 +24,7 @@
 from deeplabcut.pose_estimation_pytorch.apis.utils import (
     get_inference_runners,
     get_pose_inference_runner,
-    get_filtered_coco_detector_inference_runner
+    get_filtered_coco_detector_inference_runner,
 )
 from deeplabcut.pose_estimation_pytorch.modelzoo.utils import (
     raise_warning_if_called_directly,
@@ -150,7 +150,10 @@ def _video_inference_superanimal(
         print(f"Processing video {video_path}")
 
         dlc_scorer = get_super_animal_scorer(
-            superanimal_name, model_snapshot_path, detector_snapshot_path, torchvision_detector_name
+            superanimal_name,
+            model_snapshot_path,
+            detector_snapshot_path,
+            torchvision_detector_name,
         )
 
         output_prefix = f"{Path(video_path).stem}_{dlc_scorer}"
diff --git a/deeplabcut/pose_estimation_pytorch/modelzoo/utils.py b/deeplabcut/pose_estimation_pytorch/modelzoo/utils.py
index 9ff66b4240..2cb06e2ffe 100644
--- a/deeplabcut/pose_estimation_pytorch/modelzoo/utils.py
+++ b/deeplabcut/pose_estimation_pytorch/modelzoo/utils.py
@@ -117,7 +117,9 @@ def load_super_animal_config(
     else:
         model_config["method"] = "TD"
         if super_animal != "superanimal_humanbody":
-            detector_cfg_path = get_super_animal_model_config_path(model_name=detector_name)
+            detector_cfg_path = get_super_animal_model_config_path(
+                model_name=detector_name
+            )
             detector_cfg = read_config_as_dict(detector_cfg_path)
             model_config["detector"] = detector_cfg
     return model_config
@@ -141,7 +143,9 @@ def download_super_animal_snapshot(dataset: str, model_name: str) -> Path:
     model_filename = f"{model_name}.pt"
     model_path = snapshot_dir / model_filename
 
-    download_huggingface_model(model_name, target_dir=str(snapshot_dir), rename_mapping=model_filename)
+    download_huggingface_model(
+        model_name, target_dir=str(snapshot_dir), rename_mapping=model_filename
+    )
     if not model_path.exists():
         raise RuntimeError(f"Failed to download {model_name} to {model_path}")
 
diff --git a/deeplabcut/pose_estimation_pytorch/runners/base.py b/deeplabcut/pose_estimation_pytorch/runners/base.py
index ee8dbd6d1a..f0b4dd735a 100644
--- a/deeplabcut/pose_estimation_pytorch/runners/base.py
+++ b/deeplabcut/pose_estimation_pytorch/runners/base.py
@@ -23,8 +23,9 @@
 
 ModelType = TypeVar("ModelType", bound=nn.Module)
 
-_load_weights_only: bool = (
-    os.getenv("TORCH_LOAD_WEIGHTS_ONLY", "true").lower() in ("true", "1")
+_load_weights_only: bool = os.getenv("TORCH_LOAD_WEIGHTS_ONLY", "true").lower() in (
+    "true",
+    "1",
 )
 
 
@@ -218,6 +219,7 @@ def _add_numpy_to_torch_safe_globals():
     try:
         from numpy.core.multiarray import scalar
         from numpy.dtypes import Float64DType
+
         torch.serialization.add_safe_globals([np.dtype, Float64DType, scalar])
     except Exception:
         pass
diff --git a/examples/utils.py b/examples/utils.py
index 3877b335b4..656f15f300 100644
--- a/examples/utils.py
+++ b/examples/utils.py
@@ -18,6 +18,7 @@
 from typing import Any
 
 import matplotlib
+
 matplotlib.use("Agg")  # Non-interactive backend, for CI/CD on Windows
 
 import cv2
diff --git a/tests/pose_estimation_pytorch/runners/test_filtered_detector_inference_runner.py b/tests/pose_estimation_pytorch/runners/test_filtered_detector_inference_runner.py
index 8fa3242850..5d52050026 100644
--- a/tests/pose_estimation_pytorch/runners/test_filtered_detector_inference_runner.py
+++ b/tests/pose_estimation_pytorch/runners/test_filtered_detector_inference_runner.py
@@ -3,11 +3,16 @@
 Test script for superanimal_humanbody with torchvision detector
 """
 
-from deeplabcut.pose_estimation_pytorch.apis.utils import TORCHVISION_DETECTORS, \
-    get_filtered_coco_detector_inference_runner
-from deeplabcut.pose_estimation_pytorch.models.detectors.filtered_detector import FilteredDetector
+from deeplabcut.pose_estimation_pytorch.apis.utils import (
+    TORCHVISION_DETECTORS,
+    get_filtered_coco_detector_inference_runner,
+)
+from deeplabcut.pose_estimation_pytorch.models.detectors.filtered_detector import (
+    FilteredDetector,
+)
 from deeplabcut.pose_estimation_pytorch.modelzoo import load_super_animal_config
 
+
 def test_torchvision_detector():
     """Test that the torchvision detector works with superanimal_humanbody"""
     for detector_name in TORCHVISION_DETECTORS:
@@ -42,13 +47,20 @@ def test_torchvision_detector():
         )
         print("Filtered detector runner created successfully!")
 
-    print("\n✅ All tests passed! The torchvision detector integration is working correctly.")
+    print(
+        "\n✅ All tests passed! The torchvision detector integration is working correctly."
+    )
     return True
 
+
 if __name__ == "__main__":
     print("Testing superanimal_humanbody with torchvision detector...")
     success = test_torchvision_detector()
     if success:
-        print("\n✅ Test passed! The torchvision detector works with superanimal_humanbody")
+        print(
+            "\n✅ Test passed! The torchvision detector works with superanimal_humanbody"
+        )
     else:
-        print("\n❌ Test failed! There's an issue with the torchvision detector integration") 
\ No newline at end of file
+        print(
+            "\n❌ Test failed! There's an issue with the torchvision detector integration"
+        )

From 03456da84c05fa5e6575c52ed3e0c38b12ebb2bb Mon Sep 17 00:00:00 2001
From: maximpavliv <maxim.pavliv@gmail.com>
Date: Fri, 22 Aug 2025 10:16:05 +0200
Subject: [PATCH 32/34] SimCCPredictor: add visibility computation

---
 .../models/predictors/sim_cc.py                  | 16 ++++++++++++++--
 1 file changed, 14 insertions(+), 2 deletions(-)

diff --git a/deeplabcut/pose_estimation_pytorch/models/predictors/sim_cc.py b/deeplabcut/pose_estimation_pytorch/models/predictors/sim_cc.py
index 25d953c178..3f769fc633 100644
--- a/deeplabcut/pose_estimation_pytorch/models/predictors/sim_cc.py
+++ b/deeplabcut/pose_estimation_pytorch/models/predictors/sim_cc.py
@@ -42,21 +42,33 @@ class SimCCPredictor(BasePredictor):
     def __init__(
         self,
         simcc_split_ratio: float = 2.0,
-        apply_softmax: bool = False,
         normalize_outputs: bool = False,
+        apply_softmax: bool = True,
+        sigma: float | int | tuple[float, ...] = 6.0,
+        decode_beta: float = 150.0,
     ) -> None:
         super().__init__()
         self.simcc_split_ratio = simcc_split_ratio
-        self.apply_softmax = apply_softmax
         self.normalize_outputs = normalize_outputs
+        self.apply_softmax = apply_softmax
+
+        if isinstance(sigma, (float, int)):
+            self.sigma = np.array([sigma, sigma])
+        else:
+            self.sigma = np.array(sigma)
+        self.decode_beta = decode_beta
 
     def forward(
         self, stride: float, outputs: dict[str, torch.Tensor]
     ) -> dict[str, torch.Tensor]:
         x, y = outputs["x"].detach(), outputs["y"].detach()
+
         if self.normalize_outputs:
             x = get_simcc_normalized(x)
             y = get_simcc_normalized(y)
+        else:
+            x = x * (self.sigma[0] * self.decode_beta)
+            y = y * (self.sigma[1] * self.decode_beta)
 
         keypoints, scores = get_simcc_maximum(
             x.cpu().numpy(), y.cpu().numpy(), self.apply_softmax

From c6bd02b59a9f8da4bdf06972fe0e706583d03c6b Mon Sep 17 00:00:00 2001
From: maximpavliv <maxim.pavliv@gmail.com>
Date: Fri, 22 Aug 2025 10:17:43 +0200
Subject: [PATCH 33/34] Add new SimCCPredictor params to model configs

---
 deeplabcut/modelzoo/model_configs/rtmpose_s.yaml              | 4 ++++
 deeplabcut/modelzoo/model_configs/rtmpose_x.yaml              | 4 ++++
 .../config/ctd/ctd_prenet_rtmpose_m.yaml                      | 2 ++
 .../config/ctd/ctd_prenet_rtmpose_s.yaml                      | 2 ++
 .../config/ctd/ctd_prenet_rtmpose_x.yaml                      | 2 ++
 .../config/ctd/ctd_prenet_rtmpose_x_human.yaml                | 2 ++
 .../pose_estimation_pytorch/config/rtmpose/rtmpose_m.yaml     | 2 ++
 .../pose_estimation_pytorch/config/rtmpose/rtmpose_s.yaml     | 2 ++
 .../pose_estimation_pytorch/config/rtmpose/rtmpose_x.yaml     | 2 ++
 9 files changed, 22 insertions(+)

diff --git a/deeplabcut/modelzoo/model_configs/rtmpose_s.yaml b/deeplabcut/modelzoo/model_configs/rtmpose_s.yaml
index 2e7d693ba0..9c80b0583c 100644
--- a/deeplabcut/modelzoo/model_configs/rtmpose_s.yaml
+++ b/deeplabcut/modelzoo/model_configs/rtmpose_s.yaml
@@ -51,6 +51,10 @@ model:
       predictor:
         type: SimCCPredictor
         simcc_split_ratio: 2.0
+        sigma:
+          - 5.66
+          - 5.66
+        decode_beta: 150.0
       input_size:
       - 256
       - 256
diff --git a/deeplabcut/modelzoo/model_configs/rtmpose_x.yaml b/deeplabcut/modelzoo/model_configs/rtmpose_x.yaml
index 9a7df70196..0d1fb8a547 100644
--- a/deeplabcut/modelzoo/model_configs/rtmpose_x.yaml
+++ b/deeplabcut/modelzoo/model_configs/rtmpose_x.yaml
@@ -102,6 +102,10 @@ model:
       predictor:
         type: SimCCPredictor
         simcc_split_ratio: 2.0
+        sigma:
+          - 6.0
+          - 6.93
+        decode_beta: 150.0
       input_size:
       - 288
       - 384
diff --git a/deeplabcut/pose_estimation_pytorch/config/ctd/ctd_prenet_rtmpose_m.yaml b/deeplabcut/pose_estimation_pytorch/config/ctd/ctd_prenet_rtmpose_m.yaml
index ca35c8a3ed..88b708cc39 100644
--- a/deeplabcut/pose_estimation_pytorch/config/ctd/ctd_prenet_rtmpose_m.yaml
+++ b/deeplabcut/pose_estimation_pytorch/config/ctd/ctd_prenet_rtmpose_m.yaml
@@ -55,6 +55,8 @@ model:
       predictor:
         type: SimCCPredictor
         simcc_split_ratio: 2.0
+        sigma: [5.66, 5.66]
+        decode_beta: 150.0
       input_size: [256, 256]
       in_channels: 768
       out_channels: "num_bodyparts"
diff --git a/deeplabcut/pose_estimation_pytorch/config/ctd/ctd_prenet_rtmpose_s.yaml b/deeplabcut/pose_estimation_pytorch/config/ctd/ctd_prenet_rtmpose_s.yaml
index c1870e8160..6ae8b5364a 100644
--- a/deeplabcut/pose_estimation_pytorch/config/ctd/ctd_prenet_rtmpose_s.yaml
+++ b/deeplabcut/pose_estimation_pytorch/config/ctd/ctd_prenet_rtmpose_s.yaml
@@ -55,6 +55,8 @@ model:
       predictor:
         type: SimCCPredictor
         simcc_split_ratio: 2.0
+        sigma: [5.66, 5.66]
+        decode_beta: 150.0
       input_size: [256, 256]
       in_channels: 512
       out_channels: "num_bodyparts"
diff --git a/deeplabcut/pose_estimation_pytorch/config/ctd/ctd_prenet_rtmpose_x.yaml b/deeplabcut/pose_estimation_pytorch/config/ctd/ctd_prenet_rtmpose_x.yaml
index 75c7228d21..f809a0c569 100644
--- a/deeplabcut/pose_estimation_pytorch/config/ctd/ctd_prenet_rtmpose_x.yaml
+++ b/deeplabcut/pose_estimation_pytorch/config/ctd/ctd_prenet_rtmpose_x.yaml
@@ -55,6 +55,8 @@ model:
       predictor:
         type: SimCCPredictor
         simcc_split_ratio: 2.0
+        sigma: [6.93, 6.93]
+        decode_beta: 150.0
       input_size: [384, 384]
       in_channels: 1280
       out_channels: "num_bodyparts"
diff --git a/deeplabcut/pose_estimation_pytorch/config/ctd/ctd_prenet_rtmpose_x_human.yaml b/deeplabcut/pose_estimation_pytorch/config/ctd/ctd_prenet_rtmpose_x_human.yaml
index ad6579fe5e..1d47cb3306 100644
--- a/deeplabcut/pose_estimation_pytorch/config/ctd/ctd_prenet_rtmpose_x_human.yaml
+++ b/deeplabcut/pose_estimation_pytorch/config/ctd/ctd_prenet_rtmpose_x_human.yaml
@@ -56,6 +56,8 @@ model:
       predictor:
         type: SimCCPredictor
         simcc_split_ratio: 2.0
+        sigma: [6., 6.93]
+        decode_beta: 150.0
       input_size: [288, 384]
       in_channels: 1280
       out_channels: "num_bodyparts"
diff --git a/deeplabcut/pose_estimation_pytorch/config/rtmpose/rtmpose_m.yaml b/deeplabcut/pose_estimation_pytorch/config/rtmpose/rtmpose_m.yaml
index d6bc515f94..d2ed3ae52e 100644
--- a/deeplabcut/pose_estimation_pytorch/config/rtmpose/rtmpose_m.yaml
+++ b/deeplabcut/pose_estimation_pytorch/config/rtmpose/rtmpose_m.yaml
@@ -49,6 +49,8 @@ model:
       predictor:
         type: SimCCPredictor
         simcc_split_ratio: 2.0
+        sigma: [5.66, 5.66]
+        decode_beta: 150.0
       input_size: [256, 256]
       in_channels: 768
       out_channels: "num_bodyparts"
diff --git a/deeplabcut/pose_estimation_pytorch/config/rtmpose/rtmpose_s.yaml b/deeplabcut/pose_estimation_pytorch/config/rtmpose/rtmpose_s.yaml
index fbc4ff7ed4..463e104654 100644
--- a/deeplabcut/pose_estimation_pytorch/config/rtmpose/rtmpose_s.yaml
+++ b/deeplabcut/pose_estimation_pytorch/config/rtmpose/rtmpose_s.yaml
@@ -49,6 +49,8 @@ model:
       predictor:
         type: SimCCPredictor
         simcc_split_ratio: 2.0
+        sigma: [5.66, 5.66]
+        decode_beta: 150.0
       input_size: [256, 256]
       in_channels: 512
       out_channels: "num_bodyparts"
diff --git a/deeplabcut/pose_estimation_pytorch/config/rtmpose/rtmpose_x.yaml b/deeplabcut/pose_estimation_pytorch/config/rtmpose/rtmpose_x.yaml
index 0a49baec75..f1d6f61e67 100644
--- a/deeplabcut/pose_estimation_pytorch/config/rtmpose/rtmpose_x.yaml
+++ b/deeplabcut/pose_estimation_pytorch/config/rtmpose/rtmpose_x.yaml
@@ -49,6 +49,8 @@ model:
       predictor:
         type: SimCCPredictor
         simcc_split_ratio: 2.0
+        sigma: [6.93, 6.93]
+        decode_beta: 150.0
       input_size: [384, 384]
       in_channels: 1280
       out_channels: "num_bodyparts"

From bd9e618c206ffc8ac6918ba9ee4267fa41971acf Mon Sep 17 00:00:00 2001
From: maximpavliv <maxim.pavliv@gmail.com>
Date: Tue, 2 Sep 2025 14:02:16 +0200
Subject: [PATCH 34/34] SimCCPredictor constructor: restore args order

---
 .../pose_estimation_pytorch/models/predictors/sim_cc.py       | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/deeplabcut/pose_estimation_pytorch/models/predictors/sim_cc.py b/deeplabcut/pose_estimation_pytorch/models/predictors/sim_cc.py
index 3f769fc633..b36a9639ab 100644
--- a/deeplabcut/pose_estimation_pytorch/models/predictors/sim_cc.py
+++ b/deeplabcut/pose_estimation_pytorch/models/predictors/sim_cc.py
@@ -42,15 +42,15 @@ class SimCCPredictor(BasePredictor):
     def __init__(
         self,
         simcc_split_ratio: float = 2.0,
-        normalize_outputs: bool = False,
         apply_softmax: bool = True,
+        normalize_outputs: bool = False,
         sigma: float | int | tuple[float, ...] = 6.0,
         decode_beta: float = 150.0,
     ) -> None:
         super().__init__()
         self.simcc_split_ratio = simcc_split_ratio
-        self.normalize_outputs = normalize_outputs
         self.apply_softmax = apply_softmax
+        self.normalize_outputs = normalize_outputs
 
         if isinstance(sigma, (float, int)):
             self.sigma = np.array([sigma, sigma])