DeepLabCut · arashsm79 · Jan 9, 2026 · Copilot · Jan 13, 2026 · Copilot
diff --git a/deeplabcut/pose_estimation_pytorch/data/transforms.py b/deeplabcut/pose_estimation_pytorch/data/transforms.py
@@ -14,6 +14,7 @@
 from typing import Any, Iterable, Sequence
 
 import albumentations as A
+import albumentations.augmentations.crops.functional as fcrops
 import cv2
 import numpy as np
 from albumentations.augmentations.geometric import functional as F
@@ -90,7 +91,7 @@ def build_transforms(augmentations: dict) -> A.BaseCompose:
                 min_height=crop_sampling["height"],
                 min_width=crop_sampling["width"],
                 border_mode=cv2.BORDER_CONSTANT,
-                always_apply=True,
+                p=1.0 # always apply
-                p=1.0 # always apply
+                p=1.0  # always apply
-                p=1.0 # always apply
+                p=1.0  # always apply
             )
         )
         transforms.append(
@@ -127,8 +128,8 @@ def build_transforms(augmentations: dict) -> A.BaseCompose:
             noise = 0.05 * 255
         transforms.append(
             A.GaussNoise(
-                var_limit=(0, noise**2),
-                mean=0,
+                std_range=(0, noise / 255.0),
+                mean_range=(0, 0),
                 per_channel=True,
                 # Albumentations doesn't support per_channel = 0.5
                 p=0.5,
@@ -152,6 +153,7 @@ def build_transforms(augmentations: dict) -> A.BaseCompose:
             "xy", remove_invisible=False, label_fields=["class_labels"]
         ),
         bbox_params=A.BboxParams(format="coco", label_fields=["bbox_labels"]),
+        strict=True,
     )
 
 
@@ -161,7 +163,7 @@ def build_auto_padding(
     pad_height_divisor: int | None = 1,
     pad_width_divisor: int | None = 1,
     position: str = "random",  # TODO: Which default to set?
-    border_mode: str = "reflect_101",  # TODO: Which default to set?
+    border_mode: str = "constant",  # TODO: Which default to set?
     border_value: float | None = None,
     border_mask_value: float | None = None,
 ) -> A.PadIfNeeded:
@@ -203,8 +205,8 @@ def build_auto_padding(
         pad_width_divisor=pad_width_divisor,
         position=position,
         border_mode=border_modes[border_mode],
-        value=border_value,
-        mask_value=border_mask_value,
+        fill=border_value if border_value is not None else 0,
+        fill_mask=border_mask_value if border_mask_value is not None else 0,
     )
 
 
@@ -219,7 +221,7 @@ def build_resize_transforms(resize_cfg: dict) -> list[A.BasicTransform]:
                 min_height=height,
                 min_width=width,
                 border_mode=cv2.BORDER_CONSTANT,
-                position=A.PadIfNeeded.PositionType.TOP_LEFT,
+                position="top_left",
             )
         )
     else:
@@ -267,7 +269,7 @@ def __init__(
         max_shift: float = 0.4,
         crop_sampling: str = "hybrid",
     ):
-        super().__init__(height, width, always_apply=True)
+        super().__init__(height, width, p=1.0)  # always apply
         # Clamp to 40% of crop size to ensure that at least
         # the center keypoint remains visible after the offset is applied.
         self.max_shift = max(0.0, min(max_shift, 0.4))
@@ -288,9 +290,9 @@ def calc_n_neighbors(xy: NDArray, radius: float) -> NDArray:
     def targets_as_params(self) -> list[str]:
         return ["image", "keypoints"]
 
-    def get_params_dependent_on_targets(self, params: dict[str, Any]) -> dict[str, Any]:
-        img = params["image"]
-        kpts = params["keypoints"]
+    def get_params_dependent_on_data(self, params: dict[str, Any], data: dict[str, Any]) -> dict[str, Any]:
+        img = data["image"]
+        kpts = data["keypoints"]
         shift_factors = np.random.random(2)
         shift = self.max_shift * shift_factors * np.array([self.width, self.height])
         sampling = self.crop_sampling
@@ -321,13 +323,21 @@ def get_params_dependent_on_targets(self, params: dict[str, Any]) -> dict[str, A
             # and normalize to the original image dimensions.
             center = (center + shift) / [w, h]
             center = np.clip(center, 0, np.nextafter(1, 0))  # Clip to 1 exclusive
-        return {"h_start": center[1], "w_start": center[0]}
+
+        h_start, w_start = center[1], center[0]
+        crop_coords = fcrops.get_crop_coords(img.shape[:2], (self.height, self.width), h_start, w_start)
+
+        return {
+            "h_start": h_start,
+            "w_start": w_start,
+            "crop_coords": crop_coords,
+        }
 
     def apply_to_keypoints(
         self,
         keypoints,
         **params,
-    ) -> list[tuple[float]]:
+    ) -> np.ndarray:
         keypoints = super().apply_to_keypoints(keypoints, **params)
         new_keypoints = []
         for kp in keypoints:
@@ -337,7 +347,7 @@ def apply_to_keypoints(
                 kp[:2] = np.nan, np.nan
                 kp = tuple(kp)
             new_keypoints.append(kp)
-        return new_keypoints
+        return np.array(new_keypoints)
 
     def get_transform_init_args_names(self) -> tuple[str, ...]:
         return "width", "height", "max_shift", "crop_sampling"
@@ -362,31 +372,30 @@ def __init__(
         mode: str = "pad",
         interpolation: Any = cv2.INTER_LINEAR,
         p: float = 1.0,
-        always_apply: bool = True,
     ) -> None:
-        super().__init__(always_apply=always_apply, p=p)
+        super().__init__(p=p)
         self.height = height
         self.width = width
         self.mode = mode
         self.interpolation = interpolation
 
-    def apply(self, img, scale=0, interpolation=cv2.INTER_LINEAR, **params):
-        return A.scale(img, scale, interpolation)
+    def apply(self, img, scale=1.0, interpolation=cv2.INTER_LINEAR, **params):
+        return F.scale(img, scale, interpolation)
 
-    def apply_to_bbox(self, bbox, **params):
+    def apply_to_bboxes(self, bboxes, **params):
         # Bounding box coordinates are scale invariant
-        return bbox
+        return bboxes
 
-    def apply_to_keypoint(self, keypoint, scale=0, **params):
-        keypoint = A.keypoint_scale(keypoint, scale, scale)
+    def apply_to_keypoints(self, keypoints, scale=1.0, **params):
+        keypoint = F.keypoints_scale(keypoints, scale, scale)
         return keypoint
 
     @property
     def targets_as_params(self) -> list[str]:
         return ["image"]
 
-    def get_params_dependent_on_targets(self, params: dict[str, Any]) -> dict[str, Any]:
-        h, w, _ = params["image"].shape
+    def get_params_dependent_on_data(self, params: dict[str, Any], data: dict[str, Any]) -> dict[str, Any]:
+        h, w, _ = data["image"].shape
         if self.mode == "pad":
             scale = min(self.height / h, self.width / w)
         else:
@@ -417,7 +426,7 @@ def __init__(
             * If a tuple ``(a, b)``, a random value from the range
               ``a <= x <= b`` will be sampled per image.
         """
-        super().__init__(always_apply, p)
+        super().__init__(p=1.0 if always_apply else p)
         if isinstance(alpha, (float, int)):
             self._alpha = self._validate_alpha(alpha)
         elif isinstance(alpha, tuple):
@@ -514,7 +523,7 @@ def apply_to_keypoints(
             kp = list(kp)
             kp[:2] = new_coords
             new_keypoints.append(tuple(kp))
-        return new_keypoints
+        return np.array(new_keypoints)
 
 
 class CoarseDropout(A.CoarseDropout):
@@ -532,27 +541,23 @@ def __init__(
         p: float = 0.5,
     ):
         super().__init__(
-            max_holes,
-            max_height,
-            max_width,
-            min_holes,
-            min_height,
-            min_width,
-            fill_value,
-            mask_fill_value,
-            always_apply,
-            p,
+            num_holes_range=(min_holes if min_holes is not None else max_holes, max_holes),
+            hole_height_range=(min_height if min_height is not None else max_height, max_height),
+            hole_width_range=(min_width if min_width is not None else max_width, max_width),
+            fill=fill_value,
+            fill_mask=mask_fill_value,
+            p=1.0 if always_apply else p,
         )
 
-    def apply_to_bboxes(self, bboxes: Sequence[float], **params) -> list[float]:
-        return list(bboxes)
+    def apply_to_bboxes(self, bboxes: np.ndarray, **params) -> np.ndarray:
+        return bboxes
 
     def apply_to_keypoints(
         self,
-        keypoints: Sequence[float],
-        holes: Iterable[tuple[int, int, int, int]] = (),
-        **params,
-    ) -> list[float]:
+        keypoints: np.ndarray,
+        holes: np.ndarray,
+        **params: Any,
+    ) -> np.ndarray:
         new_keypoints = []
         for kp in keypoints:
             in_hole = False
@@ -565,7 +570,7 @@ def apply_to_keypoints(
                 kp[:2] = np.nan, np.nan
                 kp = tuple(kp)
             new_keypoints.append(kp)
-        return new_keypoints
+        return np.array(new_keypoints)
 
     def _keypoint_in_hole(self, keypoint, hole: tuple[int, int, int, int]) -> bool:
         """Reimplemented from Albumentations as was removed in v1.4.0"""
@@ -650,10 +655,10 @@ def apply_to_bboxes(self, bboxes, **params):
         bbox_xyxy[:, 2:] = bbox_cxcy + bbox_half_wh
         bbox_xyxy = np.clip(bbox_xyxy, 0, 1)
 
-        # add the extra information back; tuples for albumentations<=1.4.3
-        bboxes_out = [tuple(bbox) for bbox in bbox_xyxy]
+        # add the extra information back
+        bboxes_out = bbox_xyxy
         if bboxes_extra is not None:
-            bboxes_out = [bbox + extra for bbox, extra in zip(bboxes_out, bboxes_extra)]
+            bboxes_out = np.column_stack([bbox_xyxy] + [np.array(bboxes_extra)])
-            bboxes_out = np.column_stack([bbox_xyxy] + [np.array(bboxes_extra)])
+            bboxes_extra_arr = np.asarray(bboxes_extra)
+            bboxes_out = np.concatenate([bbox_xyxy, bboxes_extra_arr], axis=1)
-            bboxes_out = np.column_stack([bbox_xyxy] + [np.array(bboxes_extra)])
+            bboxes_extra_arr = np.asarray(bboxes_extra)
+            bboxes_out = np.concatenate([bbox_xyxy, bboxes_extra_arr], axis=1)
         return bboxes_out
 
     def get_transform_init_args_names(self):
@@ -676,7 +681,7 @@ def _sample(
 
 class ScaleToUnitRange(A.ImageOnlyTransform):
     def __init__(self, always_apply=True, p=1.0):
-        super().__init__(always_apply=always_apply, p=p)
+        super().__init__(p= 1.0 if always_apply else p)
-        super().__init__(p= 1.0 if always_apply else p)
+        super().__init__(p=1.0 if always_apply else p)
-        super().__init__(p= 1.0 if always_apply else p)
+        super().__init__(p=1.0 if always_apply else p)
 
     def apply(self, img, **params):
         return img.astype(np.float32) / 255.0
diff --git a/deeplabcut/pose_estimation_pytorch/data/utils.py b/deeplabcut/pose_estimation_pytorch/data/utils.py
@@ -499,7 +499,7 @@ def _apply_transform(
     """
     transformed = transform(
         image=image,
-        keypoints=keypoints,
+        keypoints=np.array(keypoints),
         class_labels=class_labels,
         bboxes=bboxes,
         bbox_labels=np.arange(len(bboxes)),

diff --git a/requirements.txt b/requirements.txt
@@ -1,5 +1,5 @@
 # novel for pytorch DLC:
-albumentations<=1.4.3
+albumentations>=2
 einops
 pycocotools<=2.0.8
 timm

diff --git a/setup.py b/setup.py
@@ -55,7 +55,7 @@ def pytorch_config_paths() -> list[str]:
     long_description_content_type="text/markdown",
     url="https://github.com/DeepLabCut/DeepLabCut",
     install_requires=[
-        "albumentations<=1.4.3",
+        "albumentations>=2",
         "dlclibrary>=0.0.7",
         "einops",
         "filterpy>=1.4.4",

diff --git a/tests/pose_estimation_pytorch/data/test_transforms.py b/tests/pose_estimation_pytorch/data/test_transforms.py
@@ -69,16 +69,16 @@ def test_dlc_resize_pad_bad_aspect_ratio(data):
             "width": 200,
             "in_shape": (100, 50, 3),
             "out_shape": (200, 100, 3),
-            "in_keypoints": [(50.0, 50.0), (25.0, 10.0)],
-            "out_keypoints": [(100.0, 100.0), (50.0, 20.0)],
+            "in_keypoints": [[50.0, 50.0], [25.0, 10.0]],
+            "out_keypoints": [[100.0, 100.0], [50.0, 20.0]],
         },
         {
             "height": 512,
             "width": 256,
             "in_shape": (1024, 1024, 3),
             "out_shape": (256, 256, 3),
-            "in_keypoints": [(512.0, 512.0), (100.0, 10.0)],
-            "out_keypoints": [(128.0, 128.0), (25.0, 2.5)],
+            "in_keypoints": [[512.0, 512.0], [100.0, 10.0]],
+            "out_keypoints": [[128.0, 128.0], [25.0, 2.5]],
         },
     ],
 )
@@ -151,7 +151,7 @@ def test_random_bbox_transform_does_not_modify_with_base_config(data: dict) -> N
     print("bboxes")
     print(bboxes_out)
     print()
-    np.testing.assert_array_almost_equal(bboxes, bboxes_out)
+    np.testing.assert_array_almost_equal(bboxes, bboxes_out, decimal=4)
 
 
 @pytest.mark.parametrize(

diff --git a/tests/pose_estimation_pytorch/other/test_custom_transforms.py b/tests/pose_estimation_pytorch/other/test_custom_transforms.py
@@ -17,7 +17,7 @@
 @pytest.mark.parametrize("width, height", [(200, 200), (300, 300), (400, 400)])
 def test_keypoint_aware_cropping(width, height):
     fake_image = np.empty((600, 600, 3))
-    fake_keypoints = [(i * 100, i * 100, 0, 0) for i in range(1, 6)]
+    fake_keypoints = np.array([(i * 100, i * 100, 0, 0) for i in range(1, 6)])
     aug = transforms.KeypointAwareCrop(
         width=width, height=height, crop_sampling="density"
     )