Skip to content
Draft
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
99 changes: 52 additions & 47 deletions deeplabcut/pose_estimation_pytorch/data/transforms.py
Original file line number Diff line number Diff line change
Expand Up @@ -14,6 +14,7 @@
from typing import Any, Iterable, Sequence

import albumentations as A
import albumentations.augmentations.crops.functional as fcrops
import cv2
import numpy as np
from albumentations.augmentations.geometric import functional as F
Expand Down Expand Up @@ -90,7 +91,7 @@ def build_transforms(augmentations: dict) -> A.BaseCompose:
min_height=crop_sampling["height"],
min_width=crop_sampling["width"],
border_mode=cv2.BORDER_CONSTANT,
always_apply=True,
p=1.0 # always apply
Copy link

Copilot AI Jan 13, 2026

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Missing space after the '#' symbol in the comment. Should be '# always apply'.

Suggested change
p=1.0 # always apply
p=1.0 # always apply

Copilot uses AI. Check for mistakes.
)
)
transforms.append(
Expand Down Expand Up @@ -127,8 +128,8 @@ def build_transforms(augmentations: dict) -> A.BaseCompose:
noise = 0.05 * 255
transforms.append(
A.GaussNoise(
var_limit=(0, noise**2),
mean=0,
std_range=(0, noise / 255.0),
mean_range=(0, 0),
per_channel=True,
# Albumentations doesn't support per_channel = 0.5
p=0.5,
Expand All @@ -152,6 +153,7 @@ def build_transforms(augmentations: dict) -> A.BaseCompose:
"xy", remove_invisible=False, label_fields=["class_labels"]
),
bbox_params=A.BboxParams(format="coco", label_fields=["bbox_labels"]),
strict=True,
)


Expand All @@ -161,7 +163,7 @@ def build_auto_padding(
pad_height_divisor: int | None = 1,
pad_width_divisor: int | None = 1,
position: str = "random", # TODO: Which default to set?
border_mode: str = "reflect_101", # TODO: Which default to set?
border_mode: str = "constant", # TODO: Which default to set?
border_value: float | None = None,
border_mask_value: float | None = None,
) -> A.PadIfNeeded:
Expand Down Expand Up @@ -203,8 +205,8 @@ def build_auto_padding(
pad_width_divisor=pad_width_divisor,
position=position,
border_mode=border_modes[border_mode],
value=border_value,
mask_value=border_mask_value,
fill=border_value if border_value is not None else 0,
fill_mask=border_mask_value if border_mask_value is not None else 0,
)


Expand All @@ -219,7 +221,7 @@ def build_resize_transforms(resize_cfg: dict) -> list[A.BasicTransform]:
min_height=height,
min_width=width,
border_mode=cv2.BORDER_CONSTANT,
position=A.PadIfNeeded.PositionType.TOP_LEFT,
position="top_left",
)
)
else:
Expand Down Expand Up @@ -267,7 +269,7 @@ def __init__(
max_shift: float = 0.4,
crop_sampling: str = "hybrid",
):
super().__init__(height, width, always_apply=True)
super().__init__(height, width, p=1.0) # always apply
# Clamp to 40% of crop size to ensure that at least
# the center keypoint remains visible after the offset is applied.
self.max_shift = max(0.0, min(max_shift, 0.4))
Expand All @@ -288,9 +290,9 @@ def calc_n_neighbors(xy: NDArray, radius: float) -> NDArray:
def targets_as_params(self) -> list[str]:
return ["image", "keypoints"]

def get_params_dependent_on_targets(self, params: dict[str, Any]) -> dict[str, Any]:
img = params["image"]
kpts = params["keypoints"]
def get_params_dependent_on_data(self, params: dict[str, Any], data: dict[str, Any]) -> dict[str, Any]:
img = data["image"]
kpts = data["keypoints"]
shift_factors = np.random.random(2)
shift = self.max_shift * shift_factors * np.array([self.width, self.height])
sampling = self.crop_sampling
Expand Down Expand Up @@ -321,13 +323,21 @@ def get_params_dependent_on_targets(self, params: dict[str, Any]) -> dict[str, A
# and normalize to the original image dimensions.
center = (center + shift) / [w, h]
center = np.clip(center, 0, np.nextafter(1, 0)) # Clip to 1 exclusive
return {"h_start": center[1], "w_start": center[0]}

h_start, w_start = center[1], center[0]
crop_coords = fcrops.get_crop_coords(img.shape[:2], (self.height, self.width), h_start, w_start)

return {
"h_start": h_start,
"w_start": w_start,
"crop_coords": crop_coords,
}

def apply_to_keypoints(
self,
keypoints,
**params,
) -> list[tuple[float]]:
) -> np.ndarray:
keypoints = super().apply_to_keypoints(keypoints, **params)
new_keypoints = []
for kp in keypoints:
Expand All @@ -337,7 +347,7 @@ def apply_to_keypoints(
kp[:2] = np.nan, np.nan
kp = tuple(kp)
new_keypoints.append(kp)
return new_keypoints
return np.array(new_keypoints)

def get_transform_init_args_names(self) -> tuple[str, ...]:
return "width", "height", "max_shift", "crop_sampling"
Expand All @@ -362,31 +372,30 @@ def __init__(
mode: str = "pad",
interpolation: Any = cv2.INTER_LINEAR,
p: float = 1.0,
always_apply: bool = True,
) -> None:
super().__init__(always_apply=always_apply, p=p)
super().__init__(p=p)
self.height = height
self.width = width
self.mode = mode
self.interpolation = interpolation

def apply(self, img, scale=0, interpolation=cv2.INTER_LINEAR, **params):
return A.scale(img, scale, interpolation)
def apply(self, img, scale=1.0, interpolation=cv2.INTER_LINEAR, **params):
return F.scale(img, scale, interpolation)

def apply_to_bbox(self, bbox, **params):
def apply_to_bboxes(self, bboxes, **params):
# Bounding box coordinates are scale invariant
return bbox
return bboxes

def apply_to_keypoint(self, keypoint, scale=0, **params):
keypoint = A.keypoint_scale(keypoint, scale, scale)
def apply_to_keypoints(self, keypoints, scale=1.0, **params):
keypoint = F.keypoints_scale(keypoints, scale, scale)
return keypoint

@property
def targets_as_params(self) -> list[str]:
return ["image"]

def get_params_dependent_on_targets(self, params: dict[str, Any]) -> dict[str, Any]:
h, w, _ = params["image"].shape
def get_params_dependent_on_data(self, params: dict[str, Any], data: dict[str, Any]) -> dict[str, Any]:
h, w, _ = data["image"].shape
if self.mode == "pad":
scale = min(self.height / h, self.width / w)
else:
Expand Down Expand Up @@ -417,7 +426,7 @@ def __init__(
* If a tuple ``(a, b)``, a random value from the range
``a <= x <= b`` will be sampled per image.
"""
super().__init__(always_apply, p)
super().__init__(p=1.0 if always_apply else p)
if isinstance(alpha, (float, int)):
self._alpha = self._validate_alpha(alpha)
elif isinstance(alpha, tuple):
Expand Down Expand Up @@ -514,7 +523,7 @@ def apply_to_keypoints(
kp = list(kp)
kp[:2] = new_coords
new_keypoints.append(tuple(kp))
return new_keypoints
return np.array(new_keypoints)


class CoarseDropout(A.CoarseDropout):
Expand All @@ -532,27 +541,23 @@ def __init__(
p: float = 0.5,
):
super().__init__(
max_holes,
max_height,
max_width,
min_holes,
min_height,
min_width,
fill_value,
mask_fill_value,
always_apply,
p,
num_holes_range=(min_holes if min_holes is not None else max_holes, max_holes),
hole_height_range=(min_height if min_height is not None else max_height, max_height),
hole_width_range=(min_width if min_width is not None else max_width, max_width),
fill=fill_value,
fill_mask=mask_fill_value,
p=1.0 if always_apply else p,
)

def apply_to_bboxes(self, bboxes: Sequence[float], **params) -> list[float]:
return list(bboxes)
def apply_to_bboxes(self, bboxes: np.ndarray, **params) -> np.ndarray:
return bboxes

def apply_to_keypoints(
self,
keypoints: Sequence[float],
holes: Iterable[tuple[int, int, int, int]] = (),
**params,
) -> list[float]:
keypoints: np.ndarray,
holes: np.ndarray,
**params: Any,
) -> np.ndarray:
new_keypoints = []
for kp in keypoints:
in_hole = False
Expand All @@ -565,7 +570,7 @@ def apply_to_keypoints(
kp[:2] = np.nan, np.nan
kp = tuple(kp)
new_keypoints.append(kp)
return new_keypoints
return np.array(new_keypoints)

def _keypoint_in_hole(self, keypoint, hole: tuple[int, int, int, int]) -> bool:
"""Reimplemented from Albumentations as was removed in v1.4.0"""
Expand Down Expand Up @@ -650,10 +655,10 @@ def apply_to_bboxes(self, bboxes, **params):
bbox_xyxy[:, 2:] = bbox_cxcy + bbox_half_wh
bbox_xyxy = np.clip(bbox_xyxy, 0, 1)

# add the extra information back; tuples for albumentations<=1.4.3
bboxes_out = [tuple(bbox) for bbox in bbox_xyxy]
# add the extra information back
bboxes_out = bbox_xyxy
if bboxes_extra is not None:
bboxes_out = [bbox + extra for bbox, extra in zip(bboxes_out, bboxes_extra)]
bboxes_out = np.column_stack([bbox_xyxy] + [np.array(bboxes_extra)])
Copy link

Copilot AI Jan 13, 2026

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

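The bboxes_extra stacking is more convoluted than it needs to be. bboxes_extra is already a list of arrays/tuples from line 620, and [bbox_xyxy] + [np.array(bboxes_extra)] is simply the two-element list [bbox_xyxy, np.array(bboxes_extra)], so the list concatenation adds nothing and np.column_stack already appends the extra columns. Converting the extras once with np.asarray and joining along axis=1 (or calling np.column_stack([bbox_xyxy, bboxes_extra]) directly) states that intent more readably; either form requires every row of bboxes_extra to have the same length.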

Suggested change
bboxes_out = np.column_stack([bbox_xyxy] + [np.array(bboxes_extra)])
bboxes_extra_arr = np.asarray(bboxes_extra)
bboxes_out = np.concatenate([bbox_xyxy, bboxes_extra_arr], axis=1)

Copilot uses AI. Check for mistakes.
return bboxes_out

def get_transform_init_args_names(self):
Expand All @@ -676,7 +681,7 @@ def _sample(

class ScaleToUnitRange(A.ImageOnlyTransform):
def __init__(self, always_apply=True, p=1.0):
super().__init__(always_apply=always_apply, p=p)
super().__init__(p= 1.0 if always_apply else p)
Copy link

Copilot AI Jan 13, 2026

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Extra space after '=' in 'p= 1.0'. Should be 'p=1.0' to follow consistent formatting with other lines in the file (e.g., lines 429, 549).

Suggested change
super().__init__(p= 1.0 if always_apply else p)
super().__init__(p=1.0 if always_apply else p)

Copilot uses AI. Check for mistakes.

def apply(self, img, **params):
return img.astype(np.float32) / 255.0
2 changes: 1 addition & 1 deletion deeplabcut/pose_estimation_pytorch/data/utils.py
Original file line number Diff line number Diff line change
Expand Up @@ -499,7 +499,7 @@ def _apply_transform(
"""
transformed = transform(
image=image,
keypoints=keypoints,
keypoints=np.array(keypoints),
class_labels=class_labels,
bboxes=bboxes,
bbox_labels=np.arange(len(bboxes)),
Expand Down
2 changes: 1 addition & 1 deletion requirements.txt
Original file line number Diff line number Diff line change
@@ -1,5 +1,5 @@
# novel for pytorch DLC:
albumentations<=1.4.3
albumentations>=2
einops
pycocotools<=2.0.8
timm
Expand Down
2 changes: 1 addition & 1 deletion setup.py
Original file line number Diff line number Diff line change
Expand Up @@ -55,7 +55,7 @@ def pytorch_config_paths() -> list[str]:
long_description_content_type="text/markdown",
url="https://github.com/DeepLabCut/DeepLabCut",
install_requires=[
"albumentations<=1.4.3",
"albumentations>=2",
"dlclibrary>=0.0.7",
"einops",
"filterpy>=1.4.4",
Expand Down
10 changes: 5 additions & 5 deletions tests/pose_estimation_pytorch/data/test_transforms.py
Original file line number Diff line number Diff line change
Expand Up @@ -69,16 +69,16 @@ def test_dlc_resize_pad_bad_aspect_ratio(data):
"width": 200,
"in_shape": (100, 50, 3),
"out_shape": (200, 100, 3),
"in_keypoints": [(50.0, 50.0), (25.0, 10.0)],
"out_keypoints": [(100.0, 100.0), (50.0, 20.0)],
"in_keypoints": [[50.0, 50.0], [25.0, 10.0]],
"out_keypoints": [[100.0, 100.0], [50.0, 20.0]],
},
{
"height": 512,
"width": 256,
"in_shape": (1024, 1024, 3),
"out_shape": (256, 256, 3),
"in_keypoints": [(512.0, 512.0), (100.0, 10.0)],
"out_keypoints": [(128.0, 128.0), (25.0, 2.5)],
"in_keypoints": [[512.0, 512.0], [100.0, 10.0]],
"out_keypoints": [[128.0, 128.0], [25.0, 2.5]],
},
],
)
Expand Down Expand Up @@ -151,7 +151,7 @@ def test_random_bbox_transform_does_not_modify_with_base_config(data: dict) -> N
print("bboxes")
print(bboxes_out)
print()
np.testing.assert_array_almost_equal(bboxes, bboxes_out)
np.testing.assert_array_almost_equal(bboxes, bboxes_out, decimal=4)


@pytest.mark.parametrize(
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -17,7 +17,7 @@
@pytest.mark.parametrize("width, height", [(200, 200), (300, 300), (400, 400)])
def test_keypoint_aware_cropping(width, height):
fake_image = np.empty((600, 600, 3))
fake_keypoints = [(i * 100, i * 100, 0, 0) for i in range(1, 6)]
fake_keypoints = np.array([(i * 100, i * 100, 0, 0) for i in range(1, 6)])
aug = transforms.KeypointAwareCrop(
width=width, height=height, crop_sampling="density"
)
Expand Down
Loading