diff --git a/.github/workflows/build-dev-docs.yml b/.github/workflows/build-dev-docs.yml new file mode 100644 index 0000000000..8c31275ff8 --- /dev/null +++ b/.github/workflows/build-dev-docs.yml @@ -0,0 +1,37 @@ +name: Build dev-docs (mkdocs) + +on: + workflow_call: + inputs: + python-version: + description: "Python version used to build the dev docs." + required: false + default: "3.10" + type: string + +jobs: + build: + runs-on: ubuntu-latest + permissions: + contents: read + steps: + - uses: actions/checkout@v6 + + - name: Set up Python + uses: actions/setup-python@v6 + with: + python-version: ${{ inputs.python-version }} + + - name: Install system dependencies for mkdocs-material social plugin + run: | + sudo apt-get update -qq + sudo apt-get install -y libcairo2-dev libfreetype6-dev libffi-dev \ + libjpeg-dev libpng-dev libz-dev + + - name: Install dev-docs dependencies + run: | + python -m pip install --upgrade pip + python -m pip install ".[dev-docs]" + + - name: Build dev docs + run: mkdocs build -v -f dev-docs/mkdocs.yml diff --git a/.github/workflows/build-book.yml b/.github/workflows/build-main-docs.yml similarity index 95% rename from .github/workflows/build-book.yml rename to .github/workflows/build-main-docs.yml index a1c55b9a0e..aedc113f5a 100644 --- a/.github/workflows/build-book.yml +++ b/.github/workflows/build-main-docs.yml @@ -1,4 +1,4 @@ -name: Build (and optionally deploy) Jupyter Book +name: Build main docs (Jupyter Book) on: workflow_call: @@ -18,8 +18,6 @@ on: default: false type: boolean - - jobs: build: runs-on: ubuntu-latest diff --git a/.github/workflows/deploy-docs.yml b/.github/workflows/deploy-docs.yml new file mode 100644 index 0000000000..5f6b86adcc --- /dev/null +++ b/.github/workflows/deploy-docs.yml @@ -0,0 +1,83 @@ +name: Deploy docs (main + dev-docs latest) + +on: + push: + branches: [ main ] + +permissions: + contents: write + +concurrency: + # Serialize gh-pages pushes so overlapping runs cannot race each other. + group: gh-pages-deploy + cancel-in-progress: false + +jobs: + build: + uses: ./.github/workflows/build-main-docs.yml + with: + python-version: "3.10" +
build_dir: "./_build/html" + upload_artifact: true + secrets: inherit + + deploy-main-docs: + needs: build + runs-on: ubuntu-latest + permissions: + contents: write + steps: + - name: Download built Jupyter Book artifact + uses: actions/download-artifact@v4 + with: + name: built-book + path: site + + - name: Deploy main docs to gh-pages + uses: peaceiris/actions-gh-pages@v4 + with: + github_token: ${{ secrets.GITHUB_TOKEN }} + publish_dir: site + keep_files: true # preserve /dev/ versions managed by mike + + deploy-dev-docs-latest: + needs: deploy-main-docs + runs-on: ubuntu-latest + permissions: + contents: write + steps: + - uses: actions/checkout@v6 + with: + fetch-depth: 0 # mike needs full history to read/write gh-pages branch + + - name: Set up Python + uses: actions/setup-python@v6 + with: + python-version: "3.10" + + - name: Install system dependencies for mkdocs-material social plugin + run: | + sudo apt-get update -qq + sudo apt-get install -y libcairo2-dev libfreetype6-dev libffi-dev \ + libjpeg-dev libpng-dev libz-dev + + - name: Install dev-docs dependencies + run: | + python -m pip install --upgrade pip + python -m pip install ".[dev-docs]" + + - name: Configure git for mike + run: | + git config user.name "github-actions[bot]" + git config user.email "github-actions[bot]@users.noreply.github.com" + + - name: Deploy latest dev-docs with mike (main branch) + run: | + mike deploy --push \ + --config-file dev-docs/mkdocs.yml \ + --deploy-prefix dev \ + main + mike set-default --push \ + --config-file dev-docs/mkdocs.yml \ + --deploy-prefix dev \ + main diff --git a/.github/workflows/intelligent-testing.yml b/.github/workflows/intelligent-testing.yml index ae85292c75..e4ba7a6d79 100644 --- a/.github/workflows/intelligent-testing.yml +++ b/.github/workflows/intelligent-testing.yml @@ -76,16 +76,25 @@ jobs: docs: - name: Docs build + name: Main docs build needs: intelligent-test-selection if: needs.intelligent-test-selection.outputs.run_docs == 'true' -
uses: ./.github/workflows/build-book.yml + uses: ./.github/workflows/build-main-docs.yml with: python-version: "3.10" build_dir: "./_build/html" upload_artifact: false secrets: inherit + dev-docs: + name: Dev docs build + needs: intelligent-test-selection + if: needs.intelligent-test-selection.outputs.run_docs == 'true' + uses: ./.github/workflows/build-dev-docs.yml + with: + python-version: "3.10" + secrets: inherit + fast-tests: name: Fast lane (targeted pytest + selected functional) diff --git a/.github/workflows/manage-dev-docs.yml b/.github/workflows/manage-dev-docs.yml new file mode 100644 index 0000000000..6f16ca7e62 --- /dev/null +++ b/.github/workflows/manage-dev-docs.yml @@ -0,0 +1,110 @@ +name: Manage dev-docs versions (manual) + +on: + workflow_dispatch: + inputs: + action: + description: "Action to perform" + required: true + type: choice + options: + - deploy-version + - delete-version + version_label: + description: "Version label to deploy or delete (e.g. 3.0)" + required: true + type: string + git_tag: + description: "Git tag to check out for deploy-version (e.g. v3.0.0rc14). Ignored for delete-version."
+ required: false + type: string + +permissions: + contents: write + +concurrency: + # Serialize with other workflows that push to gh-pages. + group: gh-pages-deploy + cancel-in-progress: false + +env: + # Expose inputs as environment variables so they are never expanded inside shell scripts. + GIT_TAG: ${{ inputs.git_tag }} + VERSION_LABEL: ${{ inputs.version_label }} + +jobs: + deploy-version: + if: ${{ inputs.action == 'deploy-version' }} + runs-on: ubuntu-latest + steps: + - name: Validate inputs + run: | + if [ -z "$GIT_TAG" ]; then + echo "::error::git_tag is required for deploy-version" + exit 1 + fi + + - uses: actions/checkout@v6 + with: + fetch-depth: 0 # mike needs full history to read/write gh-pages branch + + - name: Set up Python + uses: actions/setup-python@v6 + with: + python-version: "3.10" + + - name: Install system dependencies for mkdocs-material social plugin + run: | + sudo apt-get update -qq + sudo apt-get install -y libcairo2-dev libfreetype6-dev libffi-dev \ + libjpeg-dev libpng-dev libz-dev + + - name: Check out tagged source tree + run: git checkout "$GIT_TAG" -- deeplabcut + + - name: Install dev-docs dependencies + run: | + python -m pip install --upgrade pip + python -m pip install ".[dev-docs]" + + - name: Configure git for mike + run: | + git config user.name "github-actions[bot]" + git config user.email "github-actions[bot]@users.noreply.github.com" + + - name: Deploy version with mike + run: | + mike deploy --push \ + --config-file dev-docs/mkdocs.yml \ + --deploy-prefix dev \ + "$VERSION_LABEL" + + delete-version: + if: ${{ inputs.action == 'delete-version' }} + runs-on: ubuntu-latest + steps: + - uses: actions/checkout@v6 + with: + fetch-depth: 0 # mike needs full history to read/write gh-pages branch + + - name: Set up Python + uses: actions/setup-python@v6 + with: + python-version: "3.10" + + - name: Install dev-docs dependencies + run: | + python -m pip install --upgrade pip + python -m pip install ".[dev-docs]" + + - name: Configure git for mike + run: | + git config user.name "github-actions[bot]" + git config user.email "github-actions[bot]@users.noreply.github.com" + + - name: Delete version with mike + run: | + mike delete --push \ + --config-file dev-docs/mkdocs.yml \ +
--deploy-prefix dev \ + "$VERSION_LABEL" diff --git a/.github/workflows/publish-book.yml b/.github/workflows/publish-book.yml deleted file mode 100644 index 7996b0672b..0000000000 --- a/.github/workflows/publish-book.yml +++ /dev/null @@ -1,35 +0,0 @@ -name: publish-book - -on: - push: - branches: [ main ] - -permissions: - contents: write - -jobs: - build: - uses: ./.github/workflows/build-book.yml - with: - python-version: "3.10" - build_dir: "./_build/html" - upload_artifact: true - secrets: inherit - - deploy: - needs: build - runs-on: ubuntu-latest - permissions: - contents: write - steps: - - name: Download built site artifact - uses: actions/download-artifact@v4 - with: - name: built-book - path: site - - - name: Deploy via gh-pages branch - uses: peaceiris/actions-gh-pages@v4 - with: - github_token: ${{ secrets.GITHUB_TOKEN }} - publish_dir: site diff --git a/_toc.yml b/_toc.yml index 3c369280c3..ab17f8e78a 100644 --- a/_toc.yml +++ b/_toc.yml @@ -87,6 +87,11 @@ parts: - file: docs/benchmark - file: docs/recipes/TechHardware + - caption: Development + chapters: + - file: docs/dev + title: Developer Documentation + - caption: Additional guides (Recipes) chapters: - file: docs/recipes/index diff --git a/dev-docs/docs/assets/favicon.ico b/dev-docs/docs/assets/favicon.ico new file mode 100644 index 0000000000..e1bff26790 Binary files /dev/null and b/dev-docs/docs/assets/favicon.ico differ diff --git a/dev-docs/docs/assets/javascripts/.gitkeep b/dev-docs/docs/assets/javascripts/.gitkeep new file mode 100644 index 0000000000..e69de29bb2 diff --git a/dev-docs/docs/assets/logo.png b/dev-docs/docs/assets/logo.png new file mode 100644 index 0000000000..1c99d7d1c1 Binary files /dev/null and b/dev-docs/docs/assets/logo.png differ diff --git a/dev-docs/docs/assets/stylesheets/.gitkeep b/dev-docs/docs/assets/stylesheets/.gitkeep new file mode 100644 index 0000000000..e69de29bb2 diff --git a/dev-docs/docs/configuration.md b/dev-docs/docs/configuration.md new
file mode 100644 index 0000000000..c1a656ad69 --- /dev/null +++ b/dev-docs/docs/configuration.md @@ -0,0 +1,221 @@ +# Model Configuration + +Model architectures in DeepLabCut PyTorch are defined using configuration files written in YAML format. These configuration files specify the model architecture, training hyperparameters, data augmentation settings, and more. + +## Configuration File Structure + +The primary configuration file is named `pytorch_cfg.yaml` and is stored in the model's training directory. This file is automatically generated during the standard DeepLabCut workflow, but can also be created manually for custom projects. + +## Creating Configuration Files + +The [`deeplabcut.pose_estimation_pytorch.config`][] module provides functions to create and manipulate configuration files. + +### Basic Configuration Creation + +Use [`make_pytorch_pose_config`][deeplabcut.pose_estimation_pytorch.config.make_pytorch_pose_config] to generate a model configuration: + +```python +from pathlib import Path +import deeplabcut.pose_estimation_pytorch as dlc_torch + +# Configuration for a DeepLabCut project +project_cfg = { + "bodyparts": ["nose", "left_ear", "right_ear", "tail_base"], + "individuals": ["mouse1", "mouse2"], + # ... 
other project settings +} + +pose_config_path = Path("/path/to/model/train") +model_cfg = dlc_torch.config.make_pytorch_pose_config( + project_config=project_cfg, + pose_config_path=pose_config_path, + net_type="hrnet_w32", + top_down=True, + save=True, # Save the configuration to disk +) +``` + +### Configuration for COCO Datasets + +For COCO-format datasets without a DeepLabCut project, use [`make_basic_project_config`][deeplabcut.pose_estimation_pytorch.config.make_basic_project_config]: + +```python +from pathlib import Path +import deeplabcut.pose_estimation_pytorch as dlc_torch + +# Create a minimal project configuration +project_cfg = dlc_torch.config.make_basic_project_config( + dataset_path="/path/to/COCOProject", + bodyparts=["nose", "left_eye", "right_eye", "left_ear", "right_ear"], + max_individuals=2, + multi_animal=True, +) + +# Generate model configuration +model_cfg = dlc_torch.config.make_pytorch_pose_config( + project_config=project_cfg, + pose_config_path=Path("/path/to/experiment/train"), + net_type="dlcrnet_ms5", + top_down=False, + save=True, +) +``` + +## Configuration File Components + +A complete `pytorch_cfg.yaml` file contains the following sections. + +### Model Architecture + +Specifies the backbone, optional neck, and head: + +```yaml +model: + backbone: + type: HRNet + variant: w32 + neck: null # omit or set to null for no neck + head: + type: HeatmapHead + weight_init: normal + predictor: + type: HeatmapPredictor + location_refinement: true + locref_std: 7.2801 + target_generator: + type: HeatmapGaussianGenerator + num_heatmaps: "num_bodyparts" + pos_dist_thresh: 17 + generate_locref: true + criterion: + heatmap: + type: WeightedMSECriterion + weight: 1.0 + locref: + type: WeightedHuberCriterion + weight: 0.05 +``` + +### Data Configuration + +Controls data loading, augmentation, and preprocessing. 
Augmentations under `train` are applied only during training; `inference` augmentations are applied during evaluation and video analysis: + +```yaml +data: + colormode: RGB # RGB or GRAY + bbox_margin: 20 # pixels added around bounding boxes (top-down only) + train: + normalize_images: true + crop_sampling: + width: 448 + height: 448 + max_shift: 0.1 + method: hybrid + affine: + p: 0.5 + rotation: 30 + scaling: [0.5, 1.25] + translation: 0 + gaussian_noise: 12.75 + motion_blur: true + hflip: true + inference: + normalize_images: true +``` + +### Training Settings + +Controls the training loop — batch size, number of epochs, data loading, and random seed: + +```yaml +train_settings: + batch_size: 8 + epochs: 200 + seed: 42 + dataloader_workers: 4 + dataloader_pin_memory: true + display_iters: 500 +``` + +### Runner Configuration + +The runner manages the training loop, optimisation, checkpointing, and evaluation. Use any optimizer from [`torch.optim`](https://pytorch.org/docs/stable/optim.html) and any scheduler from [`torch.optim.lr_scheduler`](https://pytorch.org/docs/stable/optim.html#how-to-adjust-learning-rate): + +```yaml +runner: + type: PoseTrainingRunner + gpus: null # null = use device setting; list of ints for multi-GPU + key_metric: "test.mAP" + key_metric_asc: true # true if higher is better + eval_interval: 10 # evaluate every N epochs + + optimizer: + type: AdamW + params: + lr: 0.0001 + weight_decay: 0.01 + + scheduler: + type: LRListScheduler + params: + milestones: [160, 190] + lr_list: [[1e-5], [1e-6]] + + snapshots: + max_snapshots: 5 # keep only the N most recent snapshots + save_epochs: 25 # save a snapshot every N epochs + save_optimizer_state: false + + logger: + type: WandbLogger # omit or set to null for local-only logging + project_name: my-project + tags: ["model=hrnet_w32"] +``` + +### Resuming Training + +Resume from a specific snapshot by setting: + +```yaml +resume_training_from: /path/to/model/train/snapshot-010.pt +``` + +### 
Inference Configuration + +Controls inference-specific behaviour, set independently of training augmentations: + +```yaml +method: td # bu = bottom-up, td = top-down, ctd = conditional top-down +device: auto # auto, cpu, cuda, or cuda:N +``` + +### Top-Down Detector Configuration + +Top-down models require a separate detector. The detector `pytorch_cfg.yaml` mirrors the pose model structure but uses `DetectorTrainingRunner`: + +```yaml +runner: + type: DetectorTrainingRunner + key_metric: "test.mAP@50:95" + key_metric_asc: true + eval_interval: 10 + optimizer: + type: AdamW + params: + lr: 1e-4 + scheduler: + type: LRListScheduler + params: + milestones: [160] + lr_list: [[1e-5]] + snapshots: + max_snapshots: 5 + save_epochs: 25 + save_optimizer_state: false + +train_settings: + batch_size: 1 + epochs: 250 + dataloader_workers: 0 + display_iters: 500 +``` diff --git a/dev-docs/docs/custom_models.md b/dev-docs/docs/custom_models.md new file mode 100644 index 0000000000..916bd0b865 --- /dev/null +++ b/dev-docs/docs/custom_models.md @@ -0,0 +1,201 @@ +# Adding Custom Models + +DeepLabCut's PyTorch backend uses a registry system that makes it easy to add custom model components. This guide explains how to extend the framework with your own backbones, necks, heads, predictors, and target generators. + +## Model Registry System + +The registry system allows you to register custom components that can be instantiated from configuration files. 
Five registries are available for different model building blocks: + +- [`BACKBONES`][deeplabcut.pose_estimation_pytorch.models.backbones] - Feature extraction networks +- [`NECKS`][deeplabcut.pose_estimation_pytorch.models.necks] - Intermediate processing layers +- [`HEADS`][deeplabcut.pose_estimation_pytorch.models.heads] - Task-specific output layers +- [`PREDICTORS`][deeplabcut.pose_estimation_pytorch.models.predictors] - Output-to-keypoint converters +- [`TARGET_GENERATORS`][deeplabcut.pose_estimation_pytorch.models.target_generators] - Annotation-to-target converters + +## Custom Backbones + +A backbone extracts features from input images. Subclass [`BaseBackbone`][deeplabcut.pose_estimation_pytorch.models.backbones.BaseBackbone] and decorate with `@BACKBONES.register_module`: + +```python +import torch +import torch.nn as nn +from deeplabcut.pose_estimation_pytorch.models.backbones import BACKBONES, BaseBackbone + + +@BACKBONES.register_module +class MyBackbone(BaseBackbone): + def __init__(self, out_channels: int = 256, pretrained: bool = False): + # stride is the total downsampling factor of the network + super().__init__(stride=32) + self.conv1 = nn.Conv2d(3, 64, kernel_size=7, stride=2, padding=3) + self.conv2 = nn.Conv2d(64, out_channels, kernel_size=3, stride=2, padding=1) + # ... rest of architecture + + def forward(self, x: torch.Tensor) -> torch.Tensor: + x = self.conv1(x) + x = self.conv2(x) + return x +``` + +Use the custom backbone in a configuration file: + +```yaml +model: + backbone: + type: MyBackbone + out_channels: 256 + pretrained: false +``` + +## Custom Necks + +A neck processes the backbone's feature map before it reaches the head. 
Subclass [`BaseNeck`][deeplabcut.pose_estimation_pytorch.models.necks.BaseNeck] and register with `@NECKS.register_module`: + +```python +import torch +import torch.nn as nn +from deeplabcut.pose_estimation_pytorch.models.necks import NECKS, BaseNeck + + +@NECKS.register_module +class MyNeck(BaseNeck): + def __init__(self, in_channels: int = 256, out_channels: int = 256): + super().__init__() + self.project = nn.Conv2d(in_channels, out_channels, kernel_size=1) + + def forward(self, x: torch.Tensor) -> torch.Tensor: + return self.project(x) +``` + +Reference in configuration: + +```yaml +model: + neck: + type: MyNeck + in_channels: 256 + out_channels: 128 +``` + +## Custom Heads + +A head produces task-specific predictions from the neck's or backbone's output. Subclass [`BaseHead`][deeplabcut.pose_estimation_pytorch.models.heads.BaseHead] and register with `@HEADS.register_module`: + +```python +import torch +import torch.nn as nn +from deeplabcut.pose_estimation_pytorch.models.heads import HEADS, BaseHead +from deeplabcut.pose_estimation_pytorch.models.predictors import BasePredictor +from deeplabcut.pose_estimation_pytorch.models.target_generators import BaseGenerator + + +@HEADS.register_module +class MyHead(BaseHead): + def __init__( + self, + predictor: BasePredictor, + target_generator: BaseGenerator, + criterion: dict, + aggregator=None, + in_channels: int = 256, + num_keypoints: int = 17, + ): + super().__init__( + predictor=predictor, + target_generator=target_generator, + criterion=criterion, + aggregator=aggregator, + ) + self.conv = nn.Conv2d(in_channels, num_keypoints, kernel_size=1) + + def forward(self, x: torch.Tensor) -> dict[str, torch.Tensor]: + heatmap = self.conv(x) + return {"heatmap": heatmap} +``` + +## Custom Predictors + +A predictor converts a head's raw output tensors into keypoint coordinates. 
Subclass [`BasePredictor`][deeplabcut.pose_estimation_pytorch.models.predictors.BasePredictor] and register with `@PREDICTORS.register_module`: + +```python +import torch +from deeplabcut.pose_estimation_pytorch.models.predictors import PREDICTORS, BasePredictor + + +@PREDICTORS.register_module +class MyPredictor(BasePredictor): + def forward( + self, + stride: float, + outputs: dict[str, torch.Tensor], + ) -> dict[str, torch.Tensor]: + heatmap = outputs["heatmap"] + # Locate the maximum value in each heatmap channel + batch, num_kpts, h, w = heatmap.shape + flat = heatmap.flatten(2) + idx = flat.argmax(dim=-1) + x = (idx % w).float() * stride + y = (idx // w).float() * stride + coords = torch.stack([x, y], dim=-1) # (batch, num_kpts, 2) + scores = flat.max(dim=-1).values + return {"coordinates": coords, "scores": scores} +``` + +## Custom Target Generators + +A target generator converts ground-truth annotations into training target tensors. Subclass [`BaseGenerator`][deeplabcut.pose_estimation_pytorch.models.target_generators.BaseGenerator] and register with `@TARGET_GENERATORS.register_module`: + +```python +import torch +import torch.nn as nn +from deeplabcut.pose_estimation_pytorch.models.target_generators import ( + TARGET_GENERATORS, + BaseGenerator, +) + + +@TARGET_GENERATORS.register_module +class MyTargetGenerator(BaseGenerator): + def __init__(self, num_heatmaps: int, sigma: float = 2.0): + super().__init__() + self.num_heatmaps = num_heatmaps + self.sigma = sigma + + def forward( + self, + annotations: dict, + stride: float, + output_size: tuple[int, int], + ) -> dict[str, torch.Tensor]: + # Build Gaussian heatmaps for each keypoint annotation + # Returns a dict matching the keys produced by the head + ... + return {"heatmap": target_heatmaps, "heatmap_mask": mask} +``` + +## Using Custom Components in Configuration + +Once registered, custom components are referenced by class name in `pytorch_cfg.yaml`. 
Make sure the module containing your class is imported before the configuration is loaded (e.g. at the top of your training script): + +```python +# my_components.py — import this before calling dlc_torch.train() +from deeplabcut.pose_estimation_pytorch.models.backbones import BACKBONES, BaseBackbone +import torch.nn as nn + + +@BACKBONES.register_module +class MyBackbone(BaseBackbone): + ... +``` + +Then reference it in the configuration: + +```yaml +model: + backbone: + type: MyBackbone # matched by class name in the registry + out_channels: 256 + head: + type: HeatmapHead # built-in head, paired with your backbone + ... +``` diff --git a/dev-docs/docs/dataprep.md b/dev-docs/docs/dataprep.md new file mode 100644 index 0000000000..905cb24512 --- /dev/null +++ b/dev-docs/docs/dataprep.md @@ -0,0 +1,140 @@ +# Data Preparation and Loading + +The [`deeplabcut.pose_estimation_pytorch.data`][] package provides comprehensive tools for dataset creation, train/test splitting, and data augmentation. This guide covers the data loaders, datasets, and transforms available in the PyTorch backend. + +## Data Loaders + +DeepLabCut provides two main data loader classes for different dataset formats: + +### DLCLoader + +The [`DLCLoader`][deeplabcut.pose_estimation_pytorch.data.DLCLoader] class loads labeled data from a DeepLabCut project. It handles train/test splitting, configuration loading, and dataset creation for a specific shuffle. + +**Basic Usage:** + +```python +import deeplabcut.pose_estimation_pytorch as dlc_torch + +loader = dlc_torch.DLCLoader( + config="/path/to/project/config.yaml", + trainset_index=0, + shuffle=1, +) + +# Access loader properties +print(loader.model_folder) # Path to model directory +print(loader.evaluation_folder) # Path to evaluation directory +print(loader.pose_task) # Task type (BOTTOM_UP, TOP_DOWN, etc.) 
+ +# View the data +print(loader.df) # Complete dataset as DataFrame +print(loader.df_train) # Training split +print(loader.df_test) # Test split +``` + +**Creating Datasets:** + +```python +# Create training dataset +train_dataset = loader.create_dataset( + transform=dlc_torch.build_transforms(loader.model_cfg["data"]["train"]), + mode="train", + task=loader.pose_task, +) + +# Create validation dataset +valid_dataset = loader.create_dataset( + transform=dlc_torch.build_transforms(loader.model_cfg["data"]["inference"]), + mode="test", + task=loader.pose_task, +) + +# Check dataset size +print(f"Training samples: {len(train_dataset)}") +print(f"Validation samples: {len(valid_dataset)}") +``` + +### COCOLoader + +The [`COCOLoader`][deeplabcut.pose_estimation_pytorch.data.COCOLoader] class enables training on datasets in COCO format without creating a DeepLabCut project. This is useful for working with public datasets or custom data pipelines. + +**COCO Dataset Structure:** + +``` +COCOProject/ +├── annotations/ +│ ├── train.json +│ └── test.json +└── images/ + ├── img0000.png + ├── img0001.png + └── ... 
+``` + +**Working with COCO Data:** + +```python +from pathlib import Path +import deeplabcut.pose_estimation_pytorch as dlc_torch + +project_root = Path("/path/to/COCOProject") + +# Parse dataset information +train_dict = dlc_torch.COCOLoader.load_json( + project_root, + filename="train.json" +) +max_num_individuals, bodyparts = dlc_torch.COCOLoader.get_project_parameters(train_dict) + +# Create model configuration +model_cfg = dlc_torch.config.make_pytorch_pose_config( + project_config=dlc_torch.config.make_basic_project_config( + dataset_path=str(project_root), + bodyparts=bodyparts, + max_individuals=max_num_individuals, + multi_animal=True, + ), + pose_config_path=project_root / "experiments" / "hrnet_w32" / "train", + net_type="hrnet_w32", + top_down=True, + save=True, +) + +# Create loader +loader = dlc_torch.COCOLoader( + project_root=project_root, + model_config_path=project_root / "experiments" / "hrnet_w32" / "train" / "pytorch_cfg.yaml", + train_json_filename="train.json", + test_json_filename="test.json", +) + +# Create datasets +train_dataset = loader.create_dataset( + transform=dlc_torch.build_transforms(loader.model_cfg["data"]["train"]), + mode="train", + task=loader.pose_task, +) +``` + +**Image Path Resolution:** + +COCO JSON files can specify image paths in two ways: + +1. **Relative paths**: Resolved to the `images/` folder + - `"file_name": "img0000.png"` → `/path/to/COCOProject/images/img0000.png` + - `"file_name": "subfolder/img0000.png"` → `/path/to/COCOProject/images/subfolder/img0000.png` + +2. **Absolute paths**: Used directly without resolution + - `"file_name": "/data/disk2/images/img0000.png"` → `/data/disk2/images/img0000.png` + +This allows you to keep images on different disks or reuse images across projects without duplication. 
+ +## PoseDataset + +The [`PoseDataset`][deeplabcut.pose_estimation_pytorch.data.PoseDataset] class extends `torch.utils.data.Dataset` and converts raw images and keypoints into tensors for training and evaluation. + +- Loads images and annotations +- Applies data augmentation transforms +- Generates training targets using the model's target generator +- Handles multi-animal and single-animal data +- Supports dynamic cropping for top-down models diff --git a/dev-docs/docs/devguides.md b/dev-docs/docs/devguides.md new file mode 100644 index 0000000000..4714191dd8 --- /dev/null +++ b/dev-docs/docs/devguides.md @@ -0,0 +1,181 @@ +# Developer Guides + +This section provides comprehensive guides for developers working with the DeepLabCut PyTorch backend. Whether you're training models, running inference, or building custom architectures, these guides will help you understand and extend the framework. + +## Guide Overview + +### Model Architecture + +The [Model Architecture](models.md) guide explains the modular design of DeepLabCut models. Learn about: + +- **Backbones**: Feature extraction networks (ResNet, HRNet, CSPNext) +- **Necks**: Optional intermediate processing layers +- **Heads**: Task-specific output layers for pose estimation +- **Predictors**: Converting model outputs to keypoint predictions +- **Target Generators**: Creating training targets from annotations +- **Available Models**: Pre-configured architectures for various tasks + +This guide is essential for understanding how components work together to form complete pose estimation systems. + +### Configuration Files + +The [Configuration Files](configuration.md) guide covers model and training configuration. 
Topics include: + +- Creating configuration files with [`make_pytorch_pose_config`][deeplabcut.pose_estimation_pytorch.config.make_pytorch_pose_config] +- YAML configuration structure and parameters +- Model architecture specification +- Training hyperparameters (optimizer, scheduler, batch size) +- Data augmentation settings +- Reading and writing configurations +- Using configuration templates + +Understanding configuration is crucial for customizing model behavior and training settings. + +### Data Preparation + +The [Data Preparation](dataprep.md) guide explains data loading and augmentation. Learn about: + +- [`DLCLoader`][deeplabcut.pose_estimation_pytorch.data.DLCLoader] for DeepLabCut projects +- [`COCOLoader`][deeplabcut.pose_estimation_pytorch.data.COCOLoader] for COCO-format datasets +- [`PoseDataset`][deeplabcut.pose_estimation_pytorch.data.PoseDataset] for PyTorch data loading +- Data augmentation transforms +- Train/test splitting +- Custom collate functions for multi-animal scenarios +- Snapshot management + +Proper data preparation is key to training robust models. + +### Training Models + +The [Training Models](training.md) guide covers the complete training workflow. Topics include: + +- High-level training APIs: [`train`][deeplabcut.pose_estimation_pytorch.apis.train] and [`train_network`][deeplabcut.pose_estimation_pytorch.apis.train_network] +- Training on DeepLabCut projects and COCO datasets +- [`PoseTrainingRunner`][deeplabcut.pose_estimation_pytorch.runners.PoseTrainingRunner] for custom training loops +- Optimizers and learning rate schedulers +- Checkpointing and resuming training +- Logging with Weights & Biases +- Training top-down models (detector and pose estimation) +- Distributed training across multiple GPUs +- Performance optimization tips + +This guide helps you train models efficiently and monitor progress effectively. 
+ +### Inference and Analysis + +The [Inference & Analysis](inference.md) guide explains how to run predictions. Learn about: + +- High-level APIs: [`analyze_videos`][deeplabcut.pose_estimation_pytorch.apis.analyze_videos] and [`analyze_images`][deeplabcut.pose_estimation_pytorch.apis.analyze_images] +- Low-level [`video_inference`][deeplabcut.pose_estimation_pytorch.apis.video_inference] API +- [`PoseInferenceRunner`][deeplabcut.pose_estimation_pytorch.runners.PoseInferenceRunner] for pose estimation +- [`DetectorInferenceRunner`][deeplabcut.pose_estimation_pytorch.runners.DetectorInferenceRunner] for object detection +- [`VideoIterator`][deeplabcut.pose_estimation_pytorch.apis.VideoIterator] for efficient video processing +- Running inference outside DeepLabCut projects +- Using custom bounding boxes with top-down models +- Model evaluation and metrics +- Visualization and labeled video creation +- Batch processing and performance optimization + +Master inference to efficiently analyze your data and evaluate model performance. + +### Adding Custom Models + +The [Adding Custom Models](custom_models.md) guide shows how to extend the framework. Topics include: + +- The model registry system (BACKBONES, NECKS, HEADS, PREDICTORS, TARGET_GENERATORS) +- Creating custom backbones for feature extraction +- Implementing custom heads for new output types +- Building custom predictors for keypoint extraction +- Designing custom target generators for training +- Creating custom necks for feature processing +- Using custom components in configuration files +- Best practices for extensibility + +This guide empowers you to implement novel architectures and techniques. + +## Core Concepts + +### Modular Design + +DeepLabCut PyTorch uses a modular architecture where models are composed of interchangeable components. 
+ +### Registry System + +Components are registered using decorators, making them available for instantiation from configuration files: + +```python +from deeplabcut.pose_estimation_pytorch.models.backbones import BACKBONES, BaseBackbone + +@BACKBONES.register_module +class MyBackbone(BaseBackbone): + def __init__(self, ...): + super().__init__(stride=32) + # Implementation + + def forward(self, x): + # Forward pass + return x +``` + +This pattern is used for all model components (backbones, necks, heads, predictors, target generators). + +### Configuration-Driven + +Models and training are controlled through YAML configuration files: + +```yaml +model: + backbone: + type: "HRNet" + variant: "w32" + head: + type: "HeatmapHead" + num_bodyparts: 17 + +runner: + optimizer: + type: "AdamW" + params: + lr: 0.0001 + +train_settings: + epochs: 200 +``` + +This approach separates architecture definition from code, making experimentation easier. + +### Task-Based Design + +Different pose estimation approaches are handled through the [`Task`][deeplabcut.pose_estimation_pytorch.task.Task] enum: + +- `BOTTOM_UP`: Detect all keypoints then group into individuals +- `TOP_DOWN`: Detect individuals then estimate pose for each +- `COND_TOP_DOWN`: Conditional top-down approach + +The same APIs work across tasks, with the framework handling task-specific logic internally. + +## API Patterns + +### High-Level vs Low-Level APIs + +DeepLabCut provides both high-level and low-level APIs: + +- **High-Level**: Simple functions for common tasks (training, video analysis) +- **Low-Level**: Detailed control through runners and components + +Start with high-level APIs and move to low-level when you need more control.
+ +### Loaders + +Data loaders abstract dataset access: + +- Use [`DLCLoader`][deeplabcut.pose_estimation_pytorch.data.DLCLoader] for standard DeepLabCut projects +- Use [`COCOLoader`][deeplabcut.pose_estimation_pytorch.data.COCOLoader] for COCO-format data +- Both provide consistent interfaces for dataset creation + +### Runners + +Runners encapsulate training and inference logic: + +- [`PoseTrainingRunner`][deeplabcut.pose_estimation_pytorch.runners.PoseTrainingRunner]: Manages training loops +- [`PoseInferenceRunner`][deeplabcut.pose_estimation_pytorch.runners.PoseInferenceRunner]: Handles pose estimation +- [`DetectorInferenceRunner`][deeplabcut.pose_estimation_pytorch.runners.DetectorInferenceRunner]: Handles object detection + +Runners can be created from configuration or instantiated directly for custom workflows. diff --git a/dev-docs/docs/examples.md b/dev-docs/docs/examples.md new file mode 100644 index 0000000000..2cdc537d99 --- /dev/null +++ b/dev-docs/docs/examples.md @@ -0,0 +1,134 @@ +# Code Examples + +This guide provides practical examples for common DeepLabCut PyTorch workflows. 
+ +## Training a Model on a COCO Dataset + +```python +from pathlib import Path + +import deeplabcut.pose_estimation_pytorch as dlc_torch + +# Specify project paths +project_root = Path("/path/to/my/COCOProject") +train_json_filename = "train.json" +test_json_filename = "test.json" + +loader = dlc_torch.COCOLoader( + project_root=project_root, + model_config_path="/path/to/my/project/experiments/pytorch_config.yaml", + train_json_filename=train_json_filename, + test_json_filename=test_json_filename, +) +dlc_torch.train( + loader=loader, + run_config=loader.model_cfg, + task=dlc_torch.Task(loader.model_cfg["method"]), + device="cuda:2", + logger_config=dict( + type="WandbLogger", + project_name="MyWandbProject", + tags=["model=hrnet_w32"], + ), + snapshot_path=None, +) +``` + +## Running Video Analysis outside a DeepLabCut Project + +DeepLabCut provides high-level APIs (via the GUI or the Python package) to analyze your data. Using these APIs assumes the existence of a DLC project (with a `config.yaml` file, etc.). + +Sometimes it might be more convenient to just run a model on your data via a low-level API. We also use this low-level API under the hood, in particular for the Model Zoo.
Check out the example below: + +```python +from deeplabcut.core.config import read_config_as_dict +from pathlib import Path + +import deeplabcut.pose_estimation_pytorch as dlc_torch + +train_dir = Path("/Users/Jaylen/my-dlc-models/train") +pytorch_config_path = train_dir / "pytorch_config.yaml" +snapshot_path = train_dir / "snapshot-100.pt" + +# for top-down models, otherwise None +detector_snapshot_path = train_dir / "detector-snapshot-100.pt" + +# video and inference parameters +video_path = Path("/Users/Jaylen/my-dlc-models/videos/test-video.mp4") +max_num_animals = 5 +batch_size = 16 +detector_batch_size = 8 + +# read model configuration +model_cfg = read_config_as_dict(pytorch_config_path) +pose_task = dlc_torch.Task(model_cfg["method"]) +pose_runner = dlc_torch.get_pose_inference_runner( + model_config=model_cfg, + snapshot_path=snapshot_path, + max_individuals=max_num_animals, + batch_size=batch_size, +) + +detector_runner = None +if pose_task == dlc_torch.Task.TOP_DOWN: + detector_runner = dlc_torch.get_detector_inference_runner( + model_config=model_cfg, + snapshot_path=detector_snapshot_path, + max_individuals=max_num_animals, + batch_size=detector_batch_size, + ) + +predictions = dlc_torch.video_inference( + video=video_path, + pose_runner=pose_runner, + detector_runner=detector_runner, +) +``` + +## Running Top-Down Video Analysis with Existing Bounding Boxes + +When `deeplabcut.pose_estimation_pytorch.apis.videos.video_inference` is called with a top-down model, it is assumed that a detector snapshot is given as well to obtain bounding boxes with which to run pose estimation. It's possible that you've already obtained bounding boxes for your video (with another object detector or through some other means), and you want to reuse those bounding boxes instead of running an object detector again. 
+ +You can easily do so by writing a bit of custom code, as shown in the example below: + +```python +from deeplabcut.core.config import read_config_as_dict +from pathlib import Path + +import numpy as np +import deeplabcut.pose_estimation_pytorch as dlc_torch +from tqdm import tqdm + +# create an iterator for your video +video = dlc_torch.VideoIterator("/Users/Jayson/my-cool-video.mp4") + +# dummy bboxes - you can load yours from a file or in another way +# the bboxes should be in `xywh` format, i.e. (x_top_left, y_top_left, width, height) +bounding_boxes = [ + dict( # frame 0 bounding boxes + bboxes=np.array([[12, 37, 120, 78]]), + ), + dict( # frame 1 bounding boxes + bboxes=np.array([[17, 45, 128, 73], [532, 34, 117, 87]]), + ), + # ... + dict( # last frame's bounding boxes -> the list must contain exactly one entry per video frame! + bboxes=np.array([[17, 45, 128, 73], [532, 34, 117, 87]]), + ), +] +video.set_context(bounding_boxes) +max_individuals = np.max([len(context["bboxes"]) for context in bounding_boxes]) + +# run inference! +model_cfg = read_config_as_dict("/Users/Jayson/pytorch_config.yaml") +pose_runner = dlc_torch.get_pose_inference_runner( + model_config=model_cfg, + snapshot_path=Path("/Users/Jayson/model-snapshot.pt"), + max_individuals=max_individuals, + batch_size=32, +) + +# your predictions will be a list, containing the predictions made for each frame +# as a dict (with keys for "bodyparts" but also "bboxes")! +predictions = pose_runner.inference(images=tqdm(video)) +``` diff --git a/dev-docs/docs/index.md b/dev-docs/docs/index.md new file mode 100644 index 0000000000..7c0f4d7d39 --- /dev/null +++ b/dev-docs/docs/index.md @@ -0,0 +1,76 @@ +# DeepLabCut PyTorch API Documentation + +
+
+
"
+ exclude_private: true
+ show_full_namespace: false
+ on_implicit_namespace_package: "warn"
+ - mkdocstrings: # renders API reference content from the package's docstrings
+ default_handler: python
+ handlers:
+ python:
+ paths: [".."] # repo root, so `import deeplabcut` resolves correctly
+ options:
+ summary: true # render summary listings of an object's members
+ show_root_heading: true # show a heading for the documented root object
+ show_symbol_type_heading: true # show the symbol type (module, class, function, ...) in headings
+ parameter_headings: true # give each parameter its own heading so it can be linked to
+ docstring_options: # options passed on to the docstring parser
+ returns_multiple_items: false # parse a Returns/Yields section as a single item
+ docstring_section_style: table # render docstring sections (parameters, returns, ...) as tables
+ separate_signature: true # put the full signature in its own code block below the heading
+ line_length: 88 # maximum line length used when formatting signatures
+ show_signature_annotations: true # include type annotations in rendered signatures
+ inventories: # external object inventories used to cross-link other projects' API docs
+ - url: https://docs.python.org/3/objects.inv
+ domains: [py, std] # only load entries from these inventory domains
+ - url: https://typing-extensions.readthedocs.io/en/latest/objects.inv
+ - url: https://pytorch.org/docs/stable/objects.inv
+ - search # adds a search bar to the header
+ - social # generates social cards rendering as preview images on social media
+
+markdown_extensions:
+ - attr_list # adds support for setting attributes (ids) on elements
+ - pymdownx.highlight: # adds support for syntax highlighting of code blocks
+ anchor_linenums: true # turn line numbers into linkable anchors
+ line_spans: __span # wrap each code line in a span, using this id prefix
+ pygments_lang_class: true # add the language name as a CSS class on the code block
+ - pymdownx.arithmatex # adds support for rendering LaTeX math expressions
+ - pymdownx.inlinehilite # adds support for syntax highlighting of inline code blocks
+ - pymdownx.snippets # adds the ability to embed content from arbitrary files into a document
+ - pymdownx.superfences # allows for arbitrary nesting of code and content blocks inside each other
+ - pymdownx.tabbed: # provides a syntax to easily add tabbed Markdown content.
+ alternate_style: true # use the alternate tab markup (the style supported by Material for MkDocs)
+ - pymdownx.blocks.details # allows for the creation of collapsible details/summary constructs.
+ - pymdownx.blocks.admonition # allows for the creation of admonitions.
+ - pymdownx.blocks.definition # allows for the creation of definition lists.
+ - pymdownx.blocks.tab # allows for the creation of tab containers.
+ - pymdownx.blocks.html # allows for the arbitrary creation of HTML elements of various types.
+
+extra:
+ version:
+ provider: mike # the theme's version selector is fed by mike-managed versions
+ default: main # NOTE(review): presumably the version/alias published from the `main` branch - confirm against the deploy workflow
+
+copyright: "Copyright © 2026 The DeepLabCut Team" # copyright notice displayed by the theme
+
+nav: # site navigation; the commented-out entries below are currently left out of the nav
+ - Get Started:
+ - Overview: index.md
+ # - Installation: installation.md
+ - Quick Start: quickstart.md
+ - Developer Guides:
+ - Overview: devguides.md
+ - Model Architecture: models.md
+ # - Configuration Files: configuration.md
+ - Data Preparation: dataprep.md
+ - Training Models: training.md
+ - Inference & Analysis: inference.md
+ # - Adding Custom Models: custom_models.md # NOTE(review): the developer-guide text links to custom_models.md - confirm that page exists
+ # - Multi-animal Tracking: tracking.md
+ - Examples:
+ - Practical Examples: examples.md
+ - API Reference # bare title with no page path - presumably the slot filled in by mkdocs-api-autonav; verify
diff --git a/docs/dev.md b/docs/dev.md
new file mode 100644
index 0000000000..678a867867
--- /dev/null
+++ b/docs/dev.md
@@ -0,0 +1,5 @@
+# Developer Documentation
+
+
+
+If you are not redirected automatically, please go to [Developer docs](../dev/index.html).
diff --git a/pyproject.toml b/pyproject.toml
index a578f17dd9..43e8680123 100644
--- a/pyproject.toml
+++ b/pyproject.toml
@@ -79,6 +79,16 @@ docs = [
"numpydoc",
"sphinxcontrib-mermaid",
]
+dev-docs = [ # dependencies for building the mkdocs-based developer docs (`pip install ".[dev-docs]"`)
+ "black>=24", # presumably used by mkdocstrings to format rendered signatures - confirm
+ "mike>=2.1", # versioned docs (mkdocs.yml sets `extra.version.provider: mike`)
+ "mkdocs>=1.6",
+ "mkdocs-api-autonav>=0.1",
+ "mkdocs-autorefs>=1.2",
+ "mkdocs-jupyter>=0.25",
+ "mkdocs-material[imaging]>=9.5", # `imaging` extra supports the `social` cards plugin
+ "mkdocstrings[python]>=0.27", # Python handler for the `mkdocstrings` plugin
+]
fmpose3d = [ "fmpose3d>=0.0.8" ]
# Use only one of [tf, tf-cu11, tf-cu12, tf-latest]. Do not combine extras.
tf = [