diff --git a/README.md b/README.md deleted file mode 100755 index 9f97006..0000000 --- a/README.md +++ /dev/null @@ -1,159 +0,0 @@ -# Welcome - -This is the codebase of the paper titled "Vector Detection Network: Pointer Detector for Robots Reading Analog Meters in the Wild". -We assume you download it with `git clone `, and the code folder `/VDN` is located in `~`. - -OS: Ubuntu 16.04 or 18.04 -Language: Python 3.6+ -Deep learning framework: PyTorch - -# Prerequisites - -## Hardware - -Make sure the PC is with 8 GB or more GRAM. - -Please make sure you have the correct version of the Nvidia driver installed, and that is compatible with your card. - -## Software - -We use Docker to ease the process of building the environment for running VDN. The installation of Docker can be done by: - -``` -wget -qO- https://get.docker.com/ | sh -systemctl enable docker.service -``` - -Meanwhile, you should also install [nvidia-docker][nv] plugin. To be brief, this is a quick guide: - -``` -# Add the package repositories -curl -s -L https://nvidia.github.io/nvidia-docker/gpgkey | sudo apt-key add - - -distribution=$(. /etc/os-release;echo $ID$VERSION_ID) - -curl -s -L https://nvidia.github.io/nvidia-docker/$distribution/nvidia-docker.list | \ - sudo tee /etc/apt/sources.list.d/nvidia-docker.list - -sudo apt-get update - -# Install nvidia-docker2 and reload the Docker daemon configuration -sudo apt-get install -y nvidia-docker2 -sudo pkill -SIGHUP dockerd -``` - -Other than that, all software dependence can be handled within the Docker container. -A detailed software dependence list could be found in `VDN/Dockerfile`. -For anonymity concern, we do not provide our docker image, yet you may build one exactly as ours by: - -``` -cd ~/VDN -docker build --tag=vdn/vdn . -``` - -# File structure - -This repo is organized as follows: - -``` -. -+-- cfgs # The configurations of different network architectures -| -+-- compiled # Compiled third-party libraries -| -+-- data -| +-- demo -| +-- result -| -+-- libs # VDN libraries -| -+-- modules # The VDN class -| -+-- utils # Some handy utilities -| -+-- weights -| +-- pretrained -| -+-- .dockerignore -+-- .gitignore -+-- add_aliases.sh # Bash script for adding Docker shortcuts to bash_aliaes -+-- Dockerfile -+-- LICENSE -+-- README.md -| -+-- demo.py # Quick demo for a demonstration -+-- train.py # Training script of VDN -+-- test.py # Evaluation and experiments for VDN - -``` - -# Compile - -We have provided a bash script `add_aliases.sh` to insert some handy bash scripts within the file `~/.bash_aliases`. -It is recommended to do so in the root folder of this project: - -``` -bash add_aliases.sh -source ~/.bash_aliases -``` - -Then, before training or testing VDN, run this to compile the code: - -``` -vdn_compile -``` - -# The Pointer-10K dataset - -The Pointer-10K dataset referred to in our paper is publicly available for non-commercial usage. -If you are interested in the data, please contact us via email. The address will be released afterward. - - -# Basic Training - -Before training the VDN model, (i) make sure you have the Pointer-10K dataset located in `~/Database/Done/pointer_10k`. -(ii) Download the pre-trained ResNet model `resnet50-19c8e357.pth` for parameter initialization from -[torchvision](https://github.com/pytorch/vision/blob/master/torchvision/models/resnet.py) -and put it in `weights/pretrained/` (you may need to create the path manually). - -``` -# start the docker container -vdn_run - -# train -python train.py -``` - -# Run the demo - -To run the demo, put the trained model named as `vdn_best.pth.tar` into `weights/`, and -run the code below - -``` -# start the docker container -vdn_run - -# run the demo within the container -python demo.py -``` - -We provide the model trained by us: [download]() - -You can put your image into `VDN/data/demo`, and the algorithm will automatically find all images within the folder -and detect pointers in these images if any analog meters exist. Please note that VDN takes the image patches output -by a meter detector, with this the provided demo images should contain the whole dial face but not much background -nor only a part of the meters. - -The results of the demo will be output to automatically created folder `output/demo`. - -# Experiments - -You can use the `eval.py` script to perform the experiments conducted in the paper. -For example, to evaluate the performance of the default configuration (ResNet34 backbone -with 384x384 input size), just issue `python eval.py` in the root folder. Evaluations -of ResNet18 and ResNet50 could be executed in the `master` branch, -whereas Res2Net50 is evaluated in individual branch `res2net50`. - -The evaluation output could be found in `/VDN/output/eval-`. - - [nv]: \ No newline at end of file diff --git a/cfgs/resnet50/eval.yaml b/cfgs/res2net50/eval.yaml old mode 100755 new mode 100644 similarity index 81% rename from cfgs/resnet50/eval.yaml rename to cfgs/res2net50/eval.yaml index d877f3c..d52e653 --- a/cfgs/resnet50/eval.yaml +++ b/cfgs/res2net50/eval.yaml @@ -1,6 +1,6 @@ GPUS: '0' -OUTPUT_DIR: '/VDN/output/eval-resnet50' -LOG_DIR: '/VDN/logs/eval-resnet50' +OUTPUT_DIR: '/VDN/output/eval-res2net50' +LOG_DIR: '/VDN/logs' WORKERS: 4 DATASET: @@ -10,8 +10,8 @@ DATASET: ROT_FACTOR: 90 SCALE_FACTOR: 0.02 MODEL: - NAME: 'vdn_model' # This name should exist in /VDN/libs/models - PRETRAINED: '/VDN/weights/vdn_model_50_best.pth.tar' + NAME: 'vdn_res2net' # This name should exist in /VDN/libs/models + PRETRAINED: '/VDN/weights/vdn_res2net_50_best.pth.tar' IMAGE_SIZE: - 384 # width - 384 # height diff --git a/cfgs/resnet50/train.yaml b/cfgs/res2net50/train.yaml old mode 100755 new mode 100644 similarity index 86% rename from cfgs/resnet50/train.yaml rename to cfgs/res2net50/train.yaml index a9035fe..bee8f22 --- a/cfgs/resnet50/train.yaml +++ b/cfgs/res2net50/train.yaml @@ -1,5 +1,5 @@ GPUS: '0' -OUTPUT_DIR: '/VDN/output/train-resnet50' +OUTPUT_DIR: '/VDN/output/train-res2net50' LOG_DIR: '/VDN/logs' WORKERS: 4 @@ -11,8 +11,8 @@ DATASET: ROT_FACTOR: 90 SCALE_FACTOR: 0.02 MODEL: - NAME: 'vdn_model' - PRETRAINED: '/VDN/weights/pretrained/resnet50-19c8e357.pth' + NAME: 'vdn_res2net' # This name should exist in /VDN/libs/models + PRETRAINED: '/VDN/weights/pretrained/res2net50_26w_4s-06e79181.pth' IMAGE_SIZE: - 384 # width - 384 # height diff --git a/cfgs/resnet18/eval.yaml b/cfgs/resnet18/eval.yaml deleted file mode 100644 index 58fea8f..0000000 --- a/cfgs/resnet18/eval.yaml +++ /dev/null @@ -1,57 +0,0 @@ -GPUS: '0' -OUTPUT_DIR: '/VDN/output/eval-resnet18' -LOG_DIR: '/VDN/logs' -WORKERS: 4 - -DATASET: - DATASET: 'coco' - ROOT: '/Database/Done/pointer_10k' - TRAIN_SET: 'train_pointer' - TEST_SET: 'test_pointer' - ROT_FACTOR: 90 - SCALE_FACTOR: 0.02 -MODEL: - NAME: 'vdn_model' # This name should exist in /VDN/libs/models - PRETRAINED: '/VDN/weights/vdn_model_18_best.pth.tar' - IMAGE_SIZE: - - 384 # width - - 384 # height - NUM_JOINTS: 1 - EXTRA: - TARGET_TYPE: 'gaussian' - HEATMAP_SIZE: - - 96 - - 96 - SIGMA: 3 - FINAL_CONV_KERNEL: 1 - DECONV_WITH_BIAS: false - NUM_DECONV_LAYERS: 3 - NUM_DECONV_FILTERS: - - 256 - - 256 - - 256 - NUM_DECONV_KERNELS: - - 4 - - 4 - - 4 - NUM_LAYERS: 18 -LOSS: - USE_TARGET_WEIGHT: true -TEST: - BATCH_SIZE: 1 - COCO_BBOX_FILE: - BBOX_THRE: 1.0 - FLIP_TEST: false - IMAGE_THRE: 0.0 - IN_VIS_THRE: 0.2 - MODEL_FILE: '' - NMS_THRE: 1.0 - OKS_THRE: 0.9 - USE_GT_BBOX: true -DEBUG: - DEBUG: true - SAVE_BATCH_IMAGES_GT: true - SAVE_BATCH_IMAGES_PRED: true - SAVE_HEATMAPS_GT: true - SAVE_HEATMAPS_PRED: true - diff --git a/cfgs/resnet18/train.yaml b/cfgs/resnet18/train.yaml deleted file mode 100644 index db74e39..0000000 --- a/cfgs/resnet18/train.yaml +++ /dev/null @@ -1,73 +0,0 @@ -GPUS: '0' -OUTPUT_DIR: '/VDN/output/train-resnet18' -LOG_DIR: '/VDN/logs' -WORKERS: 4 - -DATASET: - DATASET: 'coco' - ROOT: '/Database/Done/pointer_10k' - TRAIN_SET: 'train_pointer' - TEST_SET: 'val_pointer' - ROT_FACTOR: 90 - SCALE_FACTOR: 0.02 -MODEL: - NAME: 'vdn_model' - PRETRAINED: '/VDN/weights/pretrained/resnet18-5c106cde.pth' - IMAGE_SIZE: - - 384 # width - - 384 # height - NUM_JOINTS: 1 - EXTRA: - TARGET_TYPE: 'gaussian' - HEATMAP_SIZE: - - 96 - - 96 - SIGMA: 3 - FINAL_CONV_KERNEL: 1 - DECONV_WITH_BIAS: false - NUM_DECONV_LAYERS: 3 - NUM_DECONV_FILTERS: - - 256 - - 256 - - 256 - NUM_DECONV_KERNELS: - - 4 - - 4 - - 4 - NUM_LAYERS: 18 -LOSS: - USE_TARGET_WEIGHT: true -TRAIN: - BATCH_SIZE: 8 - SHUFFLE: true - BEGIN_EPOCH: 0 - END_EPOCH: 200 - RESUME: false - OPTIMIZER: 'adam' - LR: 0.001 - LR_FACTOR: 0.1 - LR_STEP: - - 140 - - 190 - WD: 0.0001 - GAMMA1: 0.99 - GAMMA2: 0.0 - MOMENTUM: 0.9 - NESTEROV: false -TEST: - BATCH_SIZE: 1 - COCO_BBOX_FILE: - BBOX_THRE: 1.0 - FLIP_TEST: false - IMAGE_THRE: 0.0 - IN_VIS_THRE: 0.2 - MODEL_FILE: '' - NMS_THRE: 1.0 - OKS_THRE: 0.9 - USE_GT_BBOX: true -DEBUG: - DEBUG: true - SAVE_BATCH_IMAGES_GT: false - SAVE_BATCH_IMAGES_PRED: false - SAVE_HEATMAPS_GT: false - SAVE_HEATMAPS_PRED: false diff --git a/cfgs/resnet34/eval.yaml b/cfgs/resnet34/eval.yaml deleted file mode 100755 index 7691404..0000000 --- a/cfgs/resnet34/eval.yaml +++ /dev/null @@ -1,57 +0,0 @@ -GPUS: '0' -OUTPUT_DIR: '/VDN/output/eval-resnet34' -LOG_DIR: '/VDN/logs/eval-resnet34' -WORKERS: 4 - -DATASET: - DATASET: 'coco' - ROOT: '/Database/Done/pointer_10k' - TRAIN_SET: 'train_pointer' - TEST_SET: 'test_pointer' - ROT_FACTOR: 90 - SCALE_FACTOR: 0.02 -MODEL: - NAME: 'vdn_model' # This name should exist in /VDN/libs/models - PRETRAINED: '/VDN/weights/vdn_model_34_best.pth.tar' - IMAGE_SIZE: - - 384 # width - - 384 # height - NUM_JOINTS: 1 - EXTRA: - TARGET_TYPE: 'gaussian' - HEATMAP_SIZE: - - 96 - - 96 - SIGMA: 3 - FINAL_CONV_KERNEL: 1 - DECONV_WITH_BIAS: false - NUM_DECONV_LAYERS: 3 - NUM_DECONV_FILTERS: - - 256 - - 256 - - 256 - NUM_DECONV_KERNELS: - - 4 - - 4 - - 4 - NUM_LAYERS: 34 -LOSS: - USE_TARGET_WEIGHT: true -TEST: - BATCH_SIZE: 1 - COCO_BBOX_FILE: - BBOX_THRE: 1.0 - FLIP_TEST: false - IMAGE_THRE: 0.0 - IN_VIS_THRE: 0.2 - MODEL_FILE: '' - NMS_THRE: 1.0 - OKS_THRE: 0.9 - USE_GT_BBOX: true -DEBUG: - DEBUG: true - SAVE_BATCH_IMAGES_GT: true - SAVE_BATCH_IMAGES_PRED: true - SAVE_HEATMAPS_GT: true - SAVE_HEATMAPS_PRED: true - diff --git a/cfgs/resnet34/train.yaml b/cfgs/resnet34/train.yaml deleted file mode 100755 index 5e82741..0000000 --- a/cfgs/resnet34/train.yaml +++ /dev/null @@ -1,73 +0,0 @@ -GPUS: '0' -OUTPUT_DIR: '/VDN/output/train-resnet34' -LOG_DIR: '/VDN/logs' -WORKERS: 4 - -DATASET: - DATASET: 'coco' - ROOT: '/Database/Done/pointer_10k' - TRAIN_SET: 'train_pointer' - TEST_SET: 'val_pointer' - ROT_FACTOR: 90 - SCALE_FACTOR: 0.02 -MODEL: - NAME: 'vdn_model' - PRETRAINED: '/VDN/weights/pretrained/resnet34-333f7ec4.pth' - IMAGE_SIZE: - - 384 # width - - 384 # height - NUM_JOINTS: 1 - EXTRA: - TARGET_TYPE: 'gaussian' - HEATMAP_SIZE: - - 96 - - 96 - SIGMA: 3 - FINAL_CONV_KERNEL: 1 - DECONV_WITH_BIAS: false - NUM_DECONV_LAYERS: 3 - NUM_DECONV_FILTERS: - - 256 - - 256 - - 256 - NUM_DECONV_KERNELS: - - 4 - - 4 - - 4 - NUM_LAYERS: 34 -LOSS: - USE_TARGET_WEIGHT: true -TRAIN: - BATCH_SIZE: 2 - SHUFFLE: true - BEGIN_EPOCH: 0 - END_EPOCH: 160 - RESUME: false - OPTIMIZER: 'adam' - LR: 0.001 - LR_FACTOR: 0.1 - LR_STEP: - - 100 - - 140 - WD: 0.0001 - GAMMA1: 0.99 - GAMMA2: 0.0 - MOMENTUM: 0.9 - NESTEROV: false -TEST: - BATCH_SIZE: 1 - COCO_BBOX_FILE: - BBOX_THRE: 1.0 - FLIP_TEST: false - IMAGE_THRE: 0.0 - IN_VIS_THRE: 0.2 - MODEL_FILE: '' - NMS_THRE: 1.0 - OKS_THRE: 0.9 - USE_GT_BBOX: true -DEBUG: - DEBUG: true - SAVE_BATCH_IMAGES_GT: true - SAVE_BATCH_IMAGES_PRED: true - SAVE_HEATMAPS_GT: true - SAVE_HEATMAPS_PRED: true diff --git a/demo.py b/demo.py index e693773..007c026 100755 --- a/demo.py +++ b/demo.py @@ -1,52 +1,48 @@ -#!/usr/bin/env python -# coding=utf-8 - -import cv2 -import os -import unittest - -import modules.vdn as vdn - - -class Test(unittest.TestCase): - - def test_demo_default(self): - """You can change the backbone to resnet34, resnet101, res2net50 - if those models have been trained. - - """ - VDN = vdn.VectorDetectionNetwork(backbone='resnet34') - - demo_data_dir = "./data/demo" - if not os.path.exists(demo_data_dir): - raise FileNotFoundError(f'{demo_data_dir} not exist') - - file_list = os.listdir(demo_data_dir) - file_num = len(file_list) - - if not file_num: - print('No available image in data/demo') - return - - print(f'Got {file_num} image(s) for demo') - - total_spent = 0 - cnt = 0 - for k, item in enumerate(file_list): - image_path = os.path.join(demo_data_dir, item) - src_img = cv2.imread(image_path) - - print(f'Result of image {k+1}: {item}') - _, _, _, spent = VDN.get_vectors(src_img, verbose=item[:-4]) - - if k > 0: - # The first image is not counted due to loading time - total_spent += spent - cnt += 1 - - if total_spent > 0: - print('inference rate (fps): ', cnt/total_spent) - - -if __name__ == '__main__': - unittest.main() +#!/usr/bin/env python +# coding=utf-8 + +import cv2 +import os +import unittest + +import modules.vdn as vdn + + +class Test(unittest.TestCase): + + def test_demo_default(self): + VDN = vdn.VectorDetectionNetwork(backbone='res2net50') + + demo_data_dir = "./data/demo" + if not os.path.exists(demo_data_dir): + raise FileNotFoundError(f'{demo_data_dir} not exist') + + file_list = os.listdir(demo_data_dir) + file_num = len(file_list) + + if not file_num: + print('No available image in data/demo') + return + + print(f'Got {file_num} image(s) for demo') + + total_spent = 0 + cnt = 0 + for k, item in enumerate(file_list): + image_path = os.path.join(demo_data_dir, item) + src_img = cv2.imread(image_path) + + print(f'Result of image {k+1}: {item}') + _, _, _, spent = VDN.get_vectors(src_img, verbose=item[:-4]) + + if k > 0: + # The first image is not counted due to loading time + total_spent += spent + cnt += 1 + + if total_spent > 0: + print('inference rate (fps): ', cnt/total_spent) + + +if __name__ == '__main__': + unittest.main() diff --git a/eval.py b/eval.py index ad5ed87..0121ac6 100755 --- a/eval.py +++ b/eval.py @@ -10,10 +10,7 @@ class Test(unittest.TestCase): def test_eval(self): - """Available backbones: resnet34 (default), resnet18, resnet50. - - """ - vdn_instance = vdn.VectorDetectionNetwork(backbone='resnet34') + vdn_instance = vdn.VectorDetectionNetwork(backbone='res2net50') vdn_instance.eval() diff --git a/libs/core/config.py b/libs/core/config.py index 27968e5..1d2f1cb 100755 --- a/libs/core/config.py +++ b/libs/core/config.py @@ -47,7 +47,7 @@ config.MODEL.EXTRA = MODEL_EXTRAS[config.MODEL.NAME] config.MODEL.STYLE = 'pytorch' -config.MODEL.SCALE = 6 +config.MODEL.SCALE = 4 config.MODEL.BASEWIDTH = 26 config.LOSS = edict() diff --git a/libs/models/vdn_model.py b/libs/models/vdn_model.py deleted file mode 100755 index a949fe1..0000000 --- a/libs/models/vdn_model.py +++ /dev/null @@ -1,333 +0,0 @@ -import os -import logging - -import torch -import torch.nn as nn -from collections import OrderedDict - - -BN_MOMENTUM = 0.1 -logger = logging.getLogger(__name__) - - -def conv3x3(in_planes, out_planes, stride=1): - """3x3 convolution with padding""" - return nn.Conv2d(in_planes, out_planes, kernel_size=3, stride=stride, - padding=1, bias=False) - - -class BasicBlock(nn.Module): - expansion = 1 - - def __init__(self, inplanes, planes, stride=1, downsample=None): - super(BasicBlock, self).__init__() - self.conv1 = conv3x3(inplanes, planes, stride) - self.bn1 = nn.BatchNorm2d(planes, momentum=BN_MOMENTUM) - self.relu = nn.ReLU(inplace=True) - self.conv2 = conv3x3(planes, planes) - self.bn2 = nn.BatchNorm2d(planes, momentum=BN_MOMENTUM) - self.downsample = downsample - self.stride = stride - - def forward(self, x): - residual = x - - out = self.conv1(x) - out = self.bn1(out) - out = self.relu(out) - - out = self.conv2(out) - out = self.bn2(out) - - if self.downsample is not None: - residual = self.downsample(x) - - out += residual - out = self.relu(out) - - return out - - -class Bottleneck(nn.Module): - expansion = 4 - - def __init__(self, inplanes, planes, stride=1, downsample=None): - super(Bottleneck, self).__init__() - self.conv1 = nn.Conv2d(inplanes, planes, kernel_size=1, bias=False) - self.bn1 = nn.BatchNorm2d(planes, momentum=BN_MOMENTUM) - self.conv2 = nn.Conv2d(planes, planes, kernel_size=3, stride=stride, - padding=1, bias=False) - self.bn2 = nn.BatchNorm2d(planes, momentum=BN_MOMENTUM) - self.conv3 = nn.Conv2d(planes, planes * self.expansion, kernel_size=1, - bias=False) - self.bn3 = nn.BatchNorm2d(planes * self.expansion, - momentum=BN_MOMENTUM) - self.relu = nn.ReLU(inplace=True) - self.downsample = downsample - self.stride = stride - - def forward(self, x): - residual = x - - out = self.conv1(x) - out = self.bn1(out) - out = self.relu(out) - - out = self.conv2(out) - out = self.bn2(out) - out = self.relu(out) - - out = self.conv3(out) - out = self.bn3(out) - - if self.downsample is not None: - residual = self.downsample(x) - - out += residual - out = self.relu(out) - - return out - - -class BottleneckCAFFE(nn.Module): - expansion = 4 - - def __init__(self, inplanes, planes, stride=1, downsample=None): - super(BottleneckCAFFE, self).__init__() - # add stride to conv1x1 - self.conv1 = nn.Conv2d(inplanes, planes, kernel_size=1, stride=stride, bias=False) - self.bn1 = nn.BatchNorm2d(planes, momentum=BN_MOMENTUM) - self.conv2 = nn.Conv2d(planes, planes, kernel_size=3, stride=1, - padding=1, bias=False) - self.bn2 = nn.BatchNorm2d(planes, momentum=BN_MOMENTUM) - self.conv3 = nn.Conv2d(planes, planes * self.expansion, kernel_size=1, - bias=False) - self.bn3 = nn.BatchNorm2d(planes * self.expansion, - momentum=BN_MOMENTUM) - self.relu = nn.ReLU(inplace=True) - self.downsample = downsample - self.stride = stride - - def forward(self, x): - residual = x - - out = self.conv1(x) - out = self.bn1(out) - out = self.relu(out) - - out = self.conv2(out) - out = self.bn2(out) - out = self.relu(out) - - out = self.conv3(out) - out = self.bn3(out) - - if self.downsample is not None: - residual = self.downsample(x) - - out += residual - out = self.relu(out) - - return out - - -class VDNModel(nn.Module): - - def __init__(self, block, layers, cfg, **kwargs): - self.inplanes = 64 - extra = cfg.MODEL.EXTRA - self.deconv_with_bias = extra.DECONV_WITH_BIAS - - super(VDNModel, self).__init__() - self.conv1 = nn.Conv2d(3, 64, kernel_size=7, stride=2, padding=3, bias=False) - self.bn1 = nn.BatchNorm2d(64, momentum=BN_MOMENTUM) - self.relu = nn.ReLU(inplace=True) - self.maxpool = nn.MaxPool2d(kernel_size=3, stride=2, padding=1) - self.layer1 = self._make_layer(block, 64, layers[0]) - self.layer2 = self._make_layer(block, 128, layers[1], stride=2) - self.layer3 = self._make_layer(block, 256, layers[2], stride=2) - self.layer4 = self._make_layer(block, 512, layers[3], stride=2) - - # used for deconv layers - self.deconv_layers = self._make_deconv_layer( - extra.NUM_DECONV_LAYERS, - extra.NUM_DECONV_FILTERS, - extra.NUM_DECONV_KERNELS, - ) - - self.final_layer_hm = nn.Conv2d( - in_channels=extra.NUM_DECONV_FILTERS[-1], # 256 - out_channels=cfg.MODEL.NUM_JOINTS, - kernel_size=extra.FINAL_CONV_KERNEL, # 1 - stride=1, - padding=1 if extra.FINAL_CONV_KERNEL == 3 else 0 - ) - - self.final_layer_v = nn.Conv2d( - in_channels=extra.NUM_DECONV_FILTERS[-1], - out_channels=2, - kernel_size=3, - stride=1, - padding=1 - ) - - def _make_layer(self, block, planes, blocks, stride=1): - downsample = None - if stride != 1 or self.inplanes != planes * block.expansion: - downsample = nn.Sequential( - nn.Conv2d(self.inplanes, planes * block.expansion, - kernel_size=1, stride=stride, bias=False), - nn.BatchNorm2d(planes * block.expansion, momentum=BN_MOMENTUM), - ) - - layers = [block(self.inplanes, planes, stride, downsample)] - self.inplanes = planes * block.expansion - for i in range(1, blocks): - layers.append(block(self.inplanes, planes)) - - return nn.Sequential(*layers) - - @staticmethod - def _get_deconv_cfg(deconv_kernel): - if deconv_kernel == 4: - padding = 1 - output_padding = 0 - elif deconv_kernel == 3: - padding = 1 - output_padding = 1 - elif deconv_kernel == 2: - padding = 0 - output_padding = 0 - else: - raise NotImplementedError(f'Deconv kernel size {deconv_kernel} is not supported') - - return deconv_kernel, padding, output_padding - - def _make_deconv_layer(self, num_layers, num_filters, num_kernels): - assert num_layers == len(num_filters), \ - 'ERROR: num_deconv_layers is different len(num_deconv_filters)' - assert num_layers == len(num_kernels), \ - 'ERROR: num_deconv_layers is different len(num_deconv_filters)' - - layers = [] - for i in range(num_layers): - kernel, padding, output_padding = self._get_deconv_cfg(num_kernels[i]) - - planes = num_filters[i] - layers.append( - nn.ConvTranspose2d( - in_channels=self.inplanes, - out_channels=planes, - kernel_size=kernel, - stride=2, - padding=padding, - output_padding=output_padding, - bias=self.deconv_with_bias)) - layers.append(nn.BatchNorm2d(planes, momentum=BN_MOMENTUM)) - layers.append(nn.ReLU(inplace=True)) - self.inplanes = planes - - return nn.Sequential(*layers) - - def forward(self, x): - # print(f'input size: {x.size()}') - x = self.conv1(x) - x = self.bn1(x) - x = self.relu(x) - # print(f'after conv1 size: {x.size()}') - x = self.maxpool(x) - # print(f'after maxpool size: {x.size()}') - - x = self.layer1(x) - x = self.layer2(x) - x = self.layer3(x) - x = self.layer4(x) - # print(f'after residuals size: {x.size()}') - - x = self.deconv_layers(x) - # print(f'after deconv size: {x.size()}') - - hm = self.final_layer_hm(x) - # print(f'hm size: {hm.size()}') - - vm = self.final_layer_v(x) - vm = vm.tanh() - # print(f'vm size: {vm.size()}') - - return hm, vm - - def init_weights(self, pretrained=''): - if os.path.isfile(pretrained): - logger.info('=> init deconv weights from normal distribution') - for name, m in self.deconv_layers.named_modules(): - if isinstance(m, nn.ConvTranspose2d): - logger.info('=> init {}.weight as normal(0, 0.001)'.format(name)) - logger.info('=> init {}.bias as 0'.format(name)) - nn.init.normal_(m.weight, std=0.001) - if self.deconv_with_bias: - nn.init.constant_(m.bias, 0) - elif isinstance(m, nn.BatchNorm2d): - logger.info('=> init {}.weight as 1'.format(name)) - logger.info('=> init {}.bias as 0'.format(name)) - nn.init.constant_(m.weight, 1) - nn.init.constant_(m.bias, 0) - logger.info('=> init final conv weights from normal distribution') - for m in self.final_layer_hm.modules(): - if isinstance(m, nn.Conv2d): - logger.info('=> init {}.weight as normal(0, 0.001)'.format(name)) - logger.info('=> init {}.bias as 0'.format(name)) - nn.init.normal_(m.weight, std=0.001) - nn.init.constant_(m.bias, 0) - - for m in self.final_layer_v.modules(): - if isinstance(m, nn.Conv2d): - logger.info('=> init {}.weight as normal(0, 0.001)'.format(name)) - logger.info('=> init {}.bias as 0'.format(name)) - nn.init.normal_(m.weight, std=0.001) - nn.init.constant_(m.bias, 0) - - logger.info('=> loading pretrained model {}'.format(pretrained)) - checkpoint = torch.load(pretrained) - if isinstance(checkpoint, OrderedDict): - state_dict = checkpoint - elif isinstance(checkpoint, dict) and 'state_dict' in checkpoint: - state_dict_old = checkpoint['state_dict'] - state_dict = OrderedDict() - # delete 'module.' because it is saved from DataParallel module - for key in state_dict_old.keys(): - if key.startswith('module.'): - # state_dict[key[7:]] = state_dict[key] - # state_dict.pop(key) - state_dict[key[7:]] = state_dict_old[key] - else: - state_dict[key] = state_dict_old[key] - else: - raise RuntimeError( - 'No state_dict found in checkpoint file {}'.format(pretrained)) - self.load_state_dict(state_dict, strict=False) - else: - raise ValueError('pretrained model does not exist') - - -resnet_spec = {18: (BasicBlock, [2, 2, 2, 2]), - 34: (BasicBlock, [3, 4, 6, 3]), - 50: (Bottleneck, [3, 4, 6, 3]), - 101: (Bottleneck, [3, 4, 23, 3]), - 152: (Bottleneck, [3, 8, 36, 3])} - - -def get_vdn_resnet(cfg, is_train, **kwargs): - num_layers = cfg.MODEL.EXTRA.NUM_LAYERS # default 50 - style = cfg.MODEL.STYLE - - block_class, layers = resnet_spec[num_layers] - - if style == 'caffe': - block_class = BottleneckCAFFE - - model = VDNModel(block_class, layers, cfg, **kwargs) - - if is_train and cfg.MODEL.INIT_WEIGHTS: - model.init_weights(cfg.MODEL.PRETRAINED) - - return model diff --git a/modules/vdn.py b/modules/vdn.py index bbdfc31..1ded4b8 100755 --- a/modules/vdn.py +++ b/modules/vdn.py @@ -33,7 +33,7 @@ from libs.utils.transforms import get_affine_transform import libs.dataset as lib_dataset -import libs.models.vdn_model as vdn_model +import libs.models.vdn_res2net as vdn_model import utils.vis.util as vis_util from PIL import ImageDraw @@ -51,7 +51,7 @@ class VectorDetectionNetwork: """ """ - def __init__(self, train=False, backbone='resnet50'): + def __init__(self, train=False, backbone='res2net50'): if train: vdn_config = os.path.join(root_dir, f"cfgs/{backbone}/train.yaml") else: @@ -64,11 +64,11 @@ def __init__(self, train=False, backbone='resnet50'): torch.backends.cudnn.enabled = lib_config.config.CUDNN.ENABLED if not train: - model = vdn_model.get_vdn_resnet(lib_config.config, is_train=False) + model = vdn_model.get_vdn_res2net(lib_config.config, is_train=False) model_path = lib_config.config.MODEL.PRETRAINED model.load_state_dict({k.replace('module.', ''): v for k, v in torch.load(model_path).items()}) else: - model = vdn_model.get_vdn_resnet(lib_config.config, is_train=True) + model = vdn_model.get_vdn_res2net(lib_config.config, is_train=True) self.gpus = [int(i) for i in lib_config.config.GPUS.split(',')] self.model = torch.nn.DataParallel(model, device_ids=self.gpus).cuda() @@ -96,8 +96,6 @@ def train(self): } # define loss function (criterion) and optimizer - # crit_heatmap = lib_loss.JointsMSELoss(use_target_weight=cfgs.LOSS.USE_TARGET_WEIGHT).cuda() - # crit_vector = lib_loss.OrientsMSELoss().cuda() crit_heatmap = lib_loss.MSELoss().cuda() crit_vector = lib_loss.MSELoss().cuda() diff --git a/train.py b/train.py index c99df07..4355300 100755 --- a/train.py +++ b/train.py @@ -1,21 +1,18 @@ -#!/usr/bin/env python -# coding=utf-8 - - -import unittest - -import modules.vdn as vdn - - -class Test(unittest.TestCase): - - def test_train(self): - """Available backbones: resnet34 (default), resnet18, resnet50. - - """ - vdn_instance = vdn.VectorDetectionNetwork(train=True, backbone='resnet34') - vdn_instance.train() - - -if __name__ == '__main__': - unittest.main() +#!/usr/bin/env python +# coding=utf-8 + + +import unittest + +import modules.vdn as vdn + + +class Test(unittest.TestCase): + + def test_train(self): + vdn_instance = vdn.VectorDetectionNetwork(train=True, backbone='res2net50') + vdn_instance.train() + + +if __name__ == '__main__': + unittest.main()