-
Notifications
You must be signed in to change notification settings - Fork 6
Expand file tree
/
Copy pathmodel.py
More file actions
89 lines (66 loc) · 3.57 KB
/
model.py
File metadata and controls
89 lines (66 loc) · 3.57 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
import torch
import common.torch.utils.model_util as model_util
"""
This is the implementation of the ZFNet Architecture using PyTorch Library
There are a few differences between the actual paper and this implementation.
1. Use of Batch Normalization after the activation layer instead of Local Response Normalization.
ZFNet paper does not use Batch Normalization as it wasn't published at that time. Study indicates
Batch Normalization is more robust than Local Response Normalization.
2. Use Max Pooling instead of Average Pooling.
3. Use more Dropout layers ( after MaxPool layers ) to reduce over-fitting.
4. Use Xavier Normal initialization instead of initializing just from a normal distribution.
"""
class ZFNetModel(torch.nn.Module):
    """Modified ZFNet: five convolutional stages followed by three
    fully-connected layers, producing LogSoftmax class scores.

    Deviations from the original paper are listed in the module-level
    notes above: BatchNorm instead of Local Response Normalization,
    MaxPool instead of AvgPool, extra Dropout after each pooling stage,
    and Xavier-normal weight initialization.
    """

    def __init__(self, num_classes=256):
        """Build the layer stack.

        Args:
            num_classes: width of the final classification layer.
        """
        super().__init__()
        nn = torch.nn  # local alias keeps the layer list readable

        feature_layers = [
            # Stage 1: the paper's change from AlexNet — 7x7 kernel,
            # stride 2 (instead of 11x11 / stride 4).
            nn.Conv2d(in_channels=3, out_channels=96, kernel_size=7, stride=2),
            # inplace=True mutates the input tensor directly, which can
            # save a little memory but is not always a valid operation.
            nn.ReLU(inplace=True),
            # BatchNorm2d's num_features is C of an (N, C, H, W) input.
            nn.BatchNorm2d(num_features=96),
            nn.MaxPool2d(kernel_size=3, stride=2),
            # Additional dropout after pooling to reduce over-fitting.
            nn.Dropout(p=0.25),
            # Stage 2: stride changed 1 -> 2 and padding 2 -> 1.
            nn.Conv2d(in_channels=96, out_channels=256, kernel_size=5, stride=2, padding=1),
            nn.ReLU(inplace=True),
            nn.BatchNorm2d(num_features=256),
            nn.MaxPool2d(kernel_size=3, stride=2),
            nn.Dropout(p=0.25),
            # Stages 3-5: three 3x3 stride-1 convs, 512 -> 1024 -> 512.
            nn.Conv2d(in_channels=256, out_channels=512, kernel_size=3, stride=1, padding=1),
            nn.ReLU(inplace=True),
            nn.BatchNorm2d(num_features=512),
            nn.Conv2d(in_channels=512, out_channels=1024, kernel_size=3, stride=1, padding=1),
            nn.ReLU(inplace=True),
            nn.BatchNorm2d(num_features=1024),
            nn.Conv2d(in_channels=1024, out_channels=512, kernel_size=3, stride=1, padding=1),
            nn.ReLU(inplace=True),
            nn.BatchNorm2d(num_features=512),
            nn.MaxPool2d(kernel_size=3, stride=2),
            nn.Dropout(p=0.25),
        ]

        classifier_layers = [
            # Collapse (N, C, H, W) activations into (N, L) vectors.
            # NOTE(review): the 6*6*512 flat size assumes a fixed input
            # resolution — confirm against the data pipeline.
            model_util.Flatten(),
            nn.Linear(6 * 6 * 512, 4096),
            nn.ReLU(inplace=True),
            nn.BatchNorm1d(num_features=4096),
            nn.Dropout(p=0.5),
            nn.Linear(4096, 4096),
            nn.ReLU(inplace=True),
            nn.BatchNorm1d(num_features=4096),
            nn.Linear(4096, num_classes),
            # dim=1: log-probabilities along the class axis of (N, L).
            nn.LogSoftmax(dim=1),
        ]

        self.model = torch.nn.Sequential(*feature_layers, *classifier_layers)
        # Xavier-normal initialization for every layer that supports it.
        self.model.apply(model_util.weights_init_xavier_normal)

    def forward(self, x):
        """Run a batch of images through the network; returns log-probabilities."""
        return self.model(x)