FPN 相关资料总结

FPN 结构

更详细的结构图:

代码:

import torch
import torch.nn as nn
import torch.nn.functional as F

class Bottleneck(nn.Module):
    """Residual bottleneck block: 1x1 conv -> 3x3 conv -> 1x1 conv.

    The block outputs ``expansion * planes`` channels. The 3x3 convolution
    carries ``stride``. When the stride is not 1 or the input channel count
    differs from the output channel count, the skip path is a projection
    (1x1 conv with the same stride followed by batch norm); otherwise it is
    an empty ``nn.Sequential`` that passes the input through unchanged.

    Args:
        in_planes: channel count of the incoming feature map.
        planes: channel count of the two inner convolutions.
        stride: stride of the 3x3 convolution and of the projection shortcut.
    """

    # Ratio between the block's output channels and ``planes``.
    expansion = 4

    def __init__(self, in_planes, planes, stride=1):
        super().__init__()
        out_planes = self.expansion * planes

        # Main path; every conv is bias-free because a batch norm follows it.
        self.conv1 = nn.Conv2d(in_planes, planes, kernel_size=1, bias=False)
        self.bn1 = nn.BatchNorm2d(planes)
        self.conv2 = nn.Conv2d(
            planes, planes, kernel_size=3, stride=stride, padding=1, bias=False
        )
        self.bn2 = nn.BatchNorm2d(planes)
        self.conv3 = nn.Conv2d(planes, out_planes, kernel_size=1, bias=False)
        self.bn3 = nn.BatchNorm2d(out_planes)

        # Skip path: project only when the shapes of input and output differ.
        shapes_differ = stride != 1 or in_planes != out_planes
        if shapes_differ:
            self.shortcut = nn.Sequential(
                nn.Conv2d(
                    in_planes, out_planes, kernel_size=1, stride=stride, bias=False
                ),
                nn.BatchNorm2d(out_planes),
            )
        else:
            self.shortcut = nn.Sequential()

    def forward(self, x):
        """Apply the three conv/bn stages, add the shortcut, then ReLU."""
        y = F.relu(self.bn1(self.conv1(x)))
        y = F.relu(self.bn2(self.conv2(y)))
        y = self.bn3(self.conv3(y))
        return F.relu(y + self.shortcut(x))


class FPN(nn.Module):
    """Feature Pyramid Network built on a ResNet-style bottom-up backbone.

    The backbone is a 7x7 stem followed by four stages of ``block``. Each
    stage output (c2..c5) is reduced to 256 channels by a 1x1 lateral
    convolution, merged top-down by upsampling and addition, and smoothed
    by a 3x3 convolution. An extra level p6 is produced by a stride-2 3x3
    convolution applied to p5.

    Args:
        block: residual block class. It is called as
            ``block(in_planes, planes, stride)`` and must expose an integer
            class attribute ``expansion`` (output channels / ``planes``).
        num_blocks: sequence of four ints, the number of blocks in each of
            the four bottom-up stages.
    """

    def __init__(self, block, num_blocks):
        super(FPN, self).__init__()
        self.in_planes = 64

        self.conv1 = nn.Conv2d(3, 64, kernel_size=7, stride=2, padding=3, bias=False)
        self.bn1 = nn.BatchNorm2d(64)

        # Bottom-up layers
        self.layer1 = self._make_layer(block, 64, num_blocks[0], stride=1)
        self.layer2 = self._make_layer(block, 128, num_blocks[1], stride=2)
        self.layer3 = self._make_layer(block, 256, num_blocks[2], stride=2)
        self.layer4 = self._make_layer(block, 512, num_blocks[3], stride=2)

        # Lateral layers. Input widths follow the stage outputs
        # (planes * block.expansion) instead of being hard-coded for
        # expansion == 4, so blocks with another expansion also work.
        expansion = block.expansion
        self.latlayer5 = nn.Conv2d(512 * expansion, 256, kernel_size=1, stride=1, padding=0)
        self.latlayer4 = nn.Conv2d(256 * expansion, 256, kernel_size=1, stride=1, padding=0)
        self.latlayer3 = nn.Conv2d(128 * expansion, 256, kernel_size=1, stride=1, padding=0)
        self.latlayer2 = nn.Conv2d(64 * expansion, 256, kernel_size=1, stride=1, padding=0)

        # Smooth layers
        self.smooth2 = nn.Conv2d(256, 256, kernel_size=3, stride=1, padding=1)
        self.smooth3 = nn.Conv2d(256, 256, kernel_size=3, stride=1, padding=1)
        self.smooth4 = nn.Conv2d(256, 256, kernel_size=3, stride=1, padding=1)
        self.smooth5 = nn.Conv2d(256, 256, kernel_size=3, stride=1, padding=1)

        # p6
        self.conv_p6 = nn.Conv2d(256, 256, kernel_size=3, stride=2, padding=1, bias=False)

    def _make_layer(self, block, planes, num_blocks, stride):
        """Stack ``num_blocks`` blocks into one ``nn.Sequential`` stage.

        Only the first block uses ``stride``; the remaining ones use stride 1.
        ``self.in_planes`` is advanced to the stage's output channel count so
        the next stage is wired to it.
        """
        strides = [stride] + [1] * (num_blocks - 1)
        layers = []
        for block_stride in strides:
            layers.append(block(self.in_planes, planes, block_stride))
            self.in_planes = planes * block.expansion
        return nn.Sequential(*layers)

    def _upsample_add(self, x, y):
        """Upsample ``x`` to the spatial size of ``y`` and add the two maps.

        Args:
            x: top feature map to be upsampled, laid out as [N, C, h, w].
            y: lateral feature map, laid out as [N, C, H, W].

        Returns:
            The element-wise sum, with the spatial size of ``y``.

        Note:
            Upsampling by a fixed ``scale_factor=2`` can miss the lateral
            size when the input size is odd, e.g.
                original input size:        [N,_,15,15] ->
                conv2d feature map size:    [N,_,8,8]   ->
                upsampled feature map size: [N,_,16,16]
            so the target size is taken from ``y`` and bilinear
            interpolation is used, which supports arbitrary output sizes.
        """
        _, _, H, W = y.size()
        # F.upsample is deprecated; F.interpolate with align_corners=False is
        # what it resolves to, so results are unchanged and no warning fires.
        return F.interpolate(x, size=(H, W), mode='bilinear', align_corners=False) + y

    def forward(self, x):
        """Compute the pyramid for a batch of 3-channel images.

        Args:
            x: input tensor laid out as [N, 3, H, W].

        Returns:
            Tuple ``(p2, p3, p4, p5, p6)`` of 256-channel feature maps,
            ordered from the highest to the lowest spatial resolution.
        """
        # Bottom-up
        c1 = F.relu(self.bn1(self.conv1(x)))
        c1 = F.max_pool2d(c1, kernel_size=3, stride=2, padding=1)
        c2 = self.layer1(c1)
        c3 = self.layer2(c2)
        c4 = self.layer3(c3)
        c5 = self.layer4(c4)

        # Top-down
        m5 = self.latlayer5(c5)
        m4 = self._upsample_add(m5, self.latlayer4(c4))
        m3 = self._upsample_add(m4, self.latlayer3(c3))
        m2 = self._upsample_add(m3, self.latlayer2(c2))

        # Smooth
        p5 = self.smooth5(m5)
        p4 = self.smooth4(m4)
        p3 = self.smooth3(m3)
        p2 = self.smooth2(m2)

        # p6 is obtained by p5 down-sampling
        p6 = self.conv_p6(p5)

        return p2, p3, p4, p5, p6


def FPN101():
    """Build an FPN from ``Bottleneck`` blocks with two blocks per stage.

    Despite the name, the stage depths used here are ``[2, 2, 2, 2]``; the
    deeper ``[2, 4, 23, 3]`` layout is kept only as a reference.
    """
    # Deeper alternative, currently disabled: [2, 4, 23, 3]
    blocks_per_stage = [2, 2, 2, 2]
    return FPN(Bottleneck, blocks_per_stage)


def test():
    """Run one forward pass on a random 600x900 image and print each level's size."""
    model = FPN101()
    dummy_image = torch.randn(1, 3, 600, 900)
    for feature_map in model(dummy_image):
        print(feature_map.size())


test()
# output
p2 --> torch.Size([1, 256, 150, 225])
p3 --> torch.Size([1, 256, 75, 113])
p4 --> torch.Size([1, 256, 38, 57])
p5 --> torch.Size([1, 256, 19, 29])
p6 --> torch.Size([1, 256, 10, 15])

FPN with RPN

结构:

FPN with Fast R-CNN

结构:

参考链接:

  1. Understanding Feature Pyramid Networks for object detection (FPN)
  2. 【目标检测】FPN(Feature Pyramid Network)
  3. pytorch-fpn