opencv-134-DNN ENet for Image Segmentation

Key Points

OpenCV DNN supports image segmentation with the ENet network model. The pretrained ENet model used here can be downloaded from:
GitHub - e-lab/ENet-training
The model is a Torch model, and the API for loading it is:

Net cv::dnn::readNetFromTorch(
    const String & model,
    bool isBinary = true
)
model: path to the binary model weights file
isBinary: whether the file is in binary format; defaults to true
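
As a quick sanity check before the full demo, the model can be loaded and run on a dummy input to inspect the output layout. A minimal sketch in Python, assuming the weights file model-best.net sits in the working directory:

import cv2 as cv
import numpy as np

# Load the Torch-serialized ENet weights (path is an assumption; adjust to your setup)
net = cv.dnn.readNetFromTorch("model-best.net")

# Run a dummy forward pass to check the output shape
dummy = np.zeros((512, 1024, 3), dtype=np.uint8)
blob = cv.dnn.blobFromImage(dummy, 1.0 / 255, (1024, 512), (0, 0, 0), True, False)
net.setInput(blob)
out = net.forward()
print(out.shape)  # expected: (1, num_classes, 512, 1024)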

Code (C++, Python)

#include <fstream>
#include <sstream>

#include <opencv2/dnn.hpp>
#include <opencv2/imgproc.hpp>
#include <opencv2/highgui.hpp>

using namespace cv;
using namespace dnn;

std::vector<std::string> classes;
std::vector<Vec3b> colors;

void showLegend();
void colorizeSegmentation(const Mat &score, Mat &segm);

String enet_model = "D:/projects/models/enet/model-best.net";

int main(int argc, char** argv)
{
    Mat frame = imread("D:/projects/models/enet/test.png");
    Net net = readNetFromTorch(enet_model);
    net.setPreferableBackend(DNN_BACKEND_OPENCV);
    net.setPreferableTarget(DNN_TARGET_CPU);

    // Create a window
    static const std::string kWinName = "ENet-Demo";
    namedWindow(kWinName, WINDOW_AUTOSIZE);
    imshow("input", frame);

    // Preprocess: scale by 1/255 (0.00392), resize to 1024x512, swap B and R channels
    Mat blob = blobFromImage(frame, 0.00392, Size(1024, 512), Scalar(0, 0, 0), true, false);
    net.setInput(blob);
    Mat score = net.forward();

    Mat segm;
    colorizeSegmentation(score, segm);

    resize(segm, segm, frame.size(), 0, 0, INTER_NEAREST);
    addWeighted(frame, 0.1, segm, 0.9, 0.0, frame);

    // Put efficiency information.
    std::vector<double> layersTimes;
    double freq = getTickFrequency() / 1000;
    double t = net.getPerfProfile(layersTimes) / freq;
    std::string label = format("Inference time: %.2f ms", t);
    putText(frame, label, Point(0, 15), FONT_HERSHEY_SIMPLEX, 0.5, Scalar(0, 255, 0));

    imshow(kWinName, frame);
    if (!classes.empty())
        showLegend();
    waitKey(0);
    return 0;
}

void colorizeSegmentation(const Mat &score, Mat &segm)
{
    const int rows = score.size[2];
    const int cols = score.size[3];
    const int chns = score.size[1];

    if (colors.empty())
    {
        // Generate colors.
        colors.push_back(Vec3b());
        for (int i = 1; i < chns; ++i)
        {
            Vec3b color;
            for (int j = 0; j < 3; ++j)
                color[j] = (colors[i - 1][j] + rand() % 256) / 2;
            colors.push_back(color);
        }
    }
    else if (chns != (int)colors.size())
    {
        CV_Error(Error::StsError, format("Number of output classes does not match "
                                         "number of colors (%d != %zu)", chns, colors.size()));
    }

    // Pixel-wise argmax over the class channels
    Mat maxCl = Mat::zeros(rows, cols, CV_8UC1);
    Mat maxVal(rows, cols, CV_32FC1, score.data);
    for (int ch = 1; ch < chns; ch++)
    {
        for (int row = 0; row < rows; row++)
        {
            const float *ptrScore = score.ptr<float>(0, ch, row);
            uint8_t *ptrMaxCl = maxCl.ptr<uint8_t>(row);
            float *ptrMaxVal = maxVal.ptr<float>(row);
            for (int col = 0; col < cols; col++)
            {
                if (ptrScore[col] > ptrMaxVal[col])
                {
                    ptrMaxVal[col] = ptrScore[col];
                    ptrMaxCl[col] = (uchar)ch;
                }
            }
        }
    }

    segm.create(rows, cols, CV_8UC3);
    for (int row = 0; row < rows; row++)
    {
        const uchar *ptrMaxCl = maxCl.ptr<uchar>(row);
        Vec3b *ptrSegm = segm.ptr<Vec3b>(row);
        for (int col = 0; col < cols; col++)
        {
            ptrSegm[col] = colors[ptrMaxCl[col]];
        }
    }
}

void showLegend()
{
    static const int kBlockHeight = 30;
    static Mat legend;
    if (legend.empty())
    {
        const int numClasses = (int)classes.size();
        if ((int)colors.size() != numClasses)
        {
            CV_Error(Error::StsError, format("Number of output classes does not match "
                                             "number of labels (%zu != %zu)", colors.size(), classes.size()));
        }
        legend.create(kBlockHeight * numClasses, 200, CV_8UC3);
        for (int i = 0; i < numClasses; i++)
        {
            Mat block = legend.rowRange(i * kBlockHeight, (i + 1) * kBlockHeight);
            block.setTo(colors[i]);
            putText(block, classes[i], Point(0, kBlockHeight / 2), FONT_HERSHEY_SIMPLEX, 0.5, Vec3b(255, 255, 255));
        }
        namedWindow("Legend", WINDOW_NORMAL);
        imshow("Legend", legend);
    }
}
"""
DNN ENet实现图像分割
"""

import cv2 as cv
import numpy as np

# load CNN model
bin_model = "model-best.net"
net = cv.dnn.readNetFromTorch(bin_model)

# read input data
frame = cv.imread("images/cityscapes_test.jpg")
blob = cv.dnn.blobFromImage(frame, 0.00392, (1024, 512), (0, 0, 0), True, False);
cv.namedWindow("input", cv.WINDOW_NORMAL)
cv.imshow("input", frame)

# Run a model
net.setInput(blob)
score = net.forward()
print(score.shape)

# Put efficiency information.
t, _ = net.getPerfProfile()
label = 'Inference time: %.2f ms' % (t * 1000.0 / cv.getTickFrequency())

# generate color table
color_lut = []
n, con, h, w = score.shape
for i in range(con):
b = np.random.randint(0, 256)
g = np.random.randint(0, 256)
r = np.random.randint(0, 256)
color_lut.append((b, g, r))

# find max score for 20 channels on pixel-wise
maxCl = np.zeros((h, w), dtype=np.int32)
maxVal = np.zeros((h, w), dtype=np.float32)
for i in range(con):
for row in range(h):
for col in range(w):
t = maxVal[row, col]
s = score[0, i, row, col]
if s > t:
maxVal[row, col] = s
maxCl[row, col] = i

# colorful the segmentation image
segm = np.zeros((h, w, 3), dtype=np.uint8)
for row in range(h):
for col in range(w):
index = maxCl[row, col]
segm[row, col] = color_lut[index]

h, w = frame.shape[:2]
segm = cv.resize(segm, (w, h), None, 0, 0, cv.INTER_NEAREST)
print(segm.shape, frame.shape)
cv.namedWindow("result", cv.WINDOW_NORMAL)
cv.imshow("result", segm)

frame_segm = cv.addWeighted(frame, 0.2, segm, 0.8, 0.0)
cv.putText(frame_segm, label, (0, 15), cv.FONT_HERSHEY_SIMPLEX, 0.5, (0, 255, 0))
cv.namedWindow("input_result", cv.WINDOW_NORMAL)
cv.imshow("input_result", frame_segm)

cv.waitKey(0)
cv.destroyAllWindows()
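
The triple nested argmax loop above is written for clarity but is very slow in pure Python. The same per-pixel argmax and color lookup can be done with vectorized NumPy operations. A minimal sketch, assuming score and color_lut from the listing above:

# Vectorized alternative to the nested loops (sketch; assumes score has shape (1, C, H, W))
maxCl = np.argmax(score[0], axis=0)        # (H, W) class index per pixel
lut = np.array(color_lut, dtype=np.uint8)  # (C, 3) BGR color per class
segm = lut[maxCl]                          # (H, W, 3) colorized segmentation

This produces the same segm image as the loops, typically orders of magnitude faster.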

Results

Source Code

github