opencv-130-DNN 支持YOLO对象检测网络运行

知识点

OpenCV DNN模块支持YOLO对象检测网络,最新的OpenCV4.0支持YOLOv3版本的对象检测网络。YOLOv3同时还发布了面向移动端的网络模型YOLOv3-tiny,该模型可以在CPU上实时运行。OpenCV通过集成对DarkNet框架的支持,实现YOLO网络的加载与检测。因为YOLOv3对象检测网络是多个层的合并输出,所以在OpenCV中调用时必须显式声明哪些是输出层。为此,OpenCV提供了一个API来获取所有输出层的名称,该API为:

1
2
// 该函数返回所有输出未被连接的层(即网络的输出层)的名称。
std::vector<String> cv::dnn::Net::getUnconnectedOutLayersNames()const

调用推断时,必须通过输入参数显式指定输出层的名称,相关API如下:

1
2
3
4
5
6
void cv::dnn::Net::forward(
OutputArrayOfArrays outputBlobs,
const std::vector< String > & outBlobNames
)
outputBlobs是调用之后的输出
outBlobNames是所有输出层的名称

跟SSD/Faster-RCNN输出的结构不一样,YOLO每个检测结果的前四个值为: [center_x, center_y, width, height],第五个值为目标置信度(objectness),
其后是所有类别的得分,这个时候只要比较score大小就可以得到score最大的对应对象类别,从而解析检测结果。相关模型可到YOLO作者的官方网站下载:
YOLO: Real-Time Object Detection

代码(c++,python)

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
#include <opencv2/opencv.hpp>
#include <opencv2/dnn.hpp>

#include <fstream>
#include <iostream>
#include <algorithm>
#include <cstdlib>
using namespace std;
using namespace cv;
using namespace cv::dnn;
// Paths handed to readNetFromDarknet() in main(): the YOLOv3 network
// configuration file and the matching weights file.
String yolo_cfg("D:/projects/pose_body/hand/yolov3.cfg");
String yolo_model("D:/projects/pose_body/hand/yolov3.weights");
int main(int argc, char** argv)
{
Net net = readNetFromDarknet(yolo_cfg, yolo_model);
net.setPreferableBackend(DNN_BACKEND_INFERENCE_ENGINE);
net.setPreferableTarget(DNN_TARGET_CPU);
std::vector<String> outNames = net.getUnconnectedOutLayersNames();
for (int i = 0; i < outNames.size(); i++) {
printf("output layer name : %s\n", outNames[i].c_str());
}

// 加载COCO数据集标签
vector<string> classNamesVec;
ifstream classNamesFile("D:/projects/opencv_tutorial/data/models/object_detection_classes_yolov3.txt");
if (classNamesFile.is_open())
{
string className = "";
while (std::getline(classNamesFile, className))
classNamesVec.push_back(className);
}

// 加载图像
Mat frame = imread("D:/images/pedestrian.png");
Mat inputBlob = blobFromImage(frame, 1 / 255.F, Size(416, 416), Scalar(), true, false);
net.setInput(inputBlob);

// 检测
std::vector<Mat> outs;
net.forward(outs, outNames);
vector<double> layersTimings;
double freq = getTickFrequency() / 1000;
double time = net.getPerfProfile(layersTimings) / freq;
ostringstream ss;
ss << "detection time: " << time << " ms";
putText(frame, ss.str(), Point(20, 20), 0, 0.5, Scalar(0, 0, 255));
vector<Rect> boxes;
vector<int> classIds;
vector<float> confidences;
for (size_t i = 0; i<outs.size(); ++i)
{
// Network produces output blob with a shape NxC where N is a number of
// detected objects and C is a number of classes + 4 where the first 4
// numbers are [center_x, center_y, width, height]
float* data = (float*)outs[i].data;
for (int j = 0; j < outs[i].rows; ++j, data += outs[i].cols)
{
Mat scores = outs[i].row(j).colRange(5, outs[i].cols);
Point classIdPoint;
double confidence;
minMaxLoc(scores, 0, &confidence, 0, &classIdPoint);
if (confidence > 0.5)
{
int centerX = (int)(data[0] * frame.cols);
int centerY = (int)(data[1] * frame.rows);
int width = (int)(data[2] * frame.cols);
int height = (int)(data[3] * frame.rows);
int left = centerX - width / 2;
int top = centerY - height / 2;

classIds.push_back(classIdPoint.x);
confidences.push_back((float)confidence);
boxes.push_back(Rect(left, top, width, height));
}
}
}

// 非最大抑制操作
vector<int> indices;
NMSBoxes(boxes, confidences, 0.5, 0.2, indices);
for (size_t i = 0; i < indices.size(); ++i)
{
int idx = indices[i];
Rect box = boxes[idx];
String className = classNamesVec[classIds[idx]];
putText(frame, className.c_str(), box.tl(), FONT_HERSHEY_SIMPLEX, 1.0, Scalar(255, 0, 0), 2, 8);
rectangle(frame, box, Scalar(0, 0, 255), 2, 8, 0);
}

imshow("YOLOv3-Detections", frame);
waitKey(0);
return;
}
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
"""
DNN 支持YOLO对象检测网络运行
"""

import cv2 as cv
import numpy as np

model_bin = "yolov3.weights"
config_text = "yolov3.cfg"

# Load names of classes (one label per line).
with open("object_detection_classes_yolov3.txt", 'rt') as f:
    classes = f.read().rstrip('\n').split('\n')

# Load the Darknet model and the input image.
net = cv.dnn.readNetFromDarknet(config_text, model_bin)
image = cv.imread("images/dog_person_horse.jpg")
if image is None:
    # cv.imread returns None instead of raising when the file cannot be read.
    raise SystemExit("could not read image: images/dog_person_horse.jpg")
h = image.shape[0]
w = image.shape[1]

# Run inference on every unconnected output layer.
blobImage = cv.dnn.blobFromImage(image, 1.0/255.0, (416, 416), None, True, False)
outNames = net.getUnconnectedOutLayersNames()
net.setInput(blobImage)
outs = net.forward(outNames)

# Put efficiency information.
t, _ = net.getPerfProfile()
label = 'Inference time: %.2f ms' % (t * 1000.0 / cv.getTickFrequency())
cv.putText(image, label, (0, 15), cv.FONT_HERSHEY_SIMPLEX, 0.5, (0, 255, 0))

# Collect candidate detections.
classIds = []
confidences = []
boxes = []
for out in outs:
    for detection in out:
        # The first four values are [center_x, center_y, width, height];
        # class scores are read from index 5 onwards.
        scores = detection[5:]
        classId = np.argmax(scores)
        confidence = scores[classId]
        if confidence > 0.5:
            # Box values are scaled by the image size.
            center_x = int(detection[0] * w)
            center_y = int(detection[1] * h)
            width = int(detection[2] * w)
            height = int(detection[3] * h)
            left = int(center_x - width / 2)
            top = int(center_y - height / 2)
            classIds.append(int(classId))
            confidences.append(float(confidence))
            boxes.append([left, top, width, height])

# Non-maximum suppression, then draw the surviving boxes.
indices = cv.dnn.NMSBoxes(boxes, confidences, 0.5, 0.4)
# Depending on the OpenCV version the result is an Nx1 array, a flat array,
# or an empty tuple; flattening handles all three.
for i in np.array(indices).flatten():
    i = int(i)
    box = boxes[i]
    left = box[0]
    top = box[1]
    width = box[2]
    height = box[3]
    # Fall back to the numeric id if the label list is shorter than expected.
    class_id = classIds[i]
    name = classes[class_id] if class_id < len(classes) else str(class_id)
    cv.rectangle(image, (left, top), (left+width, top+height), (0, 0, 255), 2, 8, 0)
    cv.putText(image, name, (left, top), cv.FONT_HERSHEY_SIMPLEX, 1.0, (255, 255, 0), 2)

cv.namedWindow("YOLOv3-Detection-Demo", cv.WINDOW_NORMAL)
cv.imshow('YOLOv3-Detection-Demo', image)

cv.waitKey(0)
cv.destroyAllWindows()

结果

代码地址

github