opencv-131-DNN 支持YOLOv3-tiny版本实时对象检测

知识点

YOLOv3模型在CPU上无法做到实时运行,为此YOLO作者提供了一个YOLOv3的精简版对象检测模型,大小只有30MB左右,可以在CPU上做到实时运行,这个模型就是YOLOv3-tiny模型,其下载地址如下:
YOLO: Real-Time Object Detection (https://pjreddie.com/darknet/yolo/)
相比YOLOv3,YOLOv3-tiny只有两个输出层,而且权重参数层数与参数文件大小都大幅减小,可以在嵌入式设备与前端(终端)设备上实时运行。

代码(c++,python)

C++ 代码:
#include <opencv2/opencv.hpp>
#include <opencv2/dnn.hpp>

#include <fstream>
#include <iostream>
#include <algorithm>
#include <cstdlib>
using namespace std;
using namespace cv;
using namespace cv::dnn;
// Forward declaration; the definition follows main().
void image_detection();
// Path to the YOLOv3-tiny Darknet weights file; passed to readNetFromDarknet()
// in image_detection(). The absolute path is machine-specific.
String yolo_tiny_model = "D:/projects/opencv_tutorial/data/models/yolov3-tiny-coco/yolov3-tiny.weights";
// Path to the matching Darknet network configuration (.cfg) file; also passed
// to readNetFromDarknet() in image_detection().
String yolo_tiny_cfg = "D:/projects/opencv_tutorial/data/models/yolov3-tiny-coco/yolov3-tiny.cfg";
// Program entry point: runs the single-image YOLOv3-tiny detection demo.
// Command-line arguments are accepted but not used.
int main(int argc, char** argv)
{
    (void)argc;
    (void)argv;

    image_detection();
    return 0;
}

void image_detection() {
Net net = readNetFromDarknet(yolo_tiny_cfg, yolo_tiny_model);
net.setPreferableBackend(DNN_BACKEND_INFERENCE_ENGINE);
net.setPreferableTarget(DNN_TARGET_CPU);
std::vector<String> outNames = net.getUnconnectedOutLayersNames();
for (int i = 0; i < outNames.size(); i++) {
printf("output layer name : %s\n", outNames[i].c_str());
}

vector<string> classNamesVec;
ifstream classNamesFile("D:/projects/opencv_tutorial/data/models/object_detection_classes_yolov3.txt");
if (classNamesFile.is_open())
{
string className = "";
while (std::getline(classNamesFile, className))
classNamesVec.push_back(className);
}

// ����ͼ��
Mat frame = imread("D:/images/pedestrian.png");
Mat inputBlob = blobFromImage(frame, 1 / 255.F, Size(416, 416), Scalar(), true, false);
net.setInput(inputBlob);

// ���
std::vector<Mat> outs;
net.forward(outs, outNames);
vector<double> layersTimings;
double freq = getTickFrequency() / 1000;
double time = net.getPerfProfile(layersTimings) / freq;
ostringstream ss;
ss << "detection time: " << time << " ms";
putText(frame, ss.str(), Point(20, 20), 0, 0.5, Scalar(0, 0, 255));
vector<Rect> boxes;
vector<int> classIds;
vector<float> confidences;
for (size_t i = 0; i<outs.size(); ++i)
{
float* data = (float*)outs[i].data;
for (int j = 0; j < outs[i].rows; ++j, data += outs[i].cols)
{
Mat scores = outs[i].row(j).colRange(5, outs[i].cols);
Point classIdPoint;
double confidence;
minMaxLoc(scores, 0, &confidence, 0, &classIdPoint);
if (confidence > 0.5)
{
int centerX = (int)(data[0] * frame.cols);
int centerY = (int)(data[1] * frame.rows);
int width = (int)(data[2] * frame.cols);
int height = (int)(data[3] * frame.rows);
int left = centerX - width / 2;
int top = centerY - height / 2;

classIds.push_back(classIdPoint.x);
confidences.push_back((float)confidence);
boxes.push_back(Rect(left, top, width, height));
}
}
}

vector<int> indices;
NMSBoxes(boxes, confidences, 0.5, 0.2, indices);
for (size_t i = 0; i < indices.size(); ++i)
{
int idx = indices[i];
Rect box = boxes[idx];
String className = classNamesVec[classIds[idx]];
putText(frame, className.c_str(), box.tl(), FONT_HERSHEY_SIMPLEX, 1.0, Scalar(255, 0, 0), 2, 8);
rectangle(frame, box, Scalar(0, 0, 255), 2, 8, 0);
}

imshow("YOLOv3-Detections", frame);
waitKey(0);
return;
}
Python 代码:
"""
Real-time object detection with YOLOv3-tiny through the OpenCV DNN module.

Reads frames from the default camera, runs the Darknet YOLOv3-tiny network on
each frame, applies non-maximum suppression and draws the surviving boxes with
their class labels. Press ESC to stop.
"""

import cv2 as cv
import numpy as np

yolo_tiny_model = "yolov3-tiny.weights"
yolo_tiny_cfg = "yolov3-tiny.cfg"

# Load names of classes (one per line; the line index is the class id)
classes = None
with open("object_detection_classes_yolov3.txt", 'rt') as f:
    classes = f.read().rstrip('\n').split('\n')

# load Darknet model
net = cv.dnn.readNetFromDarknet(yolo_tiny_cfg, yolo_tiny_model)
# set back-end
# net.setPreferableBackend(cv.dnn.DNN_BACKEND_OPENCV)
# net.setPreferableTarget(cv.dnn.DNN_TARGET_CPU)

# The output layer names do not change between frames, so query them once.
outNames = net.getUnconnectedOutLayersNames()

cap = cv.VideoCapture(0)
# Frame size reported by the capture device; the per-frame size used for
# scaling boxes is taken from each image below.
height = cap.get(cv.CAP_PROP_FRAME_HEIGHT)
width = cap.get(cv.CAP_PROP_FRAME_WIDTH)

while True:
    ret, image = cap.read()
    if not ret:
        break
    image = cv.flip(image, 1)
    h, w = image.shape[:2]

    # Forward pass over all unconnected output layers
    blobImage = cv.dnn.blobFromImage(image, 1.0/255.0, (416, 416), None, True, False)
    net.setInput(blobImage)
    outs = net.forward(outNames)

    # Put efficiency information.
    t, _ = net.getPerfProfile()
    fps = cv.getTickFrequency() / t if t > 0 else 0.0
    label = 'FPS: %.2f' % fps
    cv.putText(image, label, (0, 15), cv.FONT_HERSHEY_SIMPLEX, 0.5, (0, 255, 0))

    # Collect candidate detections
    classIds = []
    confidences = []
    boxes = []
    for out in outs:
        for detection in out:
            scores = detection[5:]
            classId = np.argmax(scores)
            confidence = scores[classId]
            # numbers are [center_x, center_y, width, height], relative to the frame size
            if confidence > 0.5:
                center_x = int(detection[0] * w)
                center_y = int(detection[1] * h)
                box_w = int(detection[2] * w)
                box_h = int(detection[3] * h)
                left = int(center_x - box_w / 2)
                top = int(center_y - box_h / 2)
                classIds.append(classId)
                confidences.append(float(confidence))
                boxes.append([left, top, box_w, box_h])

    # Non-maximum suppression. Depending on the OpenCV version NMSBoxes returns
    # an Nx1 array, a flat array, or an empty tuple; flattening handles all three.
    indices = cv.dnn.NMSBoxes(boxes, confidences, 0.5, 0.4)
    for i in np.array(indices).flatten():
        i = int(i)
        left, top, box_w, box_h = boxes[i]
        cv.rectangle(image, (left, top), (left + box_w, top + box_h), (0, 0, 255), 2, 8, 0)
        cv.putText(image, classes[classIds[i]], (left, top),
                   cv.FONT_HERSHEY_SIMPLEX, 1.0, (255, 255, 0), 2)

    cv.imshow('YOLOv3-tiny-Detection-Demo', image)
    c = cv.waitKey(1)
    if c == 27:  # ESC
        break

# Free the camera, then keep the last frame on screen until a key is pressed.
cap.release()
cv.waitKey(0)
cv.destroyAllWindows()

结果

代码地址

github