opencv-125-DNN 基于SSD实现实时视频检测

知识点

SSD 的 MobileNet 版本不仅可以检测图像,还可以检测视频,并能达到稳定、实时的效果。

代码(C++、Python)

C++ 代码:
#include <opencv2/opencv.hpp>
#include <opencv2/dnn.hpp>
#include <iostream>

using namespace cv;
using namespace cv::dnn;
using namespace std;

// Nominal 300 x 300 size constants (presumably the network input size;
// confirm against the model's prototxt).
const size_t width{300};
const size_t height{300};

// Locations of the label map, the trained Caffe weights and the network
// description on disk.
String labelFile("D:/projects/opencv_tutorial/data/models/ssd/labelmap_det.txt");
String modelFile("D:/projects/opencv_tutorial/data/models/ssd/MobileNetSSD_deploy.caffemodel");
String model_text_file("D:/projects/opencv_tutorial/data/models/ssd/MobileNetSSD_deploy.prototxt");

// Class names, looked up by the class id that main() reads from each
// detection row; entry 0 is the background class.
String objNames[] = {
    "background",
    "aeroplane",
    "bicycle",
    "bird",
    "boat",
    "bottle",
    "bus",
    "car",
    "cat",
    "chair",
    "cow",
    "diningtable",
    "dog",
    "horse",
    "motorbike",
    "person",
    "pottedplant",
    "sheep",
    "sofa",
    "train",
    "tvmonitor"
};

int main(int argc, char** argv) {
// load model
Net net = readNetFromCaffe(model_text_file, modelFile);
net.setPreferableBackend(DNN_BACKEND_OPENCV);
net.setPreferableTarget(DNN_TARGET_CPU);

VideoCapture cap = VideoCapture(0);
Mat frame;
while (true) {
bool ret = cap.read(frame);
if (!ret) break;
Mat blobImage = blobFromImage(frame, 0.007843,
Size(300, 300),
Scalar(127.5, 127.5, 127.5), true, false);
printf("blobImage width : %d, height: %d\n", blobImage.size[2], blobImage.size[3]);

net.setInput(blobImage, "data");
Mat detection = net.forward("detection_out");
vector<double> layersTimings;
double freq = getTickFrequency() / 1000;
double time = net.getPerfProfile(layersTimings) / freq;
printf("execute time : %.2f ms\n", time);


Mat detectionMat(detection.size[2], detection.size[3], CV_32F, detection.ptr<float>());
float confidence_threshold = 0.5;
for (int i = 0; i < detectionMat.rows; i++) {
float confidence = detectionMat.at<float>(i, 2);
if (confidence > confidence_threshold) {
size_t objIndex = (size_t)(detectionMat.at<float>(i, 1));
float tl_x = detectionMat.at<float>(i, 3) * frame.cols;
float tl_y = detectionMat.at<float>(i, 4) * frame.rows;
float br_x = detectionMat.at<float>(i, 5) * frame.cols;
float br_y = detectionMat.at<float>(i, 6) * frame.rows;

Rect object_box((int)tl_x, (int)tl_y, (int)(br_x - tl_x), (int)(br_y - tl_y));
rectangle(frame, object_box, Scalar(0, 0, 255), 2, 8, 0);
putText(frame, format(" confidence %.2f, %s", confidence, objNames[objIndex].c_str()), Point(tl_x - 10, tl_y - 5), FONT_HERSHEY_SIMPLEX, 0.7, Scalar(255, 0, 0), 2, 8);
}
}
imshow("ssd-video-demo", frame);
char c = waitKey(10);
if (c == 27) {
break;
}
}

waitKey(0);
return 0;
}
Python 代码:
"""
DNN 基于SSD实现实时视频检测
"""

import cv2 as cv

# Trained Caffe weights and network description, resolved relative to the
# current working directory.
model_bin = "MobileNetSSD_deploy.caffemodel"
config_text = "MobileNetSSD_deploy.prototxt"

# Class names, looked up by the class id read from each detection row.
objName = ["background",
           "aeroplane", "bicycle", "bird", "boat",
           "bottle", "bus", "car", "cat", "chair",
           "cow", "diningtable", "dog", "horse",
           "motorbike", "person", "pottedplant",
           "sheep", "sofa", "train", "tvmonitor"]

# load caffe model
net = cv.dnn.readNetFromCaffe(config_text, model_bin)

# # Get the names and indices of all layers
# layerNames = net.getLayerNames()
# lastLayerId = net.getLayerId(layerNames[-1])
# lastLayer = net.getLayer(lastLayerId)
# print(lastLayer.type)

# Detection: read frames from camera 0 until the stream ends or ESC is pressed.
cap = cv.VideoCapture(0)
while True:
    ret, frame = cap.read()
    if not ret:
        break

    # Use the size of the frame actually delivered; capture properties may be
    # 0 or stale on some backends.
    h, w = frame.shape[:2]

    blobImage = cv.dnn.blobFromImage(frame, 0.007843, (300, 300), (127.5, 127.5, 127.5), True, False)
    net.setInput(blobImage)
    cvOut = net.forward()

    # Each row is read as [_, class id, score, left, top, right, bottom], with
    # the box corners scaled by the frame size below.
    for detection in cvOut[0, 0, :, :]:
        score = float(detection[2])
        if score <= 0.5:
            continue

        # Guard the lookup so an unexpected class id cannot raise IndexError
        # (or silently wrap around for a negative id).
        objIndex = int(detection[1])
        label = objName[objIndex] if 0 <= objIndex < len(objName) else "unknown"

        left = int(detection[3] * w)
        top = int(detection[4] * h)
        right = int(detection[5] * w)
        bottom = int(detection[6] * h)

        # Draw the bounding box and its caption.
        cv.rectangle(frame, (left, top), (right, bottom), (255, 0, 0), thickness=2)
        cv.putText(frame, "score:%.2f, %s" % (score, label),
                   (left - 10, top - 5), cv.FONT_HERSHEY_SIMPLEX, 0.7, (0, 0, 255), 2, 8)

    cv.imshow('video-ssd-demo', frame)
    c = cv.waitKey(10)
    if c == 27:  # ESC
        break

# Release the camera, then keep the last frame visible until a key is pressed.
cap.release()
cv.waitKey(0)
cv.destroyAllWindows()

结果

代码地址

github