opencv-127-DNN 基于残差网络的视频人脸检测

知识点

OpenCV在DNN模块中提供了基于残差SSD网络训练的人脸检测模型，还支持单精度的fp16的检测准确度更好的Caffe模型加载与使用，这里实现了一个基于Caffe Model的视频实时人脸监测模型，基于Python与C++代码CPU运行，帧率均可以到达15以上。非常好用。

代码（c++,python）

#include <opencv2/opencv.hpp>
#include <opencv2/dnn.hpp>
#include <iostream>

using namespace cv;
using namespace cv::dnn;
using namespace std;

const size_t width = 300;
const size_t height = 300;
String model_bin = "D:/projects/opencv_tutorial/data/models/face_detector/res10_300x300_ssd_iter_140000_fp16.caffemodel";
String config_text = "D:/projects/opencv_tutorial/data/models/face_detector/deploy.prototxt";

int main(int argc, char** argv) {

	VideoCapture capture = VideoCapture(0);
	namedWindow("ssd-face-video", WINDOW_AUTOSIZE);

	Net net = readNetFromCaffe(config_text,  model_bin);
	net.setPreferableBackend(DNN_BACKEND_INFERENCE_ENGINE);
	net.setPreferableTarget(DNN_TARGET_CPU);
	Mat frame;
	while (true) {
		capture.read(frame);
		Mat blobImage = blobFromImage(frame, 1.0,
			Size(300, 300),
			Scalar(104.0, 177.0, 123.0), false, false);

		net.setInput(blobImage, "data");
		Mat detection = net.forward("detection_out");
		vector<double> layersTimings;
		double freq = getTickFrequency() / 1000;
		double time = net.getPerfProfile(layersTimings) / freq;
		printf("execute time : %.2f ms\n", time);

		Mat detectionMat(detection.size[2], detection.size[3], CV_32F, detection.ptr<float>());
		float confidence_threshold = 0.5;
		for (int i = 0; i < detectionMat.rows; i++) {
			float confidence = detectionMat.at<float>(i, 2);
			if (confidence > confidence_threshold) {
				size_t objIndex = (size_t)(detectionMat.at<float>(i, 1));
				float tl_x = detectionMat.at<float>(i, 3) * frame.cols;
				float tl_y = detectionMat.at<float>(i, 4) * frame.rows;
				float br_x = detectionMat.at<float>(i, 5) * frame.cols;
				float br_y = detectionMat.at<float>(i, 6) * frame.rows;

				Rect object_box((int)tl_x, (int)tl_y, (int)(br_x - tl_x), (int)(br_y - tl_y));
				rectangle(frame, object_box, Scalar(0, 0, 255), 2, 8, 0);
				putText(frame, format(" confidence %.2f", confidence),
					Point(tl_x - 10, tl_y - 5), FONT_HERSHEY_SIMPLEX, 0.5, Scalar(255, 0, 0), 1, 8);
			}
		}
		char c = waitKey(5);
		if (c == 27) {
			break;
		}
		imshow("ssd-face-video", frame);
	}
	
	waitKey(0);
	return 0;
}

"""
DNN 基于残差网络的视频人脸检测
"""

import cv2 as cv

model_bin = "res10_300x300_ssd_iter_140000_fp16.caffemodel"
config_text = "deploy.prototxt"

# load caffe model
net = cv.dnn.readNetFromCaffe(config_text, model_bin)

# set back-end
net.setPreferableBackend(cv.dnn.DNN_BACKEND_OPENCV)
net.setPreferableTarget(cv.dnn.DNN_TARGET_CPU)

cap = cv.VideoCapture(0)
while True:
    ret, image = cap.read()
    image = cv.flip(image, 1)
    if ret is False:
        break

    # 人脸检测
    h, w = image.shape[:2]
    blobImage = cv.dnn.blobFromImage(image, 1.0, (300, 300), (104.0, 177.0, 123.0), False, False)
    net.setInput(blobImage)
    cvOut = net.forward()

    # Put efficiency information.
    t, _ = net.getPerfProfile()
    fps = 1000 / (t * 1000.0 / cv.getTickFrequency())
    label = 'FPS: %.2f' % fps
    cv.putText(image, label, (0, 15), cv.FONT_HERSHEY_SIMPLEX, 0.5, (0, 255, 0))

    # 绘制检测矩形
    for detection in cvOut[0,0,:,:]:
        score = float(detection[2])
        objIndex = int(detection[1])
        if score > 0.5:
            left = detection[3]*w
            top = detection[4]*h
            right = detection[5]*w
            bottom = detection[6]*h

            # 绘制
            cv.rectangle(image, (int(left), int(top)), (int(right), int(bottom)), (255, 0, 0), thickness=2)
            cv.putText(image, "score:%.2f"%score, (int(left), int(top)), cv.FONT_HERSHEY_SIMPLEX, 0.5, (0, 0, 255), 1)
    cv.imshow('face-detection-demo', image)
    c = cv.waitKey(2)
    if c == 27:
        break
cv.waitKey(0)
cv.destroyAllWindows()

结果

代码地址

github