opencv-128 - DNN Calling TensorFlow Exported Models Directly

Knowledge Points

OpenCV's DNN module supports directly loading and running models trained and exported with the TensorFlow Object Detection API. The supported models include

  • SSD
  • Faster-RCNN
  • Mask-RCNN

three classic object detection networks. This completes the pipeline from training a model in TensorFlow, through exporting it, to running the exported network in the OpenCV DNN module for custom object detection, which makes it highly practical. Taking Faster-RCNN as an example, the model can be downloaded from:
models/detection_model_zoo.md at master · tensorfl…
These models do not come with a matching graph.pbtxt file; the OpenCV DNN module provides Python scripts to generate one. For a detailed explanation see:
TensorFlow model export and use in OpenCV DNN
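
For reference, the generator scripts live in OpenCV's samples/dnn directory (tf_text_graph_ssd.py, tf_text_graph_faster_rcnn.py, tf_text_graph_mask_rcnn.py). A typical invocation for a Faster-RCNN export is sketched here; the file paths are placeholders for your own export directory, and pipeline.config is the configuration file the Object Detection API writes alongside the frozen graph:

python tf_text_graph_faster_rcnn.py --input frozen_inference_graph.pb --config pipeline.config --output graph.pbtxt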

Code (C++, Python)

#include <opencv2/opencv.hpp>
#include <opencv2/dnn.hpp>
#include <fstream>

using namespace cv;
using namespace cv::dnn;
using namespace std;

string label_map = "D:/tensorflow/models/research/object_detection/data/mscoco_label_map.pbtxt";
string model = "D:/tensorflow/faster_rcnn_resnet50_coco_2018_01_28/frozen_inference_graph.pb";
string config = "D:/tensorflow/faster_rcnn_resnet50_coco_2018_01_28/graph.pbtxt";

std::map<int, string> readLabelMaps();
int main(int argc, char** argv) {
	Mat src = imread("D:/images/person.jpg");
	if (src.empty()) {
		printf("could not load image...\n");
		return 0;
	}
	int width = src.cols;
	int height = src.rows;
	namedWindow("input", WINDOW_AUTOSIZE);
	imshow("input", src);
	map<int, string> names = readLabelMaps();

	// load the Faster-RCNN network
	Net net = readNetFromTensorflow(model, config);
	Mat blob = blobFromImage(src, 1.0, Size(300, 300), Scalar(), true, false);
	net.setInput(blob);

	// inference
	Mat detection = net.forward();
	Mat detectionMat(detection.size[2], detection.size[3], CV_32F, detection.ptr<float>());
	float threshold = 0.5;

	// parse the output, draw the predicted boxes and label text
	for (int row = 0; row < detectionMat.rows; row++) {
		float confidence = detectionMat.at<float>(row, 2);
		if (confidence > threshold) {

			// class id is zero-based, the label map starts at 1
			int object_class = detectionMat.at<float>(row, 1) + 1;

			// predicted box, coordinates normalized to [0, 1]
			int left = detectionMat.at<float>(row, 3) * width;
			int top = detectionMat.at<float>(row, 4) * height;
			int right = detectionMat.at<float>(row, 5) * width;
			int bottom = detectionMat.at<float>(row, 6) * height;

			Rect rect;
			rect.x = left;
			rect.y = top;
			rect.width = (right - left);
			rect.height = (bottom - top);

			// render bounding box and label name
			rectangle(src, rect, Scalar(255, 0, 255), 4, 8, 0);
			map<int, string>::iterator it = names.find(object_class);
			if (it != names.end()) {
				printf("id : %d, display name : %s \n", object_class, (it->second).c_str());
				putText(src, (it->second).c_str(), Point(left, top - 5), FONT_HERSHEY_SIMPLEX, 0.5, Scalar(255, 0, 0), 1);
			}
		}
	}
	imshow("faster-rcnn-demo", src);
	waitKey(0);
	return 0;
}

std::map<int, string> readLabelMaps()
{
	std::map<int, string> labelNames;
	std::ifstream fp(label_map);
	if (!fp.is_open())
	{
		printf("could not open file...\n");
		exit(-1);
	}
	string one_line;
	string display_name;
	while (!fp.eof())
	{
		std::getline(fp, one_line);
		std::size_t found = one_line.find("id:");
		if (found != std::string::npos) {
			// take the value after "id: "
			int index = found;
			string id = one_line.substr(index + 4, one_line.length() - index);

			// the next line holds: display_name: "..."
			std::getline(fp, display_name);
			found = display_name.find("display_name:");

			index = found + 15;
			string name = display_name.substr(index, display_name.length() - index);
			name = name.replace(name.length() - 1, name.length(), "");
			// printf("id : %d, name: %s \n", stoi(id), name.c_str());
			labelNames[stoi(id)] = name;
		}
	}
	fp.close();
	return labelNames;
}
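
A note that applies to both listings: for these TensorFlow detection models the blob returned by net.forward() has shape 1 x 1 x N x 7, and each of the N rows holds [image_id, class_id, confidence, x_min, y_min, x_max, y_max] with coordinates normalized to [0, 1]. That is why the C++ code wraps the output in an N x 7 detectionMat and why both samples scale the coordinates by the image width and height. A quick sanity check in Python (cvOut names the forward() result, as in the listing below; the printed shape is only an example):

print(cvOut.shape)        # e.g. (1, 1, 100, 7)
print(cvOut[0, 0, 0, :])  # first row: image_id, class_id, score, x_min, y_min, x_max, y_max
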
"""
DNN 直接调用tensorflow的导出模型
"""

import cv2 as cv

inference_pb = "frozen_inference_graph.pb"
graph_text = "graph.pbtxt"

# load tensorflow model
net = cv.dnn.readNetFromTensorflow(inference_pb, graph_text)
image = cv.imread("images/dog_person_horse.jpg")
h = image.shape[0]
w = image.shape[1]

# 获得所有层名称与索引
layerNames = net.getLayerNames()
lastLayerId = net.getLayerId(layerNames[-1])
lastLayer = net.getLayer(lastLayerId)
print(lastLayer.type)

# 检测
net.setInput(cv.dnn.blobFromImage(image, size=(300, 300), swapRB=True, crop=False))
cvOut = net.forward()
for detection in cvOut[0, 0, :, :]:
score = float(detection[2])
if score > 0.5:
left = detection[3] * w
top = detection[4] * h
right = detection[5] * w
bottom = detection[6] * h

# 绘制
cv.rectangle(image, (int(left), int(top)), (int(right), int(bottom)), (0, 255, 0), thickness=2)

cv.imshow('faster-rcnn-demo', image)
cv.waitKey(0)
cv.destroyAllWindows()
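
The Python listing draws only the boxes. Below is a minimal sketch for attaching class names, mirroring the C++ readLabelMaps() above; it assumes mscoco_label_map.pbtxt from the TensorFlow models repository has been copied next to the script (the path is a placeholder):

import re

def read_label_map(path="mscoco_label_map.pbtxt"):
    # each item block of the .pbtxt contains lines such as:  id: 18  and  display_name: "dog"
    names = {}
    with open(path, "r", encoding="utf-8") as f:
        text = f.read()
    for item_id, display in re.findall(r'id:\s*(\d+)\s*display_name:\s*"([^"]+)"', text):
        names[int(item_id)] = display
    return names

Inside the detection loop, the name can then be looked up with names.get(int(detection[1]) + 1, "unknown"), applying the same +1 shift to the zero-based class id that the C++ sample uses.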

Results

Code Address

github