opencv-132-DNN单张与多张图像的推断

知识点

OpenCV DNN中支持单张图像推断,同时还支持分批次方式的图像推断,对应的两个相关API分别为blobFromImage与blobFromImages,它们的返回对象都是一个四维的Mat对象-按照顺序分别为NCHW 其组织方式详解如下:
N表示多张图像
C表示接受输入图像的通道数目
H表示接受输入图像的高度
W表示接受输入图像的宽度

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
Mat cv::dnn::blobFromImage(
InputArray image,
double scalefactor = 1.0,
const Size & size = Size(),
const Scalar & mean = Scalar(),
bool swapRB = false,
bool crop = false,
int ddepth = CV_32F
)

Mat cv::dnn::blobFromImages(
InputArrayOfArrays images,
double scalefactor = 1.0,
Size size = Size(),
const Scalar & mean = Scalar(),
bool swapRB = false,
bool crop = false,
int ddepth = CV_32F
)
参数解释
Images表示多张图像,image表示单张图像
Scalefactor表示放缩
Size表示图像大小
Mean表示均值
swapRB是否交换通道
crop是否剪切
ddepth 输出的类型,默认是浮点数格式

代码(c++,python)

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
#include <opencv2/opencv.hpp>
#include <opencv2/dnn.hpp>
#include <iostream>
#include <fstream>
/******************************************************
*
********************************************************/
using namespace cv;
using namespace cv::dnn;
using namespace std;


String bin_model = "D:/projects/opencv_tutorial/data/models/googlenet/bvlc_googlenet.caffemodel";
String protxt = "D:/projects/opencv_tutorial/data/models/googlenet/bvlc_googlenet.prototxt";
String labels_txt_file = "D:/vcworkspaces/classification_classes_ILSVRC2012.txt";
vector<String> readClassNames();
int main(int argc, char** argv) {
Mat image1 = imread("D:/images/cat.jpg");
Mat image2 = imread("D:/images/aeroplane.jpg");
vector<Mat> images;
images.push_back(image1);
images.push_back(image2);
vector<String> labels = readClassNames();

int w = 224;
int h = 224;

// 加载网络
Net net = readNetFromCaffe(protxt, bin_model);
net.setPreferableBackend(DNN_BACKEND_INFERENCE_ENGINE);
net.setPreferableTarget(DNN_TARGET_CPU);

if (net.empty()) {
printf("read caffe model data failure...\n");
return -1;
}
Mat inputBlob = blobFromImages(images, 1.0, Size(w, h), Scalar(104, 117, 123), false, false);

// 执行图像分类
Mat prob;
net.setInput(inputBlob);
prob = net.forward();
vector<double> times;
double time = net.getPerfProfile(times);
float ms = (time * 1000) / getTickFrequency();
printf("current inference time : %.2f ms \n", ms);

// 得到最可能分类输出
for (int n = 0; n < prob.rows; n++) {
Point classNumber;
double classProb;
Mat probMat = prob(Rect(0, n, 1000, 1)).clone();
Mat result = probMat.reshape(1, 1);
minMaxLoc(result, NULL, &classProb, NULL, &classNumber);
int classidx = classNumber.x;
printf("\n current image classification : %s, possible : %.2f\n", labels.at(classidx).c_str(), classProb);

// 显示文本
putText(images[n], labels.at(classidx), Point(20, 50), FONT_HERSHEY_SIMPLEX, 1.0, Scalar(0, 0, 255), 2, 8);
imshow("Image Classification", images[n]);
waitKey(0);

}
return 0;
}

std::vector<String> readClassNames()
{
std::vector<String> classNames;

std::ifstream fp(labels_txt_file);
if (!fp.is_open())
{
printf("could not open file...\n");
exit(-1);
}
std::string name;
while (!fp.eof())
{
std::getline(fp, name);
if (name.length())
classNames.push_back(name);
}
fp.close();
return classNames;
}
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
"""
DNN单张与多张图像的推断
"""

import cv2 as cv
import numpy as np

bin_model = "bvlc_googlenet.caffemodel"
protxt = "bvlc_googlenet.prototxt"

# Load names of classes
classes = None
with open("classification_classes_ILSVRC2012.txt", 'rt') as f:
classes = f.read().rstrip('\n').split('\n')

# load CNN model
net = cv.dnn.readNetFromCaffe(protxt, bin_model)

# read input data
image1 = cv.imread("images/dog.jpg")
image2 = cv.imread("images/airplane.jpg")
images = []
images.append(image1)
images.append(image2)
blobs = cv.dnn.blobFromImages(np.asarray(images), 1.0, (224, 224), (104, 117,123), False, crop=False)
print(blobs.shape)

# Run a model
net.setInput(blobs)
out = net.forward()
# Put efficiency information.
t, _ = net.getPerfProfile()
label = 'Inference time: %.2f ms' % (t * 1000.0 / cv.getTickFrequency())
print(out.shape)

# Get a class with a highest score.
for i in range(len(out)):
classId = np.argmax(out[i])
confidence = out[i][classId]
cv.putText(images[i], label, (0, 15), cv.FONT_HERSHEY_SIMPLEX, 0.5, (255, 0, 0))

# Print predicted class.
text_label = '%s: %.4f' % (classes[classId] if classes else 'Class #%d' % classId, confidence)
cv.putText(images[i], text_label, (50, 50), cv.FONT_HERSHEY_SIMPLEX, 0.75, (0, 0, 255), 2)
cv.namedWindow("googlenet-demo", cv.WINDOW_NORMAL)
cv.imshow("googlenet-demo", images[i])
cv.waitKey(0)
cv.destroyAllWindows()

结果

代码地址

github