opencv-132-DNN单张与多张图像的推断

知识点

OpenCV DNN中支持单张图像推断，同时还支持分批次方式的图像推断，对应的两个相关API分别为blobFromImage与blobFromImages。它们的返回对象都是一个四维的Mat对象，维度顺序为NCHW，其组织方式详解如下：
N表示图像的数目（即批次大小）
C表示接受输入图像的通道数目
H表示接受输入图像的高度
W表示接受输入图像的宽度

Mat cv::dnn::blobFromImage(
	InputArray 	image,
	double 	scalefactor = 1.0,
	const Size & 	size = Size(),
	const Scalar & 	mean = Scalar(),
	bool 	swapRB = false,
	bool 	crop = false,
	int 	ddepth = CV_32F
)

Mat cv::dnn::blobFromImages(
	InputArrayOfArrays 	images,
	double 	scalefactor = 1.0,
	Size 	size = Size(),
	const Scalar & 	mean = Scalar(),
	bool 	swapRB = false,
	bool 	crop = false,
	int 	ddepth = CV_32F 
)
参数解释
images表示多张图像，image表示单张图像
scalefactor表示像素值的缩放系数
size表示输出图像的空间大小（宽、高）
mean表示各通道要减去的均值
swapRB表示是否交换R与B通道
crop表示缩放之后是否进行剪切
ddepth表示输出blob的数据类型，默认是浮点数格式（CV_32F）

代码（c++,python）

#include <opencv2/opencv.hpp>
#include <opencv2/dnn.hpp>
#include <iostream>
#include <fstream>
/******************************************************
*
********************************************************/
using namespace cv;
using namespace cv::dnn;
using namespace std;


// Caffe weights file; passed as the second argument to readNetFromCaffe() in main().
String bin_model = "D:/projects/opencv_tutorial/data/models/googlenet/bvlc_googlenet.caffemodel";
// Network definition (prototxt); passed as the first argument to readNetFromCaffe() in main().
String protxt = "D:/projects/opencv_tutorial/data/models/googlenet/bvlc_googlenet.prototxt";
// Text file with the class names; readClassNames() reads it line by line.
String labels_txt_file = "D:/vcworkspaces/classification_classes_ILSVRC2012.txt";
// Returns the non-empty lines of labels_txt_file, in file order.
vector<String> readClassNames();
int main(int argc, char** argv) {
	Mat image1 = imread("D:/images/cat.jpg");
	Mat image2 = imread("D:/images/aeroplane.jpg");
	vector<Mat> images;
	images.push_back(image1);
	images.push_back(image2);
	vector<String> labels = readClassNames();

	int w = 224;
	int h = 224;

	// 加载网络
	Net net = readNetFromCaffe(protxt, bin_model);
	net.setPreferableBackend(DNN_BACKEND_INFERENCE_ENGINE);
	net.setPreferableTarget(DNN_TARGET_CPU);

	if (net.empty()) {
		printf("read caffe model data failure...\n");
		return -1;
	}
	Mat inputBlob = blobFromImages(images, 1.0, Size(w, h), Scalar(104, 117, 123), false, false);

	// 执行图像分类
	Mat prob;
	net.setInput(inputBlob);
	prob = net.forward();
	vector<double> times;
	double time = net.getPerfProfile(times);
	float ms = (time * 1000) / getTickFrequency();
	printf("current inference time : %.2f ms \n", ms);

	// 得到最可能分类输出
	for (int n = 0; n < prob.rows; n++) {
		Point classNumber;
		double classProb;
		Mat probMat = prob(Rect(0, n, 1000, 1)).clone();
		Mat result = probMat.reshape(1, 1);
		minMaxLoc(result, NULL, &classProb, NULL, &classNumber);
		int classidx = classNumber.x;
		printf("\n current image classification : %s, possible : %.2f\n", labels.at(classidx).c_str(), classProb);

		// 显示文本
		putText(images[n], labels.at(classidx), Point(20, 50), FONT_HERSHEY_SIMPLEX, 1.0, Scalar(0, 0, 255), 2, 8);
		imshow("Image Classification", images[n]);
		waitKey(0);

	}
	return 0;
}

/**
 * Reads the class names from labels_txt_file.
 *
 * Every non-empty line becomes one entry, in file order. A trailing
 * carriage return (CRLF line endings) is removed from each line.
 * Prints a message and terminates the process with exit(-1) when the
 * file cannot be opened.
 *
 * @return the class names read from the file.
 */
std::vector<String> readClassNames()
{
	std::vector<String> classNames;

	std::ifstream fp(labels_txt_file);
	if (!fp.is_open())
	{
		printf("could not open file...\n");
		exit(-1);
	}
	std::string name;
	// Loop on the extraction result itself: this also stops on read errors,
	// whereas testing eof() alone would spin forever on a failed stream.
	while (std::getline(fp, name))
	{
		if (!name.empty() && name.back() == '\r')
			name.pop_back();
		if (!name.empty())
			classNames.push_back(name);
	}
	// The ifstream destructor closes the file.
	return classNames;
}

"""
DNN单张与多张图像的推断
"""

import cv2 as cv
import numpy as np

# Caffe network definition and trained weights.
protxt = "bvlc_googlenet.prototxt"
bin_model = "bvlc_googlenet.caffemodel"

# Class names: one per line, with trailing newlines stripped before splitting.
with open("classification_classes_ILSVRC2012.txt", 'rt') as f:
    classes = f.read().rstrip('\n').split('\n')

# Build the network from the definition and weights files.
net = cv.dnn.readNetFromCaffe(protxt, bin_model)

# Read the input images; cv.imread returns None when a file cannot be read,
# so fail early with a clear message instead of inside blobFromImages.
image_paths = ["images/dog.jpg", "images/airplane.jpg"]
images = []
for image_path in image_paths:
    loaded = cv.imread(image_path)
    if loaded is None:
        raise FileNotFoundError("could not read image: %s" % image_path)
    images.append(loaded)
image1, image2 = images

# Pass the list itself: images of different sizes cannot be stacked into one
# regular ndarray, and blobFromImages resizes each image to 224x224 anyway.
blobs = cv.dnn.blobFromImages(images, 1.0, (224, 224), (104, 117, 123), False, crop=False)
print(blobs.shape)

# Feed the batch blob to the network and run the forward pass.
net.setInput(blobs)
out = net.forward()

# Convert the profiled tick count to milliseconds for the overlay text.
t, _ = net.getPerfProfile()
elapsed_ms = t * 1000.0 / cv.getTickFrequency()
label = 'Inference time: %.2f ms' % elapsed_ms
print(out.shape)

# For every row of the output, pick the best-scoring class and show it on the
# matching input image.
for i, scores in enumerate(out):
    classId = np.argmax(scores)
    confidence = scores[classId]

    # Inference time overlay.
    cv.putText(images[i], label, (0, 15), cv.FONT_HERSHEY_SIMPLEX, 0.5, (255, 0, 0))

    # Predicted class overlay; fall back to the numeric id when no names are loaded.
    if classes:
        class_name = classes[classId]
    else:
        class_name = 'Class #%d' % classId
    text_label = '%s: %.4f' % (class_name, confidence)
    cv.putText(images[i], text_label, (50, 50), cv.FONT_HERSHEY_SIMPLEX, 0.75, (0, 0, 255), 2)

    cv.namedWindow("googlenet-demo", cv.WINDOW_NORMAL)
    cv.imshow("googlenet-demo", images[i])
    cv.waitKey(0)
cv.destroyAllWindows()

结果

代码地址

github