opencv-135-DNN 实时快速的图像风格迁移

知识点

OpenCV DNN模块现在还支持图像风格迁移网络模型的加载与使用,支持的模型是基于李飞飞等人在论文《Perceptual Losses for Real-Time Style Transfer and Super-Resolution》中提到的快速图像风格迁移网络,该网络基于感知损失进行训练,可实现实时的图像风格迁移与超分辨率重建。整个网络模型是基于DCGAN + 5个残差层构成,是一个典型的全卷积网络,关于DCGAN可以看这里的介绍与代码实现:
使用DCGAN实现图像生成
模型下载地址
GitHub - jcjohnson/fast-neural-style: Feedforward …
这个网络可以支持任意尺寸的图像输入,作者提供了很多种预训练的风格迁移模型:

  • composition_vii.t7

  • starry_night.t7

  • la_muse.t7

  • the_wave.t7

  • mosaic.t7

  • the_scream.t7

  • feathers.t7

  • candy.t7

  • udnie.t7

这些模型都是torch框架支持的二进制权重文件,加载模型之后,就可以调用forward得到结果,通过对输出结果反向加上均值,rescale到0~255的RGB色彩空间,即可显示。

代码(c++,python)

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
#include <opencv2/opencv.hpp>
#include <opencv2/dnn.hpp>
#include <iostream>

using namespace cv;
using namespace cv::dnn;
using namespace std;

// Network input size handed to blobFromImage. The model is fully
// convolutional, so this is a preprocessing choice, not a hard limit.
const size_t width = 256;
const size_t height = 256;
// Directory holding the pre-trained Torch (.t7) style-transfer models.
// NOTE(review): hard-coded local path — adjust for your machine.
String base_dir = "D:/projects/opencv_tutorial/data/models/fast_style/";
// File names of the nine available pre-trained style models.
String styles[9] = { "composition_vii.t7", "starry_night.t7", "la_muse.t7", "the_wave.t7",
"mosaic.t7", "the_scream.t7", "feathers.t7", "candy.t7", "udnie.t7" };

int main(int argc, char** argv) {
int index = 0;
VideoCapture capture = VideoCapture(0);
Net net = readNetFromTorch(format("%s%s", base_dir.c_str(), styles[index].c_str()));
net.setPreferableBackend(DNN_BACKEND_INFERENCE_ENGINE);
net.setPreferableTarget(DNN_TARGET_CPU);
Mat frame;
while (true) {
capture.read(frame);
imshow("input", frame);
Mat blobImage = blobFromImage(frame, 1.0,
Size(width, height),
Scalar(103.939, 116.779, 123.68), false, false);

net.setInput(blobImage);
Mat out = net.forward();
vector<double> layersTimings;
double freq = getTickFrequency() / 1000;
double time = net.getPerfProfile(layersTimings) / freq;
printf("execute time : %.2f ms\n", time);
int ch = out.size[1];
int h = out.size[2];
int w = out.size[3];
Mat result = Mat::zeros(Size(w, h), CV_32FC3);
float* data = out.ptr<float>();

// decode 4-d Mat object
for (int c = 0; c < ch; c++) {
for (int row = 0; row < h; row++) {
for (int col = 0; col < w; col++) {
result.at<Vec3f>(row, col)[c] = *data++;
}
}
}

// ���Ͻ�����
printf("channels : %d, height: %d, width: %d \n", ch, h, w);
add(result, Scalar(103.939, 116.779, 123.68), result);
result /= 255.0;

// ��ֵ�˲�
medianBlur(result, result, 5);
Mat dst;
resize(result, dst, frame.size());
imshow("styled-video", dst);

// ESC means exit
char c = waitKey(1);
if (c == 27) {
break;
}
}

waitKey(0);
return 0;
}
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
"""
DNN 实时快速的图像风格迁移
"""

import cv2 as cv
import numpy as np

styles = ["composition_vii.t7", "starry_night.t7", "la_muse.t7", "the_wave.t7",
"mosaic.t7", "the_scream.t7", "feathers.t7", "candy.t7", "udnie.t7"]

# 加载模型
index = 2
net = cv.dnn.readNetFromTorch(styles[index])

# 读取图片
frame = cv.imread("images/test.png")
cv.imshow("input", frame)

# 执行风格迁移
blob = cv.dnn.blobFromImage(frame, 1.0, (256, 256),
(103.939, 116.779, 123.68), swapRB=False, crop=False)
net.setInput(blob)
out = net.forward()
print(out.shape)

# 解析输出
out = out.reshape(3, out.shape[2], out.shape[3])
print(out.shape)
out[0] += 103.939
out[1] += 116.779
out[2] += 123.68
out /= 255.0
out = out.transpose(1, 2, 0)
print(out.shape)
out = np.clip(out, 0.0, 1.0)

# rescale与中值模糊,消除极值点噪声
cv.normalize(out, out, 0, 255, cv.NORM_MINMAX)
out = cv.medianBlur(out, 5)

# resize and show
h, w = frame.shape[:2]
result = np.uint8(cv.resize(out, (w, h)))
cv.imshow('Fast Style Demo', result)

cv.waitKey(0)
cv.destroyAllWindows()

结果

代码地址

github