opencv-135-DNN 实时快速的图像风格迁移

知识点

OpenCV DNN模块现在还支持图像风格迁移网络模型的加载与使用,支持的模型是基于李飞飞等人在论文《Perceptual Losses for Real-Time Style Transfer and Super-Resolution》中提到的快速图像风格迁移网络,该网络基于感知损失进行训练,可实现实时的图像风格迁移与超分辨率重建。整个网络模型是基于DCGAN + 5个残差层构成,是一个典型的全卷积网络,关于DCGAN可以看这里的介绍与代码实现:
使用DCGAN实现图像生成
模型下载地址
GitHub - jcjohnson/fast-neural-style: Feedforward …
这个网络可以支持任意尺寸的图像输入,作者提供了很多种预训练的风格迁移模型:

  • composition_vii.t7

  • starry_night.t7

  • la_muse.t7

  • the_wave.t7

  • mosaic.t7

  • the_scream.t7

  • feathers.t7

  • candy.t7

  • udnie.t7

这些模型都是torch框架支持的二进制权重文件,加载模型之后,就可以调用forward得到结果,通过对输出结果反向加上均值,rescale到0~255的RGB色彩空间,即可显示。

代码(c++,python)

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
#include <opencv2/opencv.hpp>
#include <opencv2/dnn.hpp>
#include <iostream>

using namespace cv;
using namespace cv::dnn;
using namespace std;

// Network input size handed to blobFromImage. The model is fully
// convolutional, so this is a preprocessing choice, not a hard limit.
const size_t width = 256;
const size_t height = 256;
// Directory holding the pre-trained Torch (.t7) style-transfer models.
// NOTE(review): hard-coded local path — adjust for your machine.
String base_dir = "D:/projects/opencv_tutorial/data/models/fast_style/";
// File names of the nine available pre-trained style models.
String styles[9] = { "composition_vii.t7", "starry_night.t7", "la_muse.t7", "the_wave.t7",
"mosaic.t7", "the_scream.t7", "feathers.t7", "candy.t7", "udnie.t7" };

int main(int argc, char** argv) {
int index = 0;
VideoCapture capture = VideoCapture(0);
Net net = readNetFromTorch(format("%s%s", base_dir.c_str(), styles[index].c_str()));
net.setPreferableBackend(DNN_BACKEND_INFERENCE_ENGINE);
net.setPreferableTarget(DNN_TARGET_CPU);
Mat frame;
while (true) {
capture.read(frame);
imshow("input", frame);
Mat blobImage = blobFromImage(frame, 1.0,
Size(width, height),
Scalar(103.939, 116.779, 123.68), false, false);

net.setInput(blobImage);
Mat out = net.forward();
vector<double> layersTimings;
double freq = getTickFrequency() / 1000;
double time = net.getPerfProfile(layersTimings) / freq;
printf("execute time : %.2f ms\n", time);
int ch = out.size[1];
int h = out.size[2];
int w = out.size[3];
Mat result = Mat::zeros(Size(w, h), CV_32FC3);
float* data = out.ptr<float>();

// decode 4-d Mat object
for (int c = 0; c < ch; c++) {
for (int row = 0; row < h; row++) {
for (int col = 0; col < w; col++) {
result.at<Vec3f>(row, col)[c] = *data++;
}
}
}

// ���Ͻ�����
printf("channels : %d, height: %d, width: %d \n", ch, h, w);
add(result, Scalar(103.939, 116.779, 123.68), result);
result /= 255.0;

// ��ֵ�˲�
medianBlur(result, result, 5);
Mat dst;
resize(result, dst, frame.size());
imshow("styled-video", dst);

// ESC means exit
char c = waitKey(1);
if (c == 27) {
break;
}
}

waitKey(0);
return 0;
}
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
"""
DNN 实时快速的图像风格迁移
"""

import cv2 as cv
import numpy as np

styles = ["composition_vii.t7", "starry_night.t7", "la_muse.t7", "the_wave.t7",
"mosaic.t7", "the_scream.t7", "feathers.t7", "candy.t7", "udnie.t7"]

# 加载模型
index = 2
net = cv.dnn.readNetFromTorch(styles[index])

# 读取图片
frame = cv.imread("images/test.png")
cv.imshow("input", frame)

# 执行风格迁移
blob = cv.dnn.blobFromImage(frame, 1.0, (256, 256),
(103.939, 116.779, 123.68), swapRB=False, crop=False)
net.setInput(blob)
out = net.forward()
print(out.shape)

# 解析输出
out = out.reshape(3, out.shape[2], out.shape[3])
print(out.shape)
out[0] += 103.939
out[1] += 116.779
out[2] += 123.68
out /= 255.0
out = out.transpose(1, 2, 0)
print(out.shape)
out = np.clip(out, 0.0, 1.0)

# rescale与中值模糊,消除极值点噪声
cv.normalize(out, out, 0, 255, cv.NORM_MINMAX)
out = cv.medianBlur(out, 5)

# resize and show
h, w = frame.shape[:2]
result = np.uint8(cv.resize(out, (w, h)))
cv.imshow('Fast Style Demo', result)

cv.waitKey(0)
cv.destroyAllWindows()

结果

代码地址

github