opencv-104-HOG-SVM分类训练

知识点

对于得到的结构化HOG特征数据,我们就可以通过初始化SVM进行分类训练。这里采用的训练器是线性核的SVM分类器;对于线性不可分的数据,SVM还有另外一种分类器,即径向基(RBF)核分类器。OpenCV中使用径向基核SVM有时候会训练很长时间,而且结果很糟糕,甚至会报一些莫名其妙的错误,感觉不是特别好。所以推荐大家针对线性不可分的问题选择神经网络ANN模块。
在训练之前,首先简单地认识一下SVM:我们这边是通过二分类来完成的,属于很典型的线性可分的SVM。

代码(c++,python)

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
#include <opencv2/opencv.hpp>
#include <iostream>

using namespace cv;
using namespace cv::ml;
using namespace std;

// Directory of positive training images (generate_dataset labels them 1).
string positive_dir = "D:/images/train_data/elec_watch/positive/";
// Directory of negative training images (generate_dataset labels them -1).
string negative_dir = "D:/images/train_data/elec_watch/negative/";
// Computes the HOG descriptor of `image` into `desc` after pasting it onto a 64x128 canvas.
void get_hog_descripor(Mat &image, vector<float> &desc);
// Fills `trainData` (one descriptor per row) and `labels` from the two directories above.
void generate_dataset(Mat &trainData, Mat &labels);
// Trains a linear C-SVC on the given samples and saves the model to disk.
void svm_train(Mat &trainData, Mat &labels);

/**
 * Entry point: builds the HOG training set, trains and saves the SVM,
 * reloads the saved model and classifies one test image.
 *
 * @return 0 on success, -1 when the model or the test image cannot be loaded.
 */
int main(int argc, char** argv) {
    // Room for 26 samples (rows) of 3780 floats each (columns); Size is (width, height).
    Mat trainData = Mat::zeros(Size(3780, 26), CV_32FC1);
    Mat labels = Mat::zeros(Size(1, 26), CV_32SC1);
    generate_dataset(trainData, labels);
    svm_train(trainData, labels);

    Ptr<SVM> svm = SVM::load("D:/vcworkspaces/hog_elec.yml");
    if (svm.empty() || !svm->isTrained()) {
        printf("\n could not load a trained SVM model \n");
        return -1;
    }

    Mat test_img = imread("D:/images/train_data/elec_watch/test/test_01.png");
    // Mat test_img = imread("D:/images/train_data/elec_watch/positive/box_01.bmp");
    if (test_img.empty()) {
        // imread returns an empty Mat on failure; imshow and resize would throw on it.
        printf("\n could not read the test image \n");
        return -1;
    }
    imshow("test image", test_img);

    // Copy the descriptor into a single-row float matrix, the layout predict() is given.
    vector<float> fv;
    get_hog_descripor(test_img, fv);
    const int feature_len = static_cast<int>(fv.size());
    Mat one_row = Mat::zeros(Size(feature_len, 1), CV_32FC1);
    for (int i = 0; i < feature_len; i++) {
        one_row.at<float>(0, i) = fv[i];
    }

    float result = svm->predict(one_row);
    printf("\n prediction result : %.2f \n", result);
    waitKey(0);
    return 0;
}

/**
 * Fit a linear C-SVC on the supplied samples and write the model to disk.
 *
 * @param trainData  sample matrix, handed to train() with ROW_SAMPLE layout
 * @param labels     responses handed to train()
 */
void svm_train(Mat &trainData, Mat &labels) {
    printf("\n start SVM training... \n");

    Ptr<SVM> classifier = SVM::create();
    // Model configuration: C-support vector classification with a linear kernel.
    classifier->setType(SVM::C_SVC);
    classifier->setKernel(SVM::LINEAR);
    classifier->setC(2.67);
    classifier->setGamma(5.383);

    classifier->train(trainData, ROW_SAMPLE, labels);
    clog << "...[done]" << endl;

    // Persist the trained model.
    const char *model_path = "D:/vcworkspaces/hog_elec.yml";
    classifier->save(model_path);
}

/**
 * Compute the HOG descriptor of an image after fitting it onto a 64x128 canvas.
 *
 * The image is scaled to a width of 64 keeping its aspect ratio, converted to
 * grayscale and centred on a mid-gray (127) 64x128 canvas. If scaling to width
 * 64 would make it taller than 128, it is scaled to a height of 128 instead and
 * centred horizontally, so the paste region always lies inside the canvas.
 *
 * @param image  input image; must not be empty
 * @param desc   receives the descriptor values
 * @throws cv::Exception when `image` is empty
 */
void get_hog_descripor(Mat &image, vector<float> &desc) {
    if (image.empty()) {
        CV_Error(Error::StsBadArg, "get_hog_descripor: input image is empty");
    }

    const int win_w = 64;
    const int win_h = 128;

    // Default: fit to the window width.
    float rate = 64.0 / image.cols;
    int scaled_w = win_w;
    int scaled_h = int(rate * image.rows);
    if (scaled_h > win_h) {
        // Too tall for the canvas: fit to the window height instead.
        rate = 128.0 / image.rows;
        scaled_h = win_h;
        scaled_w = int(rate * image.cols);
    }
    // Extreme aspect ratios can truncate a side to 0, which resize rejects.
    if (scaled_w < 1) scaled_w = 1;
    if (scaled_h < 1) scaled_h = 1;

    Mat img, gray;
    resize(image, img, Size(scaled_w, scaled_h));
    if (img.channels() == 1) {
        gray = img;  // already single channel, nothing to convert
    } else {
        cvtColor(img, gray, COLOR_BGR2GRAY);
    }

    // Mid-gray canvas with the scaled image pasted in the centre.
    Mat result(Size(win_w, win_h), CV_8UC1, Scalar(127));
    Rect roi((win_w - gray.cols) / 2, (win_h - gray.rows) / 2, gray.cols, gray.rows);
    gray.copyTo(result(roi));

    HOGDescriptor hog;
    hog.compute(result, desc, Size(8, 8), Size(0, 0));
}

void generate_dataset(Mat &trainData, Mat &labels) {
vector<string> images;
glob(positive_dir, images);
int pos_num = images.size();
for (int i = 0; i < images.size(); i++) {
Mat image = imread(images[i].c_str());
vector<float> fv;
get_hog_descripor(image, fv);
printf("image path : %s, feature data length: %d \n", images[i].c_str(), fv.size());
for (int j = 0; j < fv.size(); j++) {
trainData.at<float>(i, j) = fv[j];
}
labels.at<int>(i, 0) = 1;
}

images.clear();
glob(negative_dir, images);
for (int i = 0; i < images.size(); i++) {
Mat image = imread(images[i].c_str());
vector<float> fv;
get_hog_descripor(image, fv);
printf("image path : %s, feature data length: %d \n", images[i].c_str(), fv.size());
for (int j = 0; j < fv.size(); j++) {
trainData.at<float>(i + pos_num, j) = fv[j];
}
labels.at<int>(i + pos_num, 0) = -1;
}
}
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
"""
HOG-SVM分类训练
"""


import cv2 as cv
import os
import numpy as np


def get_hog_descriptor(image):
    """Compute the HOG descriptor of ``image`` on a 64x128 canvas.

    The image is scaled to a width of 64 keeping its aspect ratio, converted to
    grayscale and centred on a mid-gray (127) canvas of 128 rows x 64 columns.
    If scaling to width 64 would make it taller than 128 rows, it is scaled to
    a height of 128 instead and centred horizontally.

    :param image: image array as returned by ``cv.imread``
    :return: whatever ``cv.HOGDescriptor().compute`` returns for the canvas
    :raises ValueError: if ``image`` is ``None`` or has no pixels
    """
    if image is None or image.size == 0:
        raise ValueError("get_hog_descriptor: input image is empty")

    win_w, win_h = 64, 128
    h, w = image.shape[:2]
    rate = 64 / w
    # The builtin int replaces np.int, which newer NumPy releases no longer provide.
    new_w, new_h = win_w, int(rate * h)
    if new_h > win_h:
        # Too tall for the canvas: fit to the window height instead.
        new_w, new_h = int(128 / h * w), win_h
    new_w, new_h = max(1, new_w), max(1, new_h)

    image = cv.resize(image, (new_w, new_h))
    gray = image if image.ndim == 2 else cv.cvtColor(image, cv.COLOR_BGR2GRAY)

    bg = np.full((win_h, win_w), 127, dtype=np.uint8)
    h, w = gray.shape
    dy = (win_h - h) // 2
    dx = (win_w - w) // 2
    bg[dy:dy + h, dx:dx + w] = gray

    hog = cv.HOGDescriptor()
    return hog.compute(bg, winStride=(8, 8), padding=(0, 0))


def _collect_samples(directory, label, train_data, labels):
    """Append one flattened HOG vector and ``label`` per readable image in ``directory``."""
    for file_name in sorted(os.listdir(directory)):
        img = cv.imread(os.path.join(directory, file_name))
        if img is None:
            # cv.imread returns None for files it cannot decode; skip them.
            continue
        # Flatten so both (N, 1) and (N,) descriptor layouts are accepted.
        one_fv = np.asarray(get_hog_descriptor(img), dtype=np.float32).reshape(-1)
        train_data.append(one_fv)
        labels.append(label)


def generate_dataset(pdir, ndir):
    """Build the training set from a positive and a negative image directory.

    :param pdir: directory of positive samples (label 1)
    :param ndir: directory of negative samples (label -1)
    :return: ``(train_data, labels)`` as a float32 array with one descriptor
        per row and an int32 array with one label per sample
    """
    train_data = []
    labels = []
    _collect_samples(pdir, 1, train_data, labels)
    _collect_samples(ndir, -1, train_data, labels)
    return np.array(train_data, dtype=np.float32), np.array(labels, dtype=np.int32)


def svm_train(positive_dir, negative_dir):
    """Train a linear C-SVC on the two image directories and save it.

    :param positive_dir: directory of positive samples
    :param negative_dir: directory of negative samples

    The trained model is written to ``svm_data.dat``.
    """
    samples, targets = generate_dataset(positive_dir, negative_dir)
    # train() is given the responses as a single column.
    targets = targets.reshape(-1, 1)

    classifier = cv.ml.SVM_create()
    classifier.setType(cv.ml.SVM_C_SVC)
    classifier.setKernel(cv.ml.SVM_LINEAR)
    classifier.setGamma(5.383)
    classifier.setC(2.67)

    classifier.train(samples, cv.ml.ROW_SAMPLE, targets)
    classifier.save('svm_data.dat')


def elec_detect(image):
    """Classify ``image`` with the SVM stored in ``svm_data.dat``.

    Prints the descriptor length, the shape of the sample row and the
    prediction, then returns the prediction.

    :param image: image array as returned by ``cv.imread``
    :return: second element of the tuple returned by ``svm.predict``
    """
    hog_desc = get_hog_descriptor(image)
    print(len(hog_desc))
    # One sample per row; flattening accepts both (N, 1) and (N,) descriptor layouts.
    one_fv = np.asarray(hog_desc, dtype=np.float32).reshape(1, -1)
    print(len(one_fv), len(one_fv[0]))
    svm = cv.ml.SVM_load('svm_data.dat')
    result = svm.predict(one_fv)[1]
    print(result)
    return result


if __name__ == '__main__':
    # Train on the sample directories; the detection step below is left disabled.
    positive_samples = "images/elec_watch/positive/"
    negative_samples = "images/elec_watch/negative/"
    svm_train(positive_samples, negative_samples)
    cv.waitKey(0)
    # test_img = cv.imread("images/elec_watch/test/scene_01.jpg")
    # elec_detect(test_img)
    # cv.destroyAllWindows()

代码地址

github