简单实现线性回归

原理

线性回归详解

线性回归公式推导

代码

LinearRegression.py

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
import numpy as np

class LinearRegression:
    """Ordinary least-squares linear regression solved directly (no iteration).

    All three attributes are ``None`` until :meth:`fit` has been called.
    """

    def __init__(self):
        self.b = None  # bias (intercept)
        self.W = None  # weights, one per feature column
        self.b_W = None  # stacked parameters: b_W = [b, W]

    @staticmethod
    def _add_bias_column(x):
        """Return ``x`` with a column of ones prepended (the intercept term).

        ``x`` must be 2-D with one row per sample.
        """
        return np.hstack([np.ones((len(x), 1)), x])

    def fit(self, x_trian, y_train):
        """Estimate the bias and weights from the training data.

        The parameters are the least-squares solution of ``X @ b_W = y``,
        where ``X`` is ``x_trian`` with a leading column of ones.

        Args:
            x_trian: 2-D array of training features, one row per sample.
            y_train: training targets, one entry per sample.
        """
        X = self._add_bias_column(x_trian)
        # Use a least-squares solver rather than explicitly inverting X^T X:
        # it yields the same parameters when X has full column rank, and still
        # returns the minimum-norm solution when features are collinear
        # (where the explicit inverse fails or is numerically meaningless).
        self.b_W = np.linalg.lstsq(X, y_train, rcond=None)[0]
        self.b = self.b_W[0]
        self.W = self.b_W[1:]

    def predict(self, x_test):
        """Return the predicted targets for each row of ``x_test``.

        :meth:`fit` must have been called first.
        """
        return self._add_bias_column(x_test).dot(self.b_W)

    def score(self, x_test, y_test):
        """Return the R^2 score of the predictions for ``x_test`` against ``y_test``."""
        return self._r2_score(y_test, self.predict(x_test))

    def _r2_score(self, y_test, y_predict):
        """Return R^2 = 1 - SS_res / SS_tot for the given targets and predictions."""
        ss_res = np.sum((y_test - y_predict) ** 2)
        ss_tot = np.sum((y_test - np.mean(y_test)) ** 2)
        return 1 - ss_res / ss_tot

test.py

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
import numpy as np
from sklearn import datasets
from sklearn.model_selection import train_test_split
from LinearRegression import LinearRegression

# Seed NumPy's global RNG so the train/test split below is reproducible
# (train_test_split is called without an explicit random_state).
np.random.seed(1)

# load_boston was removed in scikit-learn 1.2. Keep using it where it still
# exists; otherwise fall back to a regression dataset bundled with
# scikit-learn so the script still runs.
try:
    dataset = datasets.load_boston()
except (ImportError, AttributeError):
    dataset = datasets.load_diabetes()

X = dataset.data
y = dataset.target

X_train, X_test, y_train, y_test = train_test_split(X, y)

model = LinearRegression()
model.fit(X_train, y_train)
# The original run reported 0.78 on the Boston data; the fallback dataset
# produces a different score.
print(model.score(X_test, y_test))