前言:回归任务是监督式机器学习中最主要的类别之一。与分类不同的是,回归的预测目标一般为连续型数值。sklearn 中提供了大量用于回归的算法。
本文汇总了常见的9种回归算法,方便快速查询使用。(本文使用 sklearn 自带的糖尿病数据集(load_diabetes),以均方误差(MSE)作为回归评价指标;以下列出其中6种算法在测试集上的MSE得分,数值越小越好。)
1.线性回归:3424
2.岭回归:3379
3.套索回归:3787
4.弹性网络:4666
5.支持向量机(多项式核):4267
6.K近邻:4243
"""Compare nine scikit-learn regressors on the diabetes dataset.

The script loads the diabetes dataset, holds out 20% of the samples as a
test set, and then, for each regressor:

1. fits it on the training split,
2. prints its 3-fold cross-validated MSE on the training split, and
3. appends its MSE on the held-out test split to ``res``.

Finally ``res`` is printed, with one test MSE per regressor in the order
the regressors are listed in ``estimators``.
"""
import numpy as np
import pandas as pd
import matplotlib as mpl
import matplotlib.pyplot as plt
import sklearn
from sklearn import datasets

from sklearn.model_selection import train_test_split
from sklearn.model_selection import cross_val_score
from sklearn.metrics import explained_variance_score
from sklearn.metrics import mean_squared_error

from sklearn.linear_model import LinearRegression
from sklearn.linear_model import Ridge
from sklearn.linear_model import Lasso
from sklearn.linear_model import ElasticNet

from sklearn.svm import SVR
from sklearn.neighbors import KNeighborsRegressor
from sklearn.tree import DecisionTreeRegressor
from sklearn.ensemble import RandomForestRegressor
from sklearn.ensemble import GradientBoostingRegressor

# One seed for the data split and for every randomised estimator, so that
# repeated runs print the same numbers.
RANDOM_STATE = 0
CV_FOLDS = 3
# scikit-learn scorers follow "greater is better", so MSE is exposed as its
# negative; the sign is flipped back before printing.
CV_SCORING = "neg_mean_squared_error"

diabetes = datasets.load_diabetes()
x, y = diabetes.data, diabetes.target
x_train, x_test, y_train, y_test = train_test_split(
    x, y, test_size=0.2, random_state=RANDOM_STATE
)

# The order here fixes the order of the scores collected in ``res``.
estimators = [
    # 1. Linear regression
    LinearRegression(),
    # 2. Ridge regression
    Ridge(),
    # 3. Lasso regression
    Lasso(),
    # 4. Elastic net
    ElasticNet(alpha=0.1, l1_ratio=0.5),
    # 5. Support vector regression (other kernels to try: "linear", "rbf")
    SVR(gamma="scale", kernel="poly"),
    # 6. K-nearest-neighbours regression
    KNeighborsRegressor(weights="uniform"),
    # 7. Decision tree regression
    DecisionTreeRegressor(random_state=RANDOM_STATE),
    # 8. Random forest regression
    RandomForestRegressor(n_estimators=100, random_state=RANDOM_STATE),
    # 9. Gradient boosted trees regression
    GradientBoostingRegressor(random_state=RANDOM_STATE),
]

res = []
for regr in estimators:
    regr.fit(x_train, y_train)

    # Cross-validation is run on the training split only; the test split is
    # not passed to cross_val_score.
    cross_score = cross_val_score(
        regr, x_train, y_train, cv=CV_FOLDS, scoring=CV_SCORING
    )
    print(-cross_score)

    y_predict = regr.predict(x_test)
    score = mean_squared_error(y_test, y_predict)
    res.append(score)

# 10. Score comparison: test-set MSE of each regressor, in list order.
print(res)