一個程式員在海濱遊泳時溺水身亡。他死前拼命的呼救,當時海灘上有許多救生員,但是沒有人救他。因為他一直大喊“F1!”“F1!”,誰都不知道“F1”究竟是什麼意思。
分類樹和回歸樹的差別
import numpy as np
import matplotlib as mpl
import matplotlib.pyplot as plt
import pandas as pd
import warnings
import sklearn
from sklearn.linear_model import LinearRegression, LassoCV, RidgeCV
from sklearn.pipeline import Pipeline
from sklearn.model_selection import train_test_split
from sklearn.model_selection import GridSearchCV
from sklearn.tree import DecisionTreeRegressor
from sklearn.pipeline import Pipeline
from sklearn.feature_selection import SelectKBest
from sklearn.feature_selection import chi2
from sklearn.preprocessing import MinMaxScaler
from sklearn.decomposition import PCA
from sklearn.pipeline import Pipeline
from sklearn.model_selection import GridSearchCV
from sklearn.linear_model.coordinate_descent import ConvergenceWarning
def notEmpty(s):
return s != ''
mpl.rcParams['font.sans-serif']=[u'simHei']
mpl.rcParams['axes.unicode_minus']=False
## 攔截異常
warnings.filterwarnings(action = 'ignore', category=ConvergenceWarning)
names = ['CRIM','ZN', 'INDUS','CHAS','NOX','RM','AGE','DIS','RAD','TAX','PTRATIO','B','LSTAT']
path = "datas/boston_housing.data"
## 由于資料檔案格式不統一,是以讀取的時候,先按照一行一個字段屬性讀取資料,然後再按照每行資料進行處理
fd = pd.read_csv(path, header=None)
data = np.empty((len(fd), 14))
for i, d in enumerate(fd.values):
d = map(float, filter(notEmpty, d[0].split(' ')))
data[i] = list(d)
x, y = np.split(data, (13,), axis=1)
y = y.ravel()
print ("樣本資料量:%d, 特征個數:%d" % x.shape)
print ("target樣本資料量:%d" % y.shape[0])
樣本資料量:506, 特征個數:13
target樣本資料量:506
#資料的分割,
x_train1, x_test1, y_train1, y_test1 = train_test_split(x, y, train_size=0.8, random_state=14)
x_train, x_test, y_train, y_test = x_train1, x_test1, y_train1, y_test1
print ("訓練資料集樣本數目:%d, 測試資料集樣本數目:%d" % (x_train.shape[0], x_test.shape[0]))
訓練資料集樣本數目:404, 測試資料集樣本數目:102
#标準化
ss = MinMaxScaler()
x_train = ss.fit_transform(x_train, y_train)
x_test = ss.transform(x_test)
print ("原始資料各個特征屬性的調整最小值:",ss.min_)
print ("原始資料各個特征屬性的縮放資料值:",ss.scale_)
原始資料各個特征屬性的調整最小值: [-7.10352762e-05 0.00000000e+00 -1.68621701e-02 0.00000000e+00
-7.92181070e-01 -6.82314620e-01 -2.98661174e-02 -1.02719857e-01
-4.34782609e-02 -3.56870229e-01 -1.34042553e+00 -6.38977636e-03
-4.90780142e-02]
原始資料各個特征屬性的縮放資料值: [1.12397589e-02 1.00000000e-02 3.66568915e-02 1.00000000e+00
2.05761317e+00 1.91607588e-01 1.02986612e-02 9.09347180e-02
4.34782609e-02 1.90839695e-03 1.06382979e-01 2.53562554e-03
2.83687943e-02]
#構模組化型(回歸)
model = DecisionTreeRegressor(criterion='mae',max_depth=5)
#模型訓練
model.fit(x_train, y_train)
#模型預測
y_test_hat = model.predict(x_test)
#評估模型
score = model.score(x_test, y_test)
print ("Score:", score)
Score: 0.7900379894156064
# 方式三:直接生成圖檔
from sklearn import tree
from IPython.display import Image
import pydotplus
dot_data = tree.export_graphviz(model, out_file=None,
filled=True, rounded=True,
special_characters=True)
graph = pydotplus.graph_from_dot_data(dot_data)
Image(graph.create_png())