天天看點

Sklearn 決策樹與隨機森林

通過 sklearn 中的紅酒資料集進行訓練與測試,並且畫出決策樹。

畫出的樹儲存到了桌面的 PDF 檔案中,目前還沒有處理中文顯示的問題。

from sklearn import tree
from sklearn.datasets import load_wine
from sklearn.model_selection import train_test_split

import pandas as pd

# Load the wine dataset bundled with scikit-learn and inspect it.
# The inspection values are printed explicitly so they also show up when this
# is run as a script (a bare expression only displays in a notebook cell).
wine = load_wine()
print(wine.data.shape)
print(wine.target)
# Features and labels side by side; the label is the last column.
print(pd.concat([pd.DataFrame(wine.data), pd.DataFrame(wine.target)], axis=1))

print(wine.feature_names)
print(wine.target_names)

# Hold out 30% of the samples for testing. No random_state is passed, so the
# split (and the score printed below) differs from run to run.
Xtrain, Xtest, Ytrain, Ytest = train_test_split(wine.data, wine.target, test_size=0.3)

# Fit a single decision tree using entropy as the split criterion.
clf = tree.DecisionTreeClassifier(criterion="entropy")
clf = clf.fit(Xtrain, Ytrain)
score = clf.score(Xtest, Ytest)  # accuracy of the predictions on the test split
print(score)

# Chinese display labels for the features, one per column of wine.data
# (13 entries; export_graphviz matches them to columns by position).
feature_name = [
    '酒精', '蘋果酸', '灰', '灰的堿性', '鎂', '總酚', '類黃酮',
    '非黃烷類酚類', '花青素', '顔色強度', '色調',
    'od280/od315稀釋葡萄酒', '脯氨酸',
]

from pathlib import Path

import graphviz

# NOTE(review): the node labels are Chinese. If they render as empty boxes,
# Graphviz needs a CJK-capable font; newer scikit-learn releases accept
# export_graphviz(..., fontname=...) -- confirm against the installed version.
dot_data = tree.export_graphviz(
    clf,
    feature_names=feature_name,
    class_names=["琴酒", "雪莉", "貝爾摩德"],  # labels shown for the three classes
    filled=True,
    rounded=True,
)
graph = graphviz.Source(dot_data)
# Render to "tree" on the current user's Desktop instead of a path that only
# exists for one specific Windows account.
graph.render(str(Path.home() / "Desktop" / "tree"))
           

sklearn 決策樹與隨機森林的差異

隨機森林是利用了裝袋法(bagging),把許多棵決策樹集合在一起,綜合各棵樹的結果得到更好的預測。

在多數情況下,精度會比單一的決策樹高。

from sklearn.tree import DecisionTreeClassifier
from sklearn.ensemble import RandomForestClassifier
from sklearn.datasets import load_wine
from sklearn.model_selection import train_test_split, cross_val_score
import matplotlib.pyplot as plt

wine = load_wine()
# Single hold-out split; only the optional one-shot comparison below uses it.
Xtrain, Xtest, Ytrain, Ytest = train_test_split(wine.data, wine.target, test_size=0.3)

# Optional one-shot comparison on the split above (left disabled):
# clf = DecisionTreeClassifier(random_state=0)
# rfc = RandomForestClassifier(random_state=0)
# clf = clf.fit(Xtrain, Ytrain)
# rfc = rfc.fit(Xtrain, Ytrain)
# score_c = clf.score(Xtest, Ytest)
# score_r = rfc.score(Xtest, Ytest)
#
# print("Single Tree:{}".format(score_c))
# print("Random Forest:{}.".format(score_r))

# Mean 10-fold cross-validation accuracy of each model, one entry per repetition.
rfc_l = []
clf_l = []

for i in range(10):
    # A 25-tree forest, scored by the mean of its 10 fold accuracies.
    rfc = RandomForestClassifier(n_estimators=25)
    rfc_s = cross_val_score(rfc, wine.data, wine.target, cv=10).mean()
    # Interpolate the score inside the f-string: concatenating it with "+"
    # raises TypeError because the score is a float, not a str.
    print(f"Random Forest run {i + 1}: {rfc_s}")
    rfc_l.append(rfc_s)

    # A single default decision tree, scored the same way.
    clf = DecisionTreeClassifier()
    clf_s = cross_val_score(clf, wine.data, wine.target, cv=10).mean()
    print(f"Decision Tree run {i + 1}: {clf_s}")
    clf_l.append(clf_s)

# Plot both score series against the 1-based repetition number; the x range is
# derived from the collected scores so it always matches the loop count.
plt.plot(range(1, len(rfc_l) + 1), rfc_l, label="Random Forest")
plt.plot(range(1, len(clf_l) + 1), clf_l, label="Decision Tree")
plt.legend()
plt.show()
           

畫出的圖像

Sklearn 決策樹與随機森林
from sklearn.tree import DecisionTreeClassifier
from sklearn.ensemble import RandomForestClassifier
from sklearn.datasets import load_wine
from sklearn.model_selection import train_test_split, cross_val_score
import matplotlib.pyplot as plt

wine = load_wine()

# superpa[k] is the mean 10-fold cross-validation accuracy of a random forest
# built with k + 1 trees (n_estimators runs from 1 to 200).
superpa = []
for i in range(200):
    rfc = RandomForestClassifier(n_estimators=i + 1, n_jobs=-1)
    rfc_s = cross_val_score(rfc, wine.data, wine.target, cv=10).mean()
    superpa.append(rfc_s)

best_score = max(superpa)
# List positions are zero-based while n_estimators started at 1, so add 1 to
# report the actual n_estimators value that reached the best score (this also
# matches the x-axis of the plot below).
print(best_score, superpa.index(best_score) + 1)
plt.figure(figsize=[20, 5])
plt.plot(range(1, 201), superpa)
plt.show()
           

將 n_estimators 從 1 調到 200 的交叉驗證結果圖

Sklearn 決策樹與随機森林

繼續閱讀