天天看點

《模型融合》投票法、stacking和blending

import numpy as np 
import pandas as pd
import matplotlib.pyplot as plt 
import seaborn as sns
%matplotlib inline
plt.rcParams["font.sans-serif"] = ["FangSong"] 
plt.rcParams["axes.unicode_minus"] = False 
import warnings
warnings.filterwarnings("ignore")
           
from sklearn.svm import SVC
from sklearn.linear_model import LogisticRegression
from sklearn.ensemble import RandomForestClassifier, VotingClassifier
from sklearn.datasets import load_breast_cancer
from sklearn.model_selection import train_test_split
from sklearn.metrics import accuracy_score,mean_squared_error
import lightgbm
           
# Base learners shared by every ensemble below; each one is seeded with
# random_state=0 so repeated runs give the same fitted models.
clf1 = LogisticRegression(random_state=0)
clf2 = RandomForestClassifier(random_state=0)
# probability=True makes SVC expose predict_proba, which the loops and the
# soft-voting ensemble further down call.
clf3 = SVC(probability=True, random_state=0)
clf4 = lightgbm.LGBMClassifier(random_state=0)
           
# Load the dataset explicitly: `data` was never assigned before being used.
data = load_breast_cancer()
X, y = data.data, data.target

# Every later cell reads X_train / X_test / y_train / y_test, so create them
# here. test_size=0.2 yields 114 test rows, which is consistent with the
# scores printed by the baseline loop (e.g. 0.0263... == 3/114).
# NOTE(review): the seed of the original split is not shown; random_state=0
# is an assumption, so the exact scores may differ from the pasted outputs.
X_train, X_test, y_train, y_test = train_test_split(
    X, y, test_size=0.2, random_state=0
)
           
# Fit each base learner on the training split and print its test error.
# Iterate over the estimator objects directly instead of eval()-ing their
# variable names.
for model in (clf1, clf2, clf3, clf4):
    model.fit(X_train, y_train)
    print(model)
    # MSE of hard class predictions; for binary 0/1 labels this equals the
    # misclassification rate. Arguments follow the (y_true, y_pred) order.
    print(mean_squared_error(y_test, model.predict(X_test)))
           
LogisticRegression(random_state=0)
0.02631578947368421
RandomForestClassifier(random_state=0)
0.03508771929824561
SVC(probability=True, random_state=0)
0.08771929824561403
LGBMClassifier(random_state=0)
0.03508771929824561
           

投票法

# IPython/Jupyter-only help lookup for VotingClassifier (the trailing `?`
# is not valid syntax in a plain Python script).
VotingClassifier?
           
# Soft voting: the ensemble combines the class probabilities predicted by the
# four base learners, with no per-model weights given.
vclf = VotingClassifier(
    estimators=[
        ('lr', clf1),
        ('rf', clf2),
        ('svc', clf3),
        ('lgb', clf4),
    ],
    voting='soft',
).fit(X_train, y_train)  # fit() returns the fitted estimator itself
           
0.025698284205186814
           
# Weighted soft voting: weights follow the order of `estimators`
# (lr=3, rf=1, svc=1, lgb=2).
vclf = VotingClassifier(
    estimators=[
        ('lr', clf1),
        ('rf', clf2),
        ('svc', clf3),
        ('lgb', clf4),
    ],
    voting='soft',
    weights=[3, 1, 1, 2],
).fit(X_train, y_train)  # fit() returns the fitted estimator itself

# Prints the per-class probability matrix for the test split.
# NOTE(review): the output pasted after this cell is a single scalar, which
# this print cannot produce -- the scoring line appears to be missing.
test_probabilities = vclf.predict_proba(X_test)
print(test_probabilities)
           
0.021581896019948466
           

stacking

from mlxtend.classifier import StackingClassifier
from sklearn.linear_model import LinearRegression
# Meta-learner for the stacked ensemble. Note that it is a regressor.
lr = LinearRegression()

# Stack three of the base learners (clf1 is not included) underneath `lr`.
sclf = StackingClassifier(
    classifiers=[clf2, clf3, clf4],
    meta_classifier=lr,
)
           
# Fit the stacked ensemble on the training split.
sclf.fit(X_train, y_train)

# Score the stacked model itself. The previous line evaluated `vclf` (the
# voting ensemble), so the reported "stacking" error was just the voting
# error repeated. The meta-learner is a LinearRegression, which provides
# predict() but not predict_proba(), so the score is computed from predict().
mean_squared_error(y_test, sclf.predict(X_test))
           
0.021581896019948466
           

blending

# Build blending meta-features: one entry per base learner, holding that
# learner's predicted probability of class index 1.
new_test = []
new_train = []
# Iterate over the estimator objects directly instead of eval()-ing their
# variable names.
for model in (clf1, clf2, clf3, clf4):
    model.fit(X_train, y_train)
    pre_test = model.predict_proba(X_test)
    pre_train = model.predict_proba(X_train)
    new_test.append(pre_test[:, 1])
    new_train.append(pre_train[:, 1])
# NOTE(review): the step that fits a meta-learner on new_train and scores it
# on new_test is not shown here. Also, new_train is predicted on the same rows
# the models were fitted on; blending normally uses a separate hold-out set
# for this -- confirm whether that is intended.
           
LinearRegression()
           
0.024812030295457036