import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns
%matplotlib inline
plt.rcParams["font.sans-serif"] = ["FangSong"]
plt.rcParams["axes.unicode_minus"] = False
import warnings
warnings.filterwarnings("ignore")
from sklearn.svm import SVC
from sklearn.linear_model import LogisticRegression
from sklearn.ensemble import RandomForestClassifier, VotingClassifier
from sklearn.datasets import load_breast_cancer
from sklearn.model_selection import train_test_split
from sklearn.metrics import accuracy_score,mean_squared_error
import lightgbm
# Base learners shared by the voting, stacking and blending sections below.
# Every estimator is seeded with random_state=0. SVC is built with
# probability=True so that it exposes predict_proba, which the per-model
# loops and the soft-voting ensembles rely on.
clf1, clf2, clf3, clf4 = (
    LogisticRegression(random_state=0),
    RandomForestClassifier(random_state=0),
    SVC(probability=True, random_state=0),
    lightgbm.LGBMClassifier(random_state=0),
)
# Load the breast-cancer dataset; `data` was previously referenced without
# ever being created, even though load_breast_cancer is imported above.
data = load_breast_cancer()
X, y = data.data, data.target

# Create the train/test split that every later cell uses (X_train, X_test,
# y_train, y_test were never defined). The scores recorded in this notebook
# are multiples of 1/114, which matches a 20% hold-out of the 569 samples.
# NOTE(review): the original random_state is unknown; 0 is an assumption
# chosen to match the seed used for the estimators - confirm.
X_train, X_test, y_train, y_test = train_test_split(
    X, y, test_size=0.2, random_state=0
)
# Fit each base learner on the training split and print its test error.
# The estimators are iterated directly instead of eval()-ing their variable
# names, which is safer and keeps the loop readable.
for model in (clf1, clf2, clf3, clf4):
    model.fit(X_train, y_train)
    print(model)
    # MSE between hard label predictions and the targets; for 0/1 labels this
    # equals the misclassification rate. Arguments follow the documented
    # (y_true, y_pred) order (the metric itself is symmetric).
    print(mean_squared_error(y_test, model.predict(X_test)))
LogisticRegression(random_state=0)
0.02631578947368421
RandomForestClassifier(random_state=0)
0.03508771929824561
SVC(probability=True, random_state=0)
0.08771929824561403
LGBMClassifier(random_state=0)
0.03508771929824561
投票法
# IPython/Jupyter-only help query: the trailing "?" displays the
# VotingClassifier docstring. It is not valid syntax in a plain Python script.
VotingClassifier?
# Soft-voting ensemble over the four base learners: the predicted class
# probabilities of the members are averaged with equal weight.
vclf = VotingClassifier(
    estimators=[
        ('lr', clf1),
        ('rf', clf2),
        ('svc', clf3),
        ('lgb', clf4),
    ],
    voting='soft',
)
# fit() returns the estimator itself, so no re-assignment is needed.
vclf.fit(X_train, y_train)
0.025698284205186814
# Weighted soft-voting ensemble: same members as before, but the averaged
# probabilities are weighted 3:1:1:2 (lr : rf : svc : lgb).
vclf = VotingClassifier(
    estimators=[
        ('lr', clf1),
        ('rf', clf2),
        ('svc', clf3),
        ('lgb', clf4),
    ],
    voting='soft',
    weights=[3, 1, 1, 2],
)
# fit() returns the estimator itself, so no re-assignment is needed.
vclf.fit(X_train, y_train)
# Show the ensemble's per-class probabilities for the test split.
print(vclf.predict_proba(X_test))
0.021581896019948466
stacking
from mlxtend.classifier import StackingClassifier
from sklearn.linear_model import LinearRegression

# Stacking: the random forest, SVC and LightGBM models act as first-level
# learners, and a linear regression is fitted on their predictions as the
# meta-learner.
lr = LinearRegression()
sclf = StackingClassifier(
    classifiers=[clf2, clf3, clf4],
    meta_classifier=lr,
)
sclf.fit(X_train, y_train)

# Score the stacked model itself. The previous expression evaluated `vclf`
# (the voting ensemble) here, so it merely reproduced the voting score and
# never measured the stacking model.
# LinearRegression has no predict_proba, so the meta-learner's continuous
# predict() output is compared against the targets instead.
mean_squared_error(y_test, sclf.predict(X_test))
0.021581896019948466
blending
# Blending: collect each base learner's predicted probability for the class
# in column 1 of predict_proba, giving one feature vector per model for both
# the test and the training split.
new_test = []
new_train = []
# Iterate over the estimators directly rather than eval()-ing their names.
for model in (clf1, clf2, clf3, clf4):
    model.fit(X_train, y_train)
    pre_test = model.predict_proba(X_test)
    pre_train = model.predict_proba(X_train)
    new_test.append(pre_test[:, 1])
    # NOTE(review): these training features are in-sample predictions of a
    # model fitted on the same rows; blending usually derives them from a
    # separate hold-out split - confirm this is intentional.
    new_train.append(pre_train[:, 1])
LinearRegression()
0.024812030295457036