sklearn模型評估--Cross-validation

Cross-validation: evaluating estimator performance

出處:https://scikit-learn.org/stable/modules/cross_validation.html

import numpy as np

from sklearn import datasets, preprocessing, svm
from sklearn.model_selection import cross_val_score, train_test_split

# Official documentation: https://scikit-learn.org/stable/modules/cross_validation.html

# Load the iris dataset: 150 samples, 4 features, 3 classes.
iris = datasets.load_iris()
iris.data.shape, iris.target.shape  # ((150, 4), (150,))

# --- Method 1: a single hold-out split with train_test_split ---
# 40% of the data is held out as the test set; random_state fixes the split.
X_train, X_test, y_train, y_test = train_test_split(
    iris.data, iris.target, test_size=0.4, random_state=0)

X_train.shape, y_train.shape  # ((90, 4), (90,))
X_test.shape, y_test.shape    # ((60, 4), (60,))

# Fit a linear SVM on the training part and score on the held-out part.
clf = svm.SVC(kernel='linear', C=1).fit(X_train, y_train)
clf.score(X_test, y_test)

# --- Method 2: cross_val_score ---
# The simplest way to use cross-validation is to call the cross_val_score
# helper on the estimator and the full dataset; cv controls the splitting.
clf = svm.SVC(kernel='linear', C=1)
scores = cross_val_score(clf, iris.data, iris.target, cv=5, scoring='f1_macro')
# With scoring=None (the default) the estimator's own .score method is used.

print(scores)
print("Accuracy: %0.2f (+/- %0.2f)" % (scores.mean(), scores.std() * 2))
# Accuracy: 0.98 (+/- 0.03)

# --- Standardizing training and test sets consistently ---
# Fit the scaler on the training data only, then apply the same
# transformation to the test data (fitting on the test set would leak
# information from it into the model).
X_train, X_test, y_train, y_test = train_test_split(
    iris.data, iris.target, test_size=0.4, random_state=0)

scaler = preprocessing.StandardScaler().fit(X_train)
X_train_transformed = scaler.transform(X_train)
clf = svm.SVC(C=1).fit(X_train_transformed, y_train)
X_test_transformed = scaler.transform(X_test)
clf.score(X_test_transformed, y_test)  # ~0.9333

# --- Pipeline: combine standardization and cross-validation ---
# Inside cross_val_score the scaler is re-fit on each training fold only,
# so no information leaks from the validation fold.
from sklearn.pipeline import make_pipeline

clf = make_pipeline(preprocessing.StandardScaler(), svm.SVC(C=1))
cross_val_score(clf, iris.data, iris.target, cv=5)

# ----------- cross_validate -----------
# Unlike cross_val_score, the cross_validate function allows evaluating
# multiple metrics at once and also reports fit/score timings.
from sklearn.model_selection import cross_validate
from sklearn.metrics import recall_score  # kept from original (unused below)

scoring = ['precision_macro', 'recall_macro']
clf = svm.SVC(kernel='linear', C=1, random_state=0)
scores = cross_validate(clf, iris.data, iris.target,
                        scoring=scoring, cv=5, return_train_score=False)

sorted(scores.keys())
# ['fit_time', 'score_time', 'test_precision_macro', 'test_recall_macro']
print(scores['test_recall_macro'])

# -------- Cross-validation iterators --------
# Split generators for different cross-validation strategies; the ones in
# this section assume i.i.d. data.

# 1. KFold: splits into k consecutive folds of (nearly) equal size.
#    Note that KFold is not affected by class labels or groups.
from sklearn.model_selection import KFold

X = ["a", "b", "c", "d"]
kf = KFold(n_splits=2)
for train, test in kf.split(X):
    print("%s %s" % (train, test))

# 2. RepeatedKFold: repeats K-Fold n_repeats times with different
#    randomization in each repetition.
#    (The original comment said "Stratified K-Fold"; plain RepeatedKFold
#    repeats ordinary K-Fold -- RepeatedStratifiedKFold is the stratified one.)
from sklearn.model_selection import RepeatedKFold

X = np.array([[1, 2], [3, 4], [1, 2], [3, 4]])
random_state = 12883823
rkf = RepeatedKFold(n_splits=2, n_repeats=2, random_state=random_state)
for train, test in rkf.split(X):
    print("%s %s" % (train, test))

# 3. LeaveOneOut (LOO): each learning set is created by taking all the
#    samples except one, the test set being the single sample left out.
#    As a general rule, most authors and empirical evidence suggest that
#    5- or 10-fold cross-validation should be preferred to LOO.
from sklearn.model_selection import LeaveOneOut

X = [1, 2, 3, 4]
loo = LeaveOneOut()
for train, test in loo.split(X):
    print("%s %s" % (train, test))

# --- Cross-validation iterators with stratification on class labels ---
# Useful when the class distribution is imbalanced.

# 1. StratifiedKFold: a variation of k-fold which returns stratified folds --
#    each fold contains approximately the same percentage of samples of each
#    target class as the complete set.
from sklearn.model_selection import StratifiedKFold

X = np.ones(10)
y = [0, 0, 0, 0, 1, 1, 1, 1, 1, 1]
skf = StratifiedKFold(n_splits=3)
for train, test in skf.split(X, y):
    print("%s %s" % (train, test))

# RepeatedStratifiedKFold can be used to repeat Stratified K-Fold n times
# with different randomization in each repetition.

# --- Cross-validation iterators for grouped data ---
# GroupKFold: a variation of k-fold which ensures that the same group is
# never represented in both the testing and training sets.
from sklearn.model_selection import GroupKFold

X = [0.1, 0.2, 2.2, 2.4, 2.3, 4.55, 5.8, 8.8, 9, 10]
y = ["a", "b", "b", "b", "c", "c", "c", "d", "d", "d"]
groups = [1, 1, 1, 2, 2, 2, 3, 3, 3, 3]

gkf = GroupKFold(n_splits=3)
for train, test in gkf.split(X, y, groups=groups):
    print("%s %s" % (train, test))

最后編輯于
?著作權(quán)歸作者所有,轉(zhuǎn)載或內(nèi)容合作請聯(lián)系作者
【社區(qū)內(nèi)容提示】社區(qū)部分內(nèi)容疑似由AI輔助生成,瀏覽時請結(jié)合常識與多方信息審慎甄別。
平臺聲明:文章內(nèi)容(如有圖片或視頻亦包括在內(nèi))由作者上傳并發(fā)布,文章內(nèi)容僅代表作者本人觀點(diǎn),簡書系信息發(fā)布平臺,僅提供信息存儲服務(wù)。

相關(guān)閱讀更多精彩內(nèi)容

  • 第一步:打開“終端”: 找不到終端的用戶,可以點(diǎn)擊桌面右上角搜索,如下圖: 第二步:復(fù)制粘貼對應(yīng)命令回車,具體命令...
    黛黛學(xué)堂閱讀 893評論 0 0
  • 前段時間一個泉州希爾頓前廳副理過來培訓(xùn),臨走送一個杯子。領(lǐng)班主管去弄的。剛才收到短信,一人分10塊,像吃了蒼蠅一樣...
    bbd77fe72ff7閱讀 308評論 1 0
  • 夜長安 火樹銀花 笙歌漫舞喧 今夜無眠 夢回千轉(zhuǎn) 攜一壺酒尋找 李白度年 但他 早已醉臥在曲江池畔 而杜甫 此刻在...
    王子少閱讀 161評論 0 0
  • 命運(yùn)是不公的,總是會以各種方式來捉弄人,當(dāng)我不斷變強(qiáng)時、耐抗時,它越發(fā)的變本加厲。上輩子我估計強(qiáng)奸了它,所以這輩子...
    沐府墓主閱讀 241評論 0 0

友情鏈接更多精彩內(nèi)容