sklearn模型評估--Cross-validation

Cross-validation: evaluating estimator performance

出處:https://scikit-learn.org/stable/modules/cross_validation.html

import numpy as np

from sklearn import datasets, preprocessing, svm
from sklearn.model_selection import cross_val_score, train_test_split

# Official documentation: https://scikit-learn.org/stable/modules/cross_validation.html

# Load the iris dataset: 150 samples, 4 features, 3 classes.
iris = datasets.load_iris()
iris.data.shape, iris.target.shape  # ((150, 4), (150,))

# --- Method 1: a single hold-out split with train_test_split ---
# 40% of the data is held out as the test set; random_state fixes the split.
X_train, X_test, y_train, y_test = train_test_split(
    iris.data, iris.target, test_size=0.4, random_state=0)

X_train.shape, y_train.shape  # ((90, 4), (90,))
X_test.shape, y_test.shape    # ((60, 4), (60,))

# Fit a linear SVM on the training part and score on the held-out part.
clf = svm.SVC(kernel='linear', C=1).fit(X_train, y_train)
clf.score(X_test, y_test)

# --- Method 2: cross_val_score ---
# The simplest way to use cross-validation is to call the cross_val_score
# helper on the estimator and the full dataset; cv controls the splitting.
clf = svm.SVC(kernel='linear', C=1)
scores = cross_val_score(clf, iris.data, iris.target, cv=5, scoring='f1_macro')
# With scoring=None (the default) the estimator's own .score method is used.

print(scores)
print("Accuracy: %0.2f (+/- %0.2f)" % (scores.mean(), scores.std() * 2))
# Accuracy: 0.98 (+/- 0.03)

# --- Standardizing training and test sets consistently ---
# Fit the scaler on the training data only, then apply the same
# transformation to the test data (fitting on the test set would leak
# information from it into the model).
X_train, X_test, y_train, y_test = train_test_split(
    iris.data, iris.target, test_size=0.4, random_state=0)

scaler = preprocessing.StandardScaler().fit(X_train)
X_train_transformed = scaler.transform(X_train)
clf = svm.SVC(C=1).fit(X_train_transformed, y_train)
X_test_transformed = scaler.transform(X_test)
clf.score(X_test_transformed, y_test)  # ~0.9333

# --- Pipeline: combine standardization and cross-validation ---
# Inside cross_val_score the scaler is re-fit on each training fold only,
# so no information leaks from the validation fold.
from sklearn.pipeline import make_pipeline

clf = make_pipeline(preprocessing.StandardScaler(), svm.SVC(C=1))
cross_val_score(clf, iris.data, iris.target, cv=5)

# ----------- cross_validate -----------
# Unlike cross_val_score, the cross_validate function allows evaluating
# multiple metrics at once and also reports fit/score timings.
from sklearn.model_selection import cross_validate
from sklearn.metrics import recall_score  # kept from original (unused below)

scoring = ['precision_macro', 'recall_macro']
clf = svm.SVC(kernel='linear', C=1, random_state=0)
scores = cross_validate(clf, iris.data, iris.target,
                        scoring=scoring, cv=5, return_train_score=False)

sorted(scores.keys())
# ['fit_time', 'score_time', 'test_precision_macro', 'test_recall_macro']
print(scores['test_recall_macro'])

# -------- Cross-validation iterators --------
# Split generators for different cross-validation strategies; the ones in
# this section assume i.i.d. data.

# 1. KFold: splits into k consecutive folds of (nearly) equal size.
#    Note that KFold is not affected by class labels or groups.
from sklearn.model_selection import KFold

X = ["a", "b", "c", "d"]
kf = KFold(n_splits=2)
for train, test in kf.split(X):
    print("%s %s" % (train, test))

# 2. RepeatedKFold: repeats K-Fold n_repeats times with different
#    randomization in each repetition.
#    (The original comment said "Stratified K-Fold"; plain RepeatedKFold
#    repeats ordinary K-Fold -- RepeatedStratifiedKFold is the stratified one.)
from sklearn.model_selection import RepeatedKFold

X = np.array([[1, 2], [3, 4], [1, 2], [3, 4]])
random_state = 12883823
rkf = RepeatedKFold(n_splits=2, n_repeats=2, random_state=random_state)
for train, test in rkf.split(X):
    print("%s %s" % (train, test))

# 3. LeaveOneOut (LOO): each learning set is created by taking all the
#    samples except one, the test set being the single sample left out.
#    As a general rule, most authors and empirical evidence suggest that
#    5- or 10-fold cross-validation should be preferred to LOO.
from sklearn.model_selection import LeaveOneOut

X = [1, 2, 3, 4]
loo = LeaveOneOut()
for train, test in loo.split(X):
    print("%s %s" % (train, test))

# --- Cross-validation iterators with stratification on class labels ---
# Useful when the class distribution is imbalanced.

# 1. StratifiedKFold: a variation of k-fold which returns stratified folds --
#    each fold contains approximately the same percentage of samples of each
#    target class as the complete set.
from sklearn.model_selection import StratifiedKFold

X = np.ones(10)
y = [0, 0, 0, 0, 1, 1, 1, 1, 1, 1]
skf = StratifiedKFold(n_splits=3)
for train, test in skf.split(X, y):
    print("%s %s" % (train, test))

# RepeatedStratifiedKFold can be used to repeat Stratified K-Fold n times
# with different randomization in each repetition.

# --- Cross-validation iterators for grouped data ---
# GroupKFold: a variation of k-fold which ensures that the same group is
# never represented in both the testing and training sets.
from sklearn.model_selection import GroupKFold

X = [0.1, 0.2, 2.2, 2.4, 2.3, 4.55, 5.8, 8.8, 9, 10]
y = ["a", "b", "b", "b", "c", "c", "c", "d", "d", "d"]
groups = [1, 1, 1, 2, 2, 2, 3, 3, 3, 3]

gkf = GroupKFold(n_splits=3)
for train, test in gkf.split(X, y, groups=groups):
    print("%s %s" % (train, test))

最后編輯于
?著作權(quán)歸作者所有,轉(zhuǎn)載或內(nèi)容合作請聯(lián)系作者
【社區(qū)內(nèi)容提示】社區(qū)部分內(nèi)容疑似由AI輔助生成,瀏覽時請結(jié)合常識與多方信息審慎甄別。
平臺聲明:文章內(nèi)容(如有圖片或視頻亦包括在內(nèi))由作者上傳并發(fā)布,文章內(nèi)容僅代表作者本人觀點(diǎn),簡書系信息發(fā)布平臺,僅提供信息存儲服務(wù)。

相關(guān)閱讀更多精彩內(nèi)容

  • 第一步:打開“終端”: 找不到終端的用戶,可以點(diǎn)擊桌面右上角搜索,如下圖: 第二步:復(fù)制粘貼對應(yīng)命令回車,具體命令...
    黛黛學(xué)堂閱讀 893評論 0 0
  • 前段時間一個泉州希爾頓前廳副理過來培訓(xùn),臨走送一個杯子。領(lǐng)班主管去弄的。剛才收到短信,一人分10塊,像吃了蒼蠅一樣...
    bbd77fe72ff7閱讀 308評論 1 0
  • 夜長安 火樹銀花 笙歌漫舞喧 今夜無眠 夢回千轉(zhuǎn) 攜一壺酒尋找 李白度年 但他 早已醉臥在曲江池畔 而杜甫 此刻在...
    王子少閱讀 161評論 0 0
  • 命運(yùn)是不公的,總是會以各種方式來捉弄人,當(dāng)我不斷變強(qiáng)時、耐抗時,它越發(fā)的變本加厲。上輩子我估計強(qiáng)奸了它,所以這輩子...
    沐府墓主閱讀 241評論 0 0

友情鏈接更多精彩內(nèi)容