隨機森林優化demo

參考:https://blog.csdn.net/y0367/article/details/51501780

import numpy as np
import pandas as pd
from sklearn.ensemble import RandomForestClassifier
# Load the training CSV, forcing the "Age" column to 64-bit floats.
train = pd.read_csv(
    "train.csv",
    dtype={"Age": np.float64},
)
# Preview the first ten rows.
print(train.head(n=10))

# Report how many rows were loaded.
print(train.shape[0])

def harmonize_data(titanic):
    """Fill missing values and encode the text columns as integers.

    The frame is modified in place and the same object is returned.

    Args:
        titanic: DataFrame with at least the columns "Age", "Sex",
            "Embarked" and "Fare".

    Returns:
        The same DataFrame, with:
        - missing "Age" and "Fare" values replaced by the column median,
        - "Sex" encoded as male -> 0, female -> 1,
        - missing "Embarked" values replaced by "S", then encoded as
          S -> 0, C -> 1, Q -> 2.
        Values that have no code in these mappings are left unchanged.
    """
    sex_codes = {"male": 0, "female": 1}
    embarked_codes = {"S": 0, "C": 1, "Q": 2}

    # Missing ages are replaced with the median age (not the mean).
    titanic["Age"] = titanic["Age"].fillna(titanic["Age"].median())

    # Rebuild the column through a lookup so that its dtype is inferred from
    # the encoded values, instead of writing ints one mask at a time into a
    # text column. Values without a code pass through untouched.
    titanic["Sex"] = titanic["Sex"].map(
        lambda value: sex_codes.get(value, value)
    )

    # Missing ports default to "S" before encoding.
    titanic["Embarked"] = titanic["Embarked"].fillna("S")
    titanic["Embarked"] = titanic["Embarked"].map(
        lambda value: embarked_codes.get(value, value)
    )

    titanic["Fare"] = titanic["Fare"].fillna(titanic["Fare"].median())

    return titanic

# Clean the raw frame: fill missing values and encode the text columns.
train_data = harmonize_data(train)

# Columns used as model inputs.
predictors = ["Pclass", "Sex", "Age", "SibSp", "Parch", "Fare", "Embarked"]
# One (min_samples_leaf, n_estimators, accuracy) triple per parameter
# combination that was tried.
results = []
# Candidate values for min_samples_leaf.
sample_leaf_options = list(range(1, 500, 3))
# Candidate values for n_estimators (the number of trees).
n_estimators_options = list(range(1, 1000, 5))

# The first 600 rows are used for fitting and all remaining rows for
# evaluation. Both slices share one boundary so that no row is left out:
# fitting on [:600] while evaluating on [601:] would silently drop row 600.
split_at = 600
train_features = train_data[predictors].iloc[:split_at]
train_labels = train_data['Survived'].iloc[:split_at]
test_features = train_data[predictors].iloc[split_at:]
groud_truth = train_data['Survived'].iloc[split_at:]

for leaf_size in sample_leaf_options:
    for n_estimators_size in n_estimators_options:
        alg = RandomForestClassifier(
            min_samples_leaf=leaf_size,
            n_estimators=n_estimators_size,
            random_state=50,
        )
        alg.fit(train_features, train_labels)
        predict = alg.predict(test_features)
        # Fraction of evaluation rows whose prediction matches the label.
        accuracy = (groud_truth == predict).mean()
        # Record the parameters together with the accuracy they achieved.
        results.append((leaf_size, n_estimators_size, accuracy))
        print(accuracy)

# Print the triple with the highest accuracy.
print(max(results, key=lambda x: x[2]))
©著作權歸作者所有,轉載或內容合作請聯系作者
【社區內容提示】社區部分內容疑似由AI輔助生成,瀏覽時請結合常識與多方信息審慎甄別。
平臺聲明:文章內容(如有圖片或視頻亦包括在內)由作者上傳并發布,文章內容僅代表作者本人觀點,簡書系信息發布平臺,僅提供信息存儲服務。

相關閱讀更多精彩內容

友情鏈接更多精彩內容