代碼
import pandas as pd
import xgboost as xgb
import operator
def get_data(path="first_result2.csv", n_features=11, target="target"):
    """Load the training CSV and return ``(x_train, y_train)``.

    The feature matrix is made of the first ``n_features`` columns of the
    file; the label vector is the ``target`` column.  Every non-numeric
    (text) column is replaced by the mean of ``target`` within each of its
    distinct values, so that the result can be handed to a numeric learner.

    Args:
        path: CSV file to read.  Defaults to the file the script was
            originally written for.
        n_features: Number of leading columns used as features.
        target: Name of the label column.

    Returns:
        A ``(x_train, y_train)`` tuple: a DataFrame holding the selected
        feature columns and a Series holding the labels.
    """
    train = pd.read_csv(path)

    # The leading columns are the features (11 by default).
    # NOTE(review): the original comment mentioned 12 features while the
    # slice takes 11 -- confirm which one is intended.
    features = list(train.columns[:n_features])
    y_train = train[target]

    # Encode each text column with the per-category mean of the target.
    # Excluding numeric/bool dtypes (instead of including only 'object')
    # also catches the dedicated string dtype that newer pandas may infer.
    for feat in train.select_dtypes(exclude=["number", "bool"]).columns:
        means = train.groupby(feat)[target].mean()
        # Assign the result back explicitly: calling
        # ``train[feat].replace(..., inplace=True)`` works on a temporary
        # object under pandas Copy-on-Write and leaves ``train`` untouched.
        # ``map`` also yields a float column rather than an object one.
        train[feat] = train[feat].map(means)

    x_train = train[features]
    return x_train, y_train
x_train, y_train = get_data()

# Booster hyper-parameters; adjust these for your own data set.
# NOTE(review): newer xgboost releases use "verbosity" in place of
# "silent" -- confirm against the installed version before changing it.
xgb_params = {
    'booster': 'gbtree',
    'objective': 'binary:logistic',
    "eta": 0.01,
    "max_depth": 5,
    "silent": 0,
    "colsample_bytree": 0.7,
}
num_rounds = 1000

# Train a gradient-boosted tree model on the full training set.
dtrain = xgb.DMatrix(x_train, label=y_train)
gbdt = xgb.train(xgb_params, dtrain, num_rounds)

# get_fscore() returns a {feature_name: score} mapping; sort it in
# ascending order of score so the highest-scoring feature is printed last.
importance = gbdt.get_fscore()
importance = sorted(importance.items(), key=operator.itemgetter(1))

# Function-call form works on both Python 2 and Python 3; the bare
# ``print importance`` statement is a SyntaxError on Python 3.
print(importance)
結果
[('gender', 578), ('is_sys', 1202), ('is_font_cem', 1448), ('is_sup_cem', 1507), ('ite_phone_num', 1669), ('is_dou_kard', 1729), ('is_auto', 1796), ('age', 2235), ('now_month', 2596), (' avg_flow', 2914), ('avr_cost', 4149)]
最后編輯于 :
©著作權歸作者所有,轉載或內容合作請聯系作者
【社區內容提示】社區部分內容疑似由AI輔助生成,瀏覽時請結合常識與多方信息審慎甄別。
平臺聲明:文章內容(如有圖片或視頻亦包括在內)由作者上傳并發布,文章內容僅代表作者本人觀點,簡書系信息發布平臺,僅提供信息存儲服務。