在线视频亚洲一区,久久久a区

```

import pandas as pd

import numpy as np


# Quick way to build a small DataFrame: a dict of columns plus explicit row labels.
df4 = pd.DataFrame({'col1':['1',3],'col2':[2,4]},index=['a','b'])

# Dirty-data example: some "id" cells hold several comma-separated ids ("3,4").
df = pd.DataFrame({"id":["1","2","3,4"]})


def max_str(t):
    """Return the largest integer among the numeric strings in *t*.

    Args:
        t: An iterable of strings, each parseable by ``int`` (for example
            the list produced by ``"3,4".split(",")``).

    Returns:
        The maximum of the parsed integers.

    Raises:
        ValueError: If *t* is empty or an element is not a valid integer.
    """
    return max(int(i) for i in t)


# Split every cell on "," and keep only the largest id of each cell.
df["id_max"] = df["id"].str.split(",").map(max_str)

# Worked example: the Tencent Advertising Algorithm Competition data.

################################## Build small sample sets ##############################

###########################################################################

# Step 1: read the raw, tab-separated, header-less user data file.

df=pd.read_table('C:/Users/fafa/Desktop/testA/user_data',sep = '\t',header=None,engine='python')

# Assign column names (the file is read with header=None, so columns are unnamed).

df.columns=['用戶ID','年齡','性別','地域','婚戀狀態(tài)','學(xué)歷','消費(fèi)能力','設(shè)備','工作狀態(tài)','連接類型','行為性趣']

# Keep only the first 2000 rows as the sample.

df2=df.head(2000)

# Export the sample.
# NOTE(review): recent pandas releases may no longer write legacy .xls files - confirm
# against the installed version.

df2.to_excel('C:/Users/fafa/Desktop/testa/user_data.xls')

################## Same procedure for the remaining sample sets ##########

# Test samples: read the tab-separated, header-less file and name its columns.

test=pd.read_table('C:/Users/fafa/Desktop/testA/test_sample.dat',sep = '\t',header=None,engine='python')

test.columns=['樣本id','廣告id','創(chuàng)建時(shí)間','素材尺寸','廣告行業(yè)id','商品類型','商品id','廣告賬戶id','投放時(shí)段','人群定向','出價(jià)(單位分)']

# NOTE(review): unlike the other exports, this one writes the whole frame (no head(2000)) -
# confirm that is intended.

test.to_excel('C:/Users/fafa/Desktop/testa/測(cè)試數(shù)據(jù).xls')

# Ad operation log: `test` is rebound to this new frame from here on.

test=pd.read_table('C:/Users/fafa/Desktop/testA/ad_operation.dat',sep = '\t',header=None,engine='python')

test.columns=['廣告id','創(chuàng)建/修改時(shí)間','操作類型','修改字段','操作后的字段']

# Keep the first 2000 rows and export them.

df3=test.head(2000)

df3.to_excel('C:/Users/fafa/Desktop/testa/廣告操作數(shù)據(jù).xls')

# Static ad features: read, name the columns, keep the first 2000 rows, export.
# Note that this rebinds `df4`.
df4=pd.read_table('C:/Users/fafa/Desktop/testA/ad_static_feature.out',sep = '\t',header=None,engine='python')

df4.columns=['廣告id','創(chuàng)建時(shí)間','廣告賬戶id','商品id','商品類型','廣告行業(yè)id','素材尺寸']

df5=df4.head(2000)

df5.to_excel('C:/Users/fafa/Desktop/testa/廣告靜態(tài)數(shù)據(jù).xls')

# Exposure log: read, name the columns, keep the first 2000 rows, export.
# NOTE(review): the file name '想' looks like a placeholder; the same column list is applied
# to 'totalExposureLog.out' further down - confirm the intended path.
df=pd.read_table('C:/Users/fafa/Desktop/testA/想',sep = '\t',header=None,engine='python')

df.columns=['廣告請(qǐng)求id','廣告請(qǐng)求時(shí)間','廣告位id','用戶id','曝光廣告id','曝光廣告素材尺寸','曝光廣告出價(jià)bid','曝光廣告pctr','曝光廣告quality_ecpm','曝光廣告totalEcpm']

df=df.head(2000)

# NOTE(review): this export goes to the Desktop root, not the 'testa' folder used by the
# other exports - confirm.
df.to_excel('C:/Users/fafa/Desktop/廣告曝光日志.xls')

###################################################

########## Count how often each ad id appears in the log, then attach its attributes ##########

###################################################

# Read the (sampled) exposure log.
df = pd.read_excel('D:/mini數(shù)據(jù)集/曝光日志.xls', header=0)

# Generic pattern: counting a column, e.g. "姓名", yields a two-column frame
# ("姓名", "計(jì)數(shù)"):
#
#     df["姓名"].value_counts().rename_axis("姓名").reset_index(name="計(jì)數(shù)")
#
# It is kept as a comment because the exposure-log column list used in this
# script has no "姓名" column, so executing it here would fail.

# Count "曝光廣告id" and produce the two columns "廣告id" and "曝光次數(shù)".
# rename_axis/reset_index(name=...) names both columns explicitly, so the result
# does not depend on how the installed pandas version labels value_counts output.
df1 = (
    df["曝光廣告id"]
    .value_counts()
    .rename_axis("廣告id")
    .reset_index(name="曝光次數(shù)")
)
df1.head()

#法二

# df['count'] = 1

#df.groupby('Name')['count'].agg('sum')

# Attach the other ad attributes to the exposure-count table.

# Read the static ad attributes.
df2 = pd.read_excel('D:/mini數(shù)據(jù)集/廣告靜態(tài)數(shù)據(jù).xls', header=0)
df2.head()

# Join on the ad id. The result must be kept: merge returns a new frame and
# leaves df1 untouched.
df3 = df1.merge(df2, on="廣告id")

# Drop the columns that are not needed (keyword form; a list keeps the order
# deterministic and avoids the positional-axis argument).
df4 = df3.drop(columns=['創(chuàng)建時(shí)間', "廣告賬戶id"])
df4.head()

# Reorder the columns of the merged frame so the exposure count comes last.
df = df4[['廣告id', '商品id', '商品類型', '廣告行業(yè)id', '素材尺寸','曝光次數(shù)']]

#################################################

########### Build the training set directly from the full-size data #########

#################################################

# Step 1: read the raw, tab-separated, header-less files.
df1 = pd.read_table('C:/Users/fafa/Desktop/testA/totalExposureLog.out',sep = '\t',header=None,engine='python')
df2 = pd.read_table('C:/Users/fafa/Desktop/testA/ad_static_feature.out',sep = '\t',header=None,engine='python')

# Assign column names.
df1.columns = ['廣告請(qǐng)求id','廣告請(qǐng)求時(shí)間','廣告位id','用戶id','曝光廣告id','曝光廣告素材尺寸','曝光廣告出價(jià)bid','曝光廣告pctr','曝光廣告quality_ecpm','曝光廣告totalEcpm']
df2.columns = ['廣告id','創(chuàng)建時(shí)間','廣告賬戶id','商品id','商品類型','廣告行業(yè)id','素材尺寸']

# Count "曝光廣告id" and produce the two columns "廣告id" and "曝光次數(shù)".
# Naming both columns explicitly keeps the result independent of how the
# installed pandas version labels value_counts output.
df3 = (
    df1["曝光廣告id"]
    .value_counts()
    .rename_axis("廣告id")
    .reset_index(name="曝光次數(shù)")
)
df1.head()

# Attach the static ad attributes; merge returns a new frame, so keep it.
df3 = df3.merge(df2, on="廣告id")

# Drop the columns that are not needed.
df4 = df3.drop(columns=['創(chuàng)建時(shí)間', "廣告賬戶id"])
df4.head()

# Reorder the columns so the exposure count (the target) comes last.
df5 = df4[['廣告id', '商品id', '商品類型', '廣告行業(yè)id', '素材尺寸','曝光次數(shù)']]

# The id columns contain dirty data (several comma-separated ids in one cell),
# so they are cleaned with this helper.
def max_str(t):
    """Return the largest integer among the numeric strings in *t*.

    Args:
        t: An iterable of strings, each parseable by ``int`` (for example
            the list produced by ``"3,4".split(",")``).

    Returns:
        The maximum of the parsed integers.

    Raises:
        ValueError: If *t* is empty or an element is not a valid integer.
    """
    return max(int(i) for i in t)

# Work on an independent copy so the column assignments below modify df5 itself
# rather than a slice of another frame.
df5 = df5.copy()

# Collapse multi-valued id cells ("3,4") to their largest id.
# na_action="ignore" passes missing cells through untouched; they are filled below.
# NOTE(review): assumes these columns were read as strings - the .str accessor
# fails on purely numeric columns; confirm against the real data.
for id_column in ("廣告id", "商品id", "廣告行業(yè)id"):
    df5[id_column] = df5[id_column].str.split(",").map(max_str, na_action="ignore")

# Replace missing values (NaN) with 0; fillna returns a new frame, so keep it.
df5 = df5.fillna(0)

df5.head()

###最終得到的df5就是一個數據集合，最后一列是Y，其余列都是特征X（要注意這里df5是有列名的）。然后套用nn模板(或者LightGBM代碼)來訓練即可。

####nn模板如下####

# -*- coding: utf-8 -*-

import pandas as pd

import numpy as np

from keras import metrics

from keras.models import Sequential

from keras.layers import Dense

from keras.wrappers.scikit_learn import KerasClassifier

from sklearn.model_selection import KFold, cross_val_score

# Load the data set: no header row; columns 0-12 are the features, column 13 the target.
dataset=pd.read_csv('housing.csv',header=None)

X=dataset.iloc[:,0:13]

Y=dataset.iloc[:,13]

# print(Y)

# Seed NumPy's random generator.
seed=7

np.random.seed(seed)

# Build the model.

optimizer='adam'

init='normal'

model=Sequential()

model.add(Dense(units=13,activation='relu',input_dim=13,kernel_initializer=init))

# Additional hidden layer.

model.add(Dense(units=10,activation='relu',kernel_initializer=init))

# Output layer: no activation function; a single unit for regression.

model.add(Dense(units=1,kernel_initializer=init))

# Compile the model. Accuracy is not meaningful for a continuous target trained
# with an MSE loss, so mean absolute error is reported instead.

model.compile(loss='mse',optimizer=optimizer,metrics=['mae'])

model.fit(X.values,Y.values,epochs=30,batch_size=64)

```

數據分析基本過程

XGBOOST模型訓練數據集

If you are interested in this topic, you can get in touch with me.
18234056952(Tel? wechat? qq)

色偷偷精品伊人,欧洲久久精品,欧美综合婷婷骚逼,国产AV主播,国产最新探花在线,九色在线视频一区,伊人大交九欧美,1769亚洲,黄色成人av

數據預處理之——騰訊廣告算法大賽

數(shù)據(jù)預(yù)處理之——騰訊廣告算法大賽

相關(guān)閱讀更多精彩內(nèi)容

友情鏈接更多精彩內(nèi)容

色偷偷精品伊人,欧洲久久精品,欧美综合婷婷骚逼,国产AV主播,国产最新探花在线,九色在线视频一区,伊人大交九 欧美,1769亚洲,黄色成人av

數(shù)據(jù)預(yù)處理之——騰訊廣告算法大賽

相關(guān)閱讀更多精彩內(nèi)容

友情鏈接更多精彩內(nèi)容

色偷偷精品伊人,欧洲久久精品,欧美综合婷婷骚逼,国产AV主播,国产最新探花在线,九色在线视频一区,伊人大交九欧美,1769亚洲,黄色成人av