Starting to learn RNNs

I started by reading papers.

I worked through the following articles, in order:

Reading notes on "A Critical Review of Recurrent Neural Networks for Sequence Learning"

"Understanding LSTM Networks" — walks step by step through why the LSTM cell is designed the way it is (a numpy sketch of the resulting update follows this list)

Reading notes on "Supervised Sequence Labelling with Recurrent Neural Networks"

…and a few other articles.
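To digest the "Understanding LSTM Networks" post, I find it useful to write a single LSTM step out in plain numpy. This is only a sketch: packing the four gate weight matrices into one W (and the four biases into one b) is my own shorthand, not notation from the post.

import numpy as np

def sigmoid(z):
    return 1.0 / (1.0 + np.exp(-z))

def lstm_step(x, h_prev, c_prev, W, b):
    # W: [x_dim + hidden, 4 * hidden], b: [4 * hidden]
    z = np.concatenate([x, h_prev]) @ W + b   # all four gates in one matmul
    f, i, o, g = np.split(z, 4)
    f = sigmoid(f)            # forget gate: what to erase from the cell state
    i = sigmoid(i)            # input gate: how much new information to admit
    o = sigmoid(o)            # output gate: how much of the cell to expose
    g = np.tanh(g)            # candidate cell values
    c = f * c_prev + i * g    # new cell state
    h = o * np.tanh(c)        # new hidden state
    return h, c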

After that, I went through some TensorFlow examples that implement RNNs or LSTMs.

My current progress on converting an ordinary feed-forward network into an RNN is shown in the listing below. The logic of the TensorFlow implementation makes sense to me, but running it fails with a dimension-mismatch error, and I am still tracking down the cause.
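Since the error is about tensor shapes, the first thing to pin down is the input contract of tf.nn.dynamic_rnn. A minimal standalone check (the sizes and variable names here are made up, unrelated to the model below):

import tensorflow as tf

# With time_major=False, dynamic_rnn expects inputs of shape
# [batch, max_time, depth]; feeding a 2-D [batch, depth] array into
# such a placeholder fails with exactly this kind of mismatch error.
cell = tf.nn.rnn_cell.BasicLSTMCell(num_units=8)
inputs = tf.placeholder(tf.float32, shape=[None, 5, 16])  # [batch, time, depth]
outputs, state = tf.nn.dynamic_rnn(cell, inputs, dtype=tf.float32)
print(outputs.shape)  # (?, 5, 8): one num_units-wide output per time step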

import numpy as np
import pandas as pd
import tensorflow as tf

# Convert every column of a DataFrame to one-hot encoding
def turn_onehot(df):
    for key in df.columns:
        oneHot = pd.get_dummies(df[key])
        for oneHotKey in oneHot.columns:  # prefix with the source column name to avoid collisions
            oneHot = oneHot.rename(columns={oneHotKey: key + '_' + str(oneHotKey)})
        df = df.drop(key, axis=1)
        df = df.join(oneHot)
    return df
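# Example with hypothetical data: turn_onehot(pd.DataFrame({'month': [1, 2, 1]}))
# replaces 'month' with 0/1 columns month_1 and month_2.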

# Fetch one batch of data
def get_batch(x_date, y_date, batch):
    global pointer
    x_date_batch = x_date[pointer:pointer + batch]
    y_date_batch = y_date[pointer:pointer + batch]
    pointer = pointer + batch
    return x_date_batch, y_date_batch
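# Note: pointer only ever moves forward, so with batch = 1 the loop below
# consumes `samples` consecutive rows; GHMX.CSV must be at least that long.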

# Build a fully connected layer
def add_layer(input_num, output_num, x, layer, active=None):
    # weights: the first dimension must match the width of the incoming
    # tensor (here the LSTM output, which is input_num wide)
    with tf.name_scope('layer' + layer + '/W' + layer):
        W = tf.Variable(tf.random_normal([input_num, output_num], dtype=tf.float32), name='W' + layer)
        tf.summary.histogram('layer' + layer + '/W' + layer, W)
        # optional L2 regularization
        if isregularization:
            tf.add_to_collection('losses', tf.contrib.layers.l2_regularizer(lambda1)(W))
    # bias
    with tf.name_scope('layer' + layer + '/b' + layer):
        b = tf.Variable(tf.zeros([output_num]) + 0.1, dtype=tf.float32, name='b' + layer)
        tf.summary.histogram('layer' + layer + '/b' + layer, b)
    # activation: whatever function the caller passes in (relu below)
    with tf.name_scope('layer' + layer + '/l' + layer):
        l = tf.matmul(x, W) + b
        if active is not None:
            l = active(l)
        tf.summary.histogram('layer' + layer + '/l' + layer, l)
    return l
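# Used below as add_layer(71, 1, h_date, '1', tf.nn.relu): a 71 -> 1
# regression head on top of the last LSTM output.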

hiddenDim = 1000  # number of hidden-layer neurons (not actually referenced below)
lambda1 = 0.5  # L2 regularization strength
save_file = './train_model.ckpt'
pointer = 0
time_step = 1
istrain = True  # training mode
istensorborad = False  # enable TensorBoard summaries
isregularization = False  # enable L2 regularization

if istrain:
    samples = 2000
    batch = 1  # rows fed per step
else:
    samples = 550
    batch = 1  # rows fed per step

with tf.name_scope('inputdate-x-y'):
    # load the data
    df = pd.DataFrame(pd.read_csv('GHMX.CSV', header=0))
    # build y_date, shape (n, 1)
    y_date = df['number'].values
    y_date = y_date.reshape((-1, 1))
    # build x_data, shape (n, 4+12+31+24) = (n, 71) after one-hot encoding
    df = df.drop('number', axis=1)
    df = turn_onehot(df)
    x_data = df.values
    # dynamic_rnn consumes [batch, time_step, depth], so add the time axis
    x_data = x_data.reshape((-1, time_step, 71))

### Build the network model

# placeholders
with tf.name_scope('inputs'):
    x = tf.placeholder(tf.float32, shape=[None, time_step, 71], name='x_input')
    y_ = tf.placeholder(tf.float32, shape=[None, 1], name='y_input')
    keep_prob = tf.placeholder(tf.float32, name='keep_prob')

# three stacked LSTM layers with dropout on each layer's output;
# each layer gets its own cell object so TF does not complain about
# variable reuse across layers
def lstm_cell():
    cell = tf.nn.rnn_cell.BasicLSTMCell(num_units=71, forget_bias=1.0, state_is_tuple=True)
    return tf.nn.rnn_cell.DropoutWrapper(cell=cell, input_keep_prob=1.0, output_keep_prob=keep_prob)

mlstm_cell = tf.nn.rnn_cell.MultiRNNCell([lstm_cell() for _ in range(3)])
init_state = mlstm_cell.zero_state(batch, dtype=tf.float32)
outputs, date = tf.nn.dynamic_rnn(mlstm_cell, inputs=x, initial_state=init_state, time_major=False)
h_date = outputs[:, -1, :]  # output of the last time step
y = add_layer(71, 1, h_date, '1', tf.nn.relu)
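# Shape walkthrough (where the dimension mismatch originally crept in):
#   x:       [batch, time_step, 71]  (hence the reshape of x_data above)
#   outputs: [batch, time_step, 71]
#   h_date:  [batch, 71]  ->  y: [batch, 1], matching the y_ placeholder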

# loss
with tf.name_scope('loss'):
    # loss = tf.reduce_mean(tf.reduce_sum(tf.square(y - y_), name='square'), name='loss')  # loss would not decrease; switched functions
    # loss = -tf.reduce_sum(y_*tf.log(y))  # loss still would not decrease
    # loss = -tf.reduce_sum(y_*tf.log(tf.clip_by_value(y, 1e-10, 1.0)), name='loss')
    loss = tf.losses.mean_squared_error(labels=y_, predictions=y)
    # tf.add_to_collection('losses', mse_loss)  # loss collection
    # loss = tf.add_n(tf.get_collection('losses'))
    tf.summary.scalar('loss', loss)

# gradient descent
with tf.name_scope('train_step'):
    train_step = tf.train.GradientDescentOptimizer(0.0005).minimize(loss)  # 0.000005 also proved a workable learning rate
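# An untried assumption on my part, not something from the runs above: when a
# regression loss plateaus like this, tf.train.AdamOptimizer(0.001) is often a
# faster-converging drop-in replacement for plain gradient descent.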

# initialization
init = tf.global_variables_initializer()
sess = tf.Session()

if istensorborad:
    merged = tf.summary.merge_all()
    writer = tf.summary.FileWriter('logs/', sess.graph)

sess.run(init)

# save / restore the model
saver = tf.train.Saver()
if not istrain:
    saver.restore(sess, save_file)

for i in range(samples):
    x_date_batch, y_date_batch = get_batch(x_data, y_date, batch)
    feed_dict = {x: x_date_batch, y_: y_date_batch, keep_prob: 1.0}
    if istrain:
        _, loss_value, y_value, y__value = sess.run((train_step, loss, y, y_), feed_dict=feed_dict)
        print('y=', y_value, '----true=', y__value)
        print(loss_value)
    else:
        loss_value, y_value, y__value = sess.run((loss, y, y_), feed_dict=feed_dict)
        print('y=', y_value, '----true=', y__value)
        print(loss_value)
    if istensorborad:
        result = sess.run(merged, feed_dict=feed_dict)
        writer.add_summary(result, i)

# save the model
if istrain:
    saver.save(sess, save_file)
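To evaluate a saved model, set istrain = False: the script then restores train_model.ckpt and runs 550 rows through the network, printing predictions and loss without applying any gradient updates.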
