常用深度學(xué)習(xí)模型及網(wǎng)絡(luò)構(gòu)建

本文模型

定一個(gè)attention后面會(huì)用到

#自定義注意力層
from tf.keras import initializers, constraints,activations,regularizers
from tf.keras import backend as K
from tf.keras.layers import Layer
class Attention(Layer):
    #返回值:返回的不是attention權(quán)重,而是每個(gè)timestep乘以權(quán)重后相加得到的向量。
    #輸入:輸入是rnn的timesteps,也是最長(zhǎng)輸入序列的長(zhǎng)度
    def __init__(self, step_dim,
                 W_regularizer=None, b_regularizer=None,
                 W_constraint=None, b_constraint=None,
                 bias=True, **kwargs):
        self.supports_masking = True
        self.init = initializers.get('glorot_uniform')
 
        self.W_regularizer = regularizers.get(W_regularizer)
        self.b_regularizer = regularizers.get(b_regularizer)
 
        self.W_constraint = constraints.get(W_constraint)
        self.b_constraint = constraints.get(b_constraint)
 
        self.bias = bias
        self.step_dim = step_dim
        self.features_dim = 0
        super(Attention, self).__init__(**kwargs)
 
    def build(self, input_shape):
        assert len(input_shape) == 3
 
        self.W = self.add_weight(shape=(input_shape[-1],),initializer=self.init,name='{}_W'.format(self.name),
                                 regularizer=self.W_regularizer,constraint=self.W_constraint)
        self.features_dim = input_shape[-1]
 
        if self.bias:
            self.b = self.add_weight(shape=(input_shape[1],),initializer='zero', name='{}_b'.format(self.name),
                                     regularizer=self.b_regularizer,constraint=self.b_constraint)
        else:
            self.b = None
        self.built = True
 
    def compute_mask(self, input, input_mask=None):
        return None     ## 后面的層不需要mask了,所以這里可以直接返回none
 
    def call(self, x, mask=None):
        features_dim = self.features_dim    ## 這里應(yīng)該是 step_dim是我們指定的參數(shù),它等于input_shape[1],也就是rnn的timesteps
        step_dim = self.step_dim
        
        # 輸入和參數(shù)分別reshape再點(diǎn)乘后,tensor.shape變成了(batch_size*timesteps, 1),之后每個(gè)batch要分開(kāi)進(jìn)行歸一化
         # 所以應(yīng)該有 eij = K.reshape(..., (-1, timesteps))
 
        eij = K.reshape(K.dot(K.reshape(x, (-1, features_dim)),K.reshape(self.W, (features_dim, 1))), (-1, step_dim))
        if self.bias:
            eij += self.b        
        eij = K.tanh(eij)    #RNN一般默認(rèn)激活函數(shù)為tanh, 對(duì)attention來(lái)說(shuō)激活函數(shù)差別不大,因?yàn)橐鰏oftmax
        a = K.exp(eij)
        if mask is not None:    ## 如果前面的層有mask,那么后面這些被mask掉的timestep肯定是不能參與計(jì)算輸出的,也就是將他們attention權(quán)重設(shè)為0
            a *= K.cast(mask, K.floatx())   ## cast是做類(lèi)型轉(zhuǎn)換,keras計(jì)算時(shí)會(huì)檢查類(lèi)型,可能是因?yàn)橛胓pu的原因
 
        a /= K.cast(K.sum(a, axis=1, keepdims=True) + K.epsilon(), K.floatx())
        a = K.expand_dims(a)      # a = K.expand_dims(a, axis=-1) , axis默認(rèn)為-1, 表示在最后擴(kuò)充一個(gè)維度。比如shape = (3,)變成 (3, 1)
        ## 此時(shí)a.shape = (batch_size, timesteps, 1), x.shape = (batch_size, timesteps, units)
        weighted_input = x * a    
        # weighted_input的shape為 (batch_size, timesteps, units), 每個(gè)timestep的輸出向量已經(jīng)乘上了該timestep的權(quán)重
        # weighted_input在axis=1上取和,返回值的shape為 (batch_size, 1, units)
        return K.sum(weighted_input, axis=1)
 
    def compute_output_shape(self, input_shape):    ## 返回的結(jié)果是c,其shape為 (batch_size, units)
        return input_shape[0],  self.features_dim
 

常用的文本模型

def build_model(top_words=top_words,max_words=max_words,num_labels=num_labels,mode='LSTM',hidden_dim=[32]):
    if mode=='RNN':
        model = Sequential()
        model.add(Embedding(top_words, 32, input_length=max_words))
        model.add(Dropout(0.25))
        model.add(SimpleRNN(32))  
        model.add(Dropout(0.25))   
        model.add(Dense(num_labels, activation="softmax"))
    elif mode=='MLP':
        model = Sequential()
        model.add(Embedding(top_words, 32, input_length=max_words))
        model.add(Dropout(0.25))
        model.add(Flatten())
        model.add(Dense(256, activation="relu"))  
        model.add(Dropout(0.25))   
        model.add(Dense(num_labels, activation="softmax"))
    elif mode=='LSTM':
        model = Sequential()
        model.add(Embedding(top_words, 32, input_length=max_words))
        model.add(Dropout(0.25))
        model.add(LSTM(32))
        model.add(Dropout(0.25))   
        model.add(Dense(num_labels, activation="softmax"))
    elif mode=='GRU':
        model = Sequential()
        model.add(Embedding(top_words, 32, input_length=max_words))
        model.add(Dropout(0.25))
        model.add(GRU(32))
        model.add(Dropout(0.25))   
        model.add(Dense(num_labels, activation="softmax"))
    elif mode=='CNN':        #一維卷積
        model = Sequential()
        model.add(Embedding(top_words, 32, input_length=max_words))
        model.add(Dropout(0.25))
        model.add(Conv1D(filters=32, kernel_size=3, padding="same",activation="relu"))
        model.add(MaxPooling1D(pool_size=2))
        model.add(Flatten())
        model.add(Dense(256, activation="relu"))
        model.add(Dropout(0.25))   
        model.add(Dense(num_labels, activation="softmax"))
    elif mode=='CNN+LSTM':
        model = Sequential()
        model.add(Embedding(top_words, 32, input_length=max_words))
        model.add(Dropout(0.25))    
        model.add(Conv1D(filters=32, kernel_size=3, padding="same",activation="relu"))
        model.add(MaxPooling1D(pool_size=2))
        model.add(LSTM(64))
        model.add(Dropout(0.25))   
        model.add(Dense(num_labels, activation="softmax"))
    elif mode=='BiLSTM':
        model = Sequential()
        model.add(Embedding(top_words, 32, input_length=max_words))
        model.add(Bidirectional(LSTM(64)))
        model.add(Dense(128, activation='relu'))
        model.add(Dropout(0.25))
        model.add(Dense(num_labels, activation='softmax'))
    #下面的網(wǎng)絡(luò)采用Funcional API實(shí)現(xiàn)
    elif mode=='TextCNN':
        inputs = Input(name='inputs',shape=[max_words,], dtype='float64')
        ## 詞嵌入使用預(yù)訓(xùn)練的詞向量
        layer = Embedding(top_words, 32, input_length=max_words, trainable=False)(inputs)
        ## 詞窗大小分別為3,4,5
        cnn1 = Conv1D(32, 3, padding='same', strides = 1, activation='relu')(layer)
        cnn1 = MaxPooling1D(pool_size=2)(cnn1)
        cnn2 = Conv1D(32, 4, padding='same', strides = 1, activation='relu')(layer)
        cnn2 = MaxPooling1D(pool_size=2)(cnn2)
        cnn3 = Conv1D(32, 5, padding='same', strides = 1, activation='relu')(layer)
        cnn3 = MaxPooling1D(pool_size=2)(cnn3)
        # 合并三個(gè)模型的輸出向量
        cnn = concatenate([cnn1,cnn2,cnn3], axis=-1)
        flat = Flatten()(cnn) 
        drop = Dropout(0.2)(flat)
        main_output = Dense(num_labels, activation='softmax')(drop)
        model = Model(inputs=inputs, outputs=main_output)
        
    elif mode=='Attention':
        inputs = Input(name='inputs',shape=[max_words,], dtype='float64')
        layer = Embedding(top_words, 32, input_length=max_words, trainable=False)(inputs)
        attention_probs = Dense(32, activation='softmax', name='attention_vec')(layer)
        attention_mul =  Multiply()([layer, attention_probs])
        mlp = Dense(64)(attention_mul) #原始的全連接
        fla=Flatten()(mlp)
        output = Dense(num_labels, activation='softmax')(fla)
        model = Model(inputs=[inputs], outputs=output)  
    elif mode=='Attention*3':
        inputs = Input(name='inputs',shape=[max_words,], dtype='float64')
        layer = Embedding(top_words, 32, input_length=max_words, trainable=False)(inputs)
        attention_probs = Dense(32, activation='softmax', name='attention_vec')(layer)
        attention_mul =  Multiply()([layer, attention_probs])
        mlp = Dense(32,activation='relu')(attention_mul) 
        attention_probs = Dense(32, activation='softmax', name='attention_vec1')(mlp)
        attention_mul =  Multiply()([mlp, attention_probs])
        mlp2 = Dense(32,activation='relu')(attention_mul) 
        attention_probs = Dense(32, activation='softmax', name='attention_vec2')(mlp2)
        attention_mul =  Multiply()([mlp2, attention_probs])
        mlp3 = Dense(32,activation='relu')(attention_mul)           
        fla=Flatten()(mlp3)
        output = Dense(num_labels, activation='softmax')(fla)
        model = Model(inputs=[inputs], outputs=output)      
        
    elif mode=='BiLSTM+Attention':
        inputs = Input(name='inputs',shape=[max_words,], dtype='float64')
        layer = Embedding(top_words, 32, input_length=max_words, trainable=False)(inputs)
        bilstm = Bidirectional(LSTM(64, return_sequences=True))(layer)  #參數(shù)保持維度3
        bilstm = Bidirectional(LSTM(64, return_sequences=True))(bilstm)
        layer = Dense(256, activation='relu')(bilstm)
        layer = Dropout(0.2)(layer)
        ## 注意力機(jī)制 
        attention = Attention(step_dim=max_words)(layer)
        layer = Dense(128, activation='relu')(attention)
        output = Dense(num_labels, activation='softmax')(layer)
        model = Model(inputs=inputs, outputs=output)  
        
    elif mode=='BiGRU+Attention':
        inputs = Input(name='inputs',shape=[max_words,], dtype='float64')
        layer = Embedding(top_words, 32, input_length=max_words, trainable=False)(inputs)
        attention_probs = Dense(32, activation='softmax', name='attention_vec')(layer)
        attention_mul =  Multiply()([layer, attention_probs])
        mlp = Dense(64,activation='relu')(attention_mul) #原始的全連接
        #bat=BatchNormalization()(mlp)
        #act=Activation('relu')
        gru=Bidirectional(GRU(32))(mlp)
        mlp = Dense(16,activation='relu')(gru)
        output = Dense(num_labels, activation='softmax')(mlp)
        model = Model(inputs=[inputs], outputs=output) 
        
    model.compile(loss="categorical_crossentropy", optimizer="adam", metrics=["accuracy"])
    return model
 
?著作權(quán)歸作者所有,轉(zhuǎn)載或內(nèi)容合作請(qǐng)聯(lián)系作者
【社區(qū)內(nèi)容提示】社區(qū)部分內(nèi)容疑似由AI輔助生成,瀏覽時(shí)請(qǐng)結(jié)合常識(shí)與多方信息審慎甄別。
平臺(tái)聲明:文章內(nèi)容(如有圖片或視頻亦包括在內(nèi))由作者上傳并發(fā)布,文章內(nèi)容僅代表作者本人觀點(diǎn),簡(jiǎn)書(shū)系信息發(fā)布平臺(tái),僅提供信息存儲(chǔ)服務(wù)。

相關(guān)閱讀更多精彩內(nèi)容

友情鏈接更多精彩內(nèi)容