
Bidirectional Parallelized GPT Model by Luminosity-e

import tensorflow as tf
from transformers import TFGPT2Model, GPT2Tokenizer
from tensorflow.keras.layers import LSTM, Dense, Dropout, TimeDistributed, Input, Bidirectional
from tensorflow.keras.models import Model

# Load the pre-trained GPT-2 model and tokenizer
gpt2_tokenizer = GPT2Tokenizer.from_pretrained('gpt2')
gpt2_model = TFGPT2Model.from_pretrained('gpt2')

# LSTM parameters
lstm_units = 128
vocab_size = gpt2_tokenizer.vocab_size
dropout_rate = 0.5

# Model inputs: GPT-2 token IDs and an auxiliary per-token feature sequence
gpt2_inputs = Input(shape=(None,), dtype=tf.int32)
lstm_inputs = Input(shape=(None, lstm_units))

# GPT-2 hidden states, shape (batch, seq_len, hidden_size)
gpt2_outputs = gpt2_model(gpt2_inputs)[0]

# Concatenate the two streams along the feature axis; this assumes
# lstm_inputs has the same sequence length as the token inputs
concatenated_inputs = tf.concat([lstm_inputs, gpt2_outputs], axis=-1)

# Dropout for regularization
dropout = Dropout(dropout_rate)(concatenated_inputs)

# Bidirectional LSTM; Keras concatenates the forward and backward
# outputs along the feature axis by default
bi_lstm = Bidirectional(LSTM(lstm_units, return_sequences=True, return_state=True))
lstm_outputs, forward_state_h, forward_state_c, backward_state_h, backward_state_c = bi_lstm(dropout)

# The Bidirectional wrapper already merges both directions,
# so no manual reversal or concatenation is needed
final_lstm_outputs = lstm_outputs

# TimeDistributed layer to apply Dense layer to each time step
outputs = TimeDistributed(Dense(vocab_size, activation='softmax'))(final_lstm_outputs)
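# Note: a Dense layer applied to a 3-D tensor already maps the last axis
# at every time step, so the TimeDistributed wrapper is explicit rather
# than strictly required here.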

# Final Model
model = Model(inputs=[gpt2_inputs, lstm_inputs], outputs=outputs)
# Sparse loss keeps the targets as integer token IDs rather than
# one-hot vectors over the full vocabulary
model.compile(loss='sparse_categorical_crossentropy', optimizer='adam')
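
To sanity-check the wiring, the model can be run on a toy batch. This is a minimal sketch, assuming the lstm_inputs stream carries per-token features of width lstm_units; the sample sentence and the random lstm_features tensor are illustrative placeholders, since the post does not specify what the second input stream contains.

# Tokenize a sample sentence into GPT-2 token IDs
sample_text = "The quick brown fox"
token_ids = gpt2_tokenizer(sample_text, return_tensors='tf')['input_ids']
seq_len = token_ids.shape[1]

# Placeholder auxiliary features with the same sequence length as the tokens
lstm_features = tf.random.normal((1, seq_len, lstm_units))

# Forward pass; output has shape (1, seq_len, vocab_size)
predictions = model([token_ids, lstm_features])
print(predictions.shape)

With the sparse loss above, one natural training setup is to feed the same token IDs shifted by one position as targets.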



