Image captioning model predicts repeated words when using BERT as the word embedding layer
I'm currently using the code from this page to build an image captioning model. The only difference is that I use the BERT model's output as the word embedding in the Decoder, so I had to change some of the original parameters such as d_model, dff, and target_vocab_size. Training went fine, but when I try the evaluate function, the generated caption contains repeated words. Any help would be really appreciated. Below is my Decoder:
import tensorflow as tf
from transformers import TFBertModel

class Decoder(tf.keras.layers.Layer):
    def __init__(self, num_layers, d_model, num_heads, dff, target_vocab_size,
                 maximum_position_encoding, rate=0.1):
        super(Decoder, self).__init__()
        self.d_model = d_model
        self.num_layers = num_layers
        # pretrained BERT replaces the tutorial's trainable embedding layer
        self.embedding = TFBertModel.from_pretrained('bert-base-uncased')
        self.pos_encoding = positional_encoding_1d(maximum_position_encoding, d_model)
        self.dec_layers = [DecoderLayer(d_model, num_heads, dff, rate)
                           for _ in range(num_layers)]
        self.dropout = tf.keras.layers.Dropout(rate)

    def call(self, x, enc_output, training, look_ahead_mask=None, padding_mask=None):
        seq_len = tf.shape(x)[1]
        attention_weights = {}
        # BERT's last_hidden_state: (batch_size, target_seq_len, d_model)
        bert_output = self.embedding(x)
        x = bert_output[0]
        x *= tf.math.sqrt(tf.cast(self.d_model, tf.float32))
        x += self.pos_encoding[:, :seq_len, :]
        x = self.dropout(x, training=training)

        for i in range(self.num_layers):
            x, block1, block2 = self.dec_layers[i](x, enc_output, training,
                                                   look_ahead_mask, padding_mask)
            attention_weights['decoder_layer{}_block1'.format(i + 1)] = block1
            attention_weights['decoder_layer{}_block2'.format(i + 1)] = block2

        return x, attention_weights
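For context, my evaluate function follows the tutorial's greedy decoding loop. The sketch below is a simplified version of what I'm running, not the exact code: transformer stands in for the full encoder-decoder model, and create_look_ahead_mask is the standard causal mask from the TF transformer tutorial:

from transformers import BertTokenizer

tokenizer = BertTokenizer.from_pretrained('bert-base-uncased')

def create_look_ahead_mask(size):
    # standard causal mask: 1 where attention is blocked, 0 where allowed
    return 1 - tf.linalg.band_part(tf.ones((size, size)), -1, 0)

def evaluate(image_features, max_length=40):
    # start decoding from BERT's [CLS] token
    output = tf.expand_dims([tokenizer.cls_token_id], 0)   # (1, 1)
    for _ in range(max_length):
        look_ahead_mask = create_look_ahead_mask(tf.shape(output)[1])
        predictions, attention_weights = transformer(
            image_features, output, training=False,
            look_ahead_mask=look_ahead_mask)
        # keep only the logits for the last generated position
        predictions = predictions[:, -1:, :]               # (1, 1, vocab_size)
        predicted_id = tf.cast(tf.argmax(predictions, axis=-1), tf.int32)
        if int(predicted_id) == tokenizer.sep_token_id:    # stop at [SEP]
            break
        output = tf.concat([output, predicted_id], axis=-1)
    return tf.squeeze(output, axis=0)

Since the loop just appends the argmax token at each step, the repeated words show up when the decoder keeps assigning the highest probability to the same token.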
Here are my parameters:
num_layer = 12
d_model = 768
dff = 9216
num_heads = 12
row_size = 8
col_size = 8
target_vocab_size = 29690
dropout_rate = 0.1
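In case it's relevant, the captions are tokenized with the BERT tokenizer that matches the embedding model, so the token ids line up with bert-base-uncased. A minimal sketch of the preprocessing (the caption and max_length here are just example values):

from transformers import BertTokenizer

bert_tokenizer = BertTokenizer.from_pretrained('bert-base-uncased')

# tokenize a caption into BERT token ids; these are the `x` fed to Decoder.call
encoded = bert_tokenizer(['a dog runs across the field'],
                         padding='max_length', max_length=40,
                         truncation=True, return_tensors='tf')
decoder_input = encoded['input_ids']   # (batch_size, target_seq_len)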
Tags: deep-learning, bert-language-model