|
|
@@ -108,13 +108,16 @@ class PTBModel(object):
|
|
|
# Slightly better results can be obtained with forget gate biases
|
|
|
# initialized to 1 but the hyperparameters of the model would need to be
|
|
|
# different than reported in the paper.
|
|
|
- lstm_cell = tf.contrib.rnn.BasicLSTMCell(
|
|
|
- size, forget_bias=0.0, state_is_tuple=True)
|
|
|
+ def lstm_cell():
|
|
|
+ return tf.contrib.rnn.BasicLSTMCell(
|
|
|
+ size, forget_bias=0.0, state_is_tuple=True)
|
|
|
+ attn_cell = lstm_cell
|
|
|
if is_training and config.keep_prob < 1:
|
|
|
- lstm_cell = tf.contrib.rnn.DropoutWrapper(
|
|
|
- lstm_cell, output_keep_prob=config.keep_prob)
|
|
|
+ def attn_cell():
|
|
|
+ return tf.contrib.rnn.DropoutWrapper(
|
|
|
+ lstm_cell(), output_keep_prob=config.keep_prob)
|
|
|
cell = tf.contrib.rnn.MultiRNNCell(
|
|
|
- [lstm_cell] * config.num_layers, state_is_tuple=True)
|
|
|
+ [attn_cell() for _ in range(config.num_layers)], state_is_tuple=True)
|
|
|
|
|
|
self._initial_state = cell.zero_state(batch_size, data_type())
|
|
|
|