# Copyright 2016 Google Inc. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
- """A library showing off sequence recognition and generation with the simple
- example of names.
- We use recurrent neural nets to learn complex functions able to recogize and
- generate sequences of a given form. This can be used for natural language
- syntax recognition, dynamically generating maps or puzzles and of course
- baby name generation.
- Before using this module, it is recommended to read the Tensorflow tutorial on
- recurrent neural nets, as it explains the basic concepts of this model, and
- will show off another module, the PTB module on which this model bases itself.
- Here is an overview of the functions available in this module:
- * RNN Module for sequence functions based on PTB
- * Name recognition specifically for recognizing names, but can be adapted to
- recognizing sequence patterns
- * Name generations specifically for generating names, but can be adapted to
- generating arbitrary sequence patterns
- """

from __future__ import absolute_import
from __future__ import division
from __future__ import print_function

import time

import numpy as np
import tensorflow as tf

from model import NamignizerModel
import data_utils


class SmallConfig(object):
    """Small config."""
    init_scale = 0.1
    learning_rate = 1.0
    max_grad_norm = 5
    num_layers = 2
    num_steps = 20
    hidden_size = 200
    max_epoch = 4
    max_max_epoch = 13
    keep_prob = 1.0
    lr_decay = 0.5
    batch_size = 20
    vocab_size = 27
    epoch_size = 100


class LargeConfig(object):
    """Large config."""
    init_scale = 0.05
    learning_rate = 1.0
    max_grad_norm = 5
    num_layers = 2
    num_steps = 35
    hidden_size = 650
    max_epoch = 6
    max_max_epoch = 39
    keep_prob = 0.5
    lr_decay = 0.8
    batch_size = 20
    vocab_size = 27
    epoch_size = 100


class TestConfig(object):
    """Tiny config, for testing."""
    init_scale = 0.1
    learning_rate = 1.0
    max_grad_norm = 1
    num_layers = 1
    num_steps = 2
    hidden_size = 2
    max_epoch = 1
    max_max_epoch = 1
    keep_prob = 1.0
    lr_decay = 0.5
    batch_size = 20
    vocab_size = 27
    epoch_size = 100
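

# The configs above are consumed as plain classes: every hyperparameter is a
# class attribute, so the class itself is passed around uninstantiated (as
# the __main__ block below does). A custom configuration can therefore be
# sketched by subclassing and overriding attributes. The class below is an
# illustrative addition, not part of the original module, and its values are
# arbitrary:
class ExampleCustomConfig(SmallConfig):
    """Illustrative only: a wider model trained for more epochs."""
    hidden_size = 400
    max_max_epoch = 20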


def run_epoch(session, m, names, counts, epoch_size, eval_op, verbose=False):
    """Runs the model on the given data for one epoch.

    Args:
        session: the tf session holding the model graph
        m: an instance of the NamignizerModel
        names: a set of lowercase names composed of the 26 letters a-z
        counts: a list of the frequency of each of the above names
        epoch_size: the number of batches to run
        eval_op: the op to run on each batch alongside the cost; pass the
            model's train op to update the parameters, or tf.no_op() to
            evaluate without training

    Kwargs:
        verbose: whether to print the state of training during the epoch

    Returns:
        The perplexity of the model over the epoch, i.e. the exponential of
        the average per-step cost.
    """
    start_time = time.time()
    costs = 0.0
    iters = 0
    for step, (x, y) in enumerate(data_utils.namignizer_iterator(
            names, counts, m.batch_size, m.num_steps, epoch_size)):
        # Weight every position in the batch equally.
        cost, _ = session.run([m.cost, eval_op],
                              {m.input_data: x,
                               m.targets: y,
                               m.weights: np.ones(m.batch_size * m.num_steps)})
        costs += cost
        iters += m.num_steps

        if verbose and step % (epoch_size // 10) == 9:
            print("%.3f perplexity: %.3f speed: %.0f lps" %
                  (step * 1.0 / epoch_size, np.exp(costs / iters),
                   iters * m.batch_size / (time.time() - start_time)))

        if step >= epoch_size:
            break

    return np.exp(costs / iters)
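

# run_epoch returns np.exp(costs / iters). As in the PTB tutorial this model
# follows, each batch's cost is the cross-entropy summed over its num_steps
# positions (averaged over the batch), so costs / iters is the mean
# per-character cross-entropy and its exponential is the per-character
# perplexity. A worked example with illustrative numbers: with num_steps = 20
# and accumulated costs of 60.0 after two batches, iters = 40 and the
# perplexity is np.exp(60.0 / 40) = np.exp(1.5), roughly 4.48 -- the model is
# about as uncertain as a uniform choice among ~4.5 characters.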


def train(data_dir, checkpoint_path, config):
    """Trains the model with the given data.

    Args:
        data_dir: path to the data for the model (see data_utils for the
            data format)
        checkpoint_path: the path under which to save the trained model
            checkpoints
        config: one of the above configs, specifying the model and how it
            should be run and trained

    Returns:
        None
    """
    # Prepare Name data.
    print("Reading Name data in %s" % data_dir)
    names, counts = data_utils.read_names(data_dir)

    with tf.Graph().as_default(), tf.Session() as session:
        initializer = tf.random_uniform_initializer(-config.init_scale,
                                                    config.init_scale)
        with tf.variable_scope("model", reuse=None, initializer=initializer):
            m = NamignizerModel(is_training=True, config=config)

        tf.global_variables_initializer().run()

        for i in range(config.max_max_epoch):
            # Decay the learning rate once past max_epoch epochs.
            lr_decay = config.lr_decay ** max(i - config.max_epoch, 0.0)
            m.assign_lr(session, config.learning_rate * lr_decay)

            print("Epoch: %d Learning rate: %.3f" % (i + 1, session.run(m.lr)))
            train_perplexity = run_epoch(session, m, names, counts,
                                         config.epoch_size, m.train_op,
                                         verbose=True)
            print("Epoch: %d Train Perplexity: %.3f" %
                  (i + 1, train_perplexity))

            m.saver.save(session, checkpoint_path, global_step=i)
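

# train() writes one numbered checkpoint per epoch (via global_step) under
# checkpoint_path. The newest checkpoint can be recovered for the inference
# functions below with tf.train.latest_checkpoint, as the __main__ block
# does; the directory name here is illustrative:
#
#     latest = tf.train.latest_checkpoint("model")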


def namignize(names, checkpoint_path, config):
    """Recognizes names, printing the model's perplexity for each name in
    the list.

    Args:
        names: a list of names in the model format
        checkpoint_path: the checkpoint to restore the trained model from
            (e.g. as returned by tf.train.latest_checkpoint)
        config: one of the above configs, specifying the model and how it
            should be run

    Returns:
        None
    """
    with tf.Graph().as_default(), tf.Session() as session:
        with tf.variable_scope("model"):
            m = NamignizerModel(is_training=False, config=config)

        m.saver.restore(session, checkpoint_path)

        for name in names:
            x, y = data_utils.name_to_batch(name, m.batch_size, m.num_steps)

            cost, loss, _ = session.run(
                [m.cost, m.loss, tf.no_op()],
                {m.input_data: x,
                 m.targets: y,
                 m.weights: np.concatenate((
                     np.ones(len(name)),
                     np.zeros(m.batch_size * m.num_steps - len(name))))})

            print("Name {} gives us a perplexity of {}".format(
                name, np.exp(cost)))
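

# The weights vector above masks the loss: the first len(name) positions get
# weight 1.0 and the padding that fills out the batch_size * num_steps batch
# gets weight 0.0, so the reported perplexity reflects only the actual
# characters of the name. For example (illustrative), with batch_size = 20
# and num_steps = 20, scoring "bob" uses a mask of 3 ones followed by 397
# zeros.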


def namignator(checkpoint_path, config):
    """Generates names randomly according to a given model.

    Args:
        checkpoint_path: the checkpoint to restore the trained model from
            (e.g. as returned by tf.train.latest_checkpoint)
        config: one of the above configs, specifying the model and how it
            should be run

    Returns:
        None
    """
    # Mutate the config to become a name-generator config: feed one letter
    # at a time and carry the RNN state forward between steps.
    config.num_steps = 1
    config.batch_size = 1

    with tf.Graph().as_default(), tf.Session() as session:
        with tf.variable_scope("model"):
            m = NamignizerModel(is_training=False, config=config)

        m.saver.restore(session, checkpoint_path)

        activations, final_state, _ = session.run(
            [m.activations, m.final_state, tf.no_op()],
            {m.input_data: np.zeros((1, 1)),
             m.targets: np.zeros((1, 1)),
             m.weights: np.ones(1)})

        # Sample the first letter from our softmax activations.
        next_letter = np.random.choice(27, p=activations[0])
        name = [next_letter]
        while next_letter != 0:
            # Feed the sampled letter and the carried state back in, and
            # sample again, until the end-of-name symbol (index 0) is drawn.
            activations, final_state, _ = session.run(
                [m.activations, m.final_state, tf.no_op()],
                {m.input_data: [[next_letter]],
                 m.targets: np.zeros((1, 1)),
                 m.initial_state: final_state,
                 m.weights: np.ones(1)})

            next_letter = np.random.choice(27, p=activations[0])
            name += [next_letter]

        # Decode indices 1-26 to 'a'-'z' and drop the trailing end-of-name
        # symbol; under Python 3, printing the bare map object the original
        # code produced would not show the name.
        print("".join(chr(c + 96) for c in name if c != 0))
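

# np.random.choice(27, p=activations[0]) draws the next character index from
# the model's softmax distribution over the 27-symbol vocabulary: index 0 is
# the end-of-name symbol and indices 1-26 map to 'a'-'z' via chr(index + 96).
# With an (illustrative) distribution putting 0.5 on index 1 and 0.1 on
# index 0, roughly half of the draws continue the name with 'a' and one in
# ten terminate it.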


if __name__ == "__main__":
    train("data/SmallNames.txt", "model/namignizer", SmallConfig)

    namignize(["mary", "ida", "gazorbazorb", "mmmhmm", "bob"],
              tf.train.latest_checkpoint("model"), SmallConfig)

    namignator(tf.train.latest_checkpoint("model"), SmallConfig)