inception_eval.py 6.4 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172
  1. # Copyright 2016 Google Inc. All Rights Reserved.
  2. #
  3. # Licensed under the Apache License, Version 2.0 (the "License");
  4. # you may not use this file except in compliance with the License.
  5. # You may obtain a copy of the License at
  6. #
  7. # http://www.apache.org/licenses/LICENSE-2.0
  8. #
  9. # Unless required by applicable law or agreed to in writing, software
  10. # distributed under the License is distributed on an "AS IS" BASIS,
  11. # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
  12. # See the License for the specific language governing permissions and
  13. # limitations under the License.
  14. # ==============================================================================
  15. """A library to evaluate Inception on a single GPU.
  16. """
  17. from __future__ import absolute_import
  18. from __future__ import division
  19. from __future__ import print_function
  20. from datetime import datetime
  21. import math
  22. import os.path
  23. import time
  24. import numpy as np
  25. import tensorflow as tf
  26. from inception import image_processing
  27. from inception import inception_model as inception
  28. FLAGS = tf.app.flags.FLAGS
  29. tf.app.flags.DEFINE_string('eval_dir', '/tmp/imagenet_eval',
  30. """Directory where to write event logs.""")
  31. tf.app.flags.DEFINE_string('checkpoint_dir', '/tmp/imagenet_train',
  32. """Directory where to read model checkpoints.""")
  33. # Flags governing the frequency of the eval.
  34. tf.app.flags.DEFINE_integer('eval_interval_secs', 60 * 5,
  35. """How often to run the eval.""")
  36. tf.app.flags.DEFINE_boolean('run_once', False,
  37. """Whether to run eval only once.""")
  38. # Flags governing the data used for the eval.
  39. tf.app.flags.DEFINE_integer('num_examples', 50000,
  40. """Number of examples to run. Note that the eval """
  41. """ImageNet dataset contains 50000 examples.""")
  42. tf.app.flags.DEFINE_string('subset', 'validation',
  43. """Either 'validation' or 'train'.""")
  44. def _eval_once(saver, summary_writer, top_1_op, top_5_op, summary_op):
  45. """Runs Eval once.
  46. Args:
  47. saver: Saver.
  48. summary_writer: Summary writer.
  49. top_1_op: Top 1 op.
  50. top_5_op: Top 5 op.
  51. summary_op: Summary op.
  52. """
  53. with tf.Session() as sess:
  54. ckpt = tf.train.get_checkpoint_state(FLAGS.checkpoint_dir)
  55. if ckpt and ckpt.model_checkpoint_path:
  56. if os.path.isabs(ckpt.model_checkpoint_path):
  57. # Restores from checkpoint with absolute path.
  58. saver.restore(sess, ckpt.model_checkpoint_path)
  59. else:
  60. # Restores from checkpoint with relative path.
  61. saver.restore(sess, os.path.join(FLAGS.checkpoint_dir,
  62. ckpt.model_checkpoint_path))
  63. # Assuming model_checkpoint_path looks something like:
  64. # /my-favorite-path/imagenet_train/model.ckpt-0,
  65. # extract global_step from it.
  66. global_step = ckpt.model_checkpoint_path.split('/')[-1].split('-')[-1]
  67. print('Succesfully loaded model from %s at step=%s.' %
  68. (ckpt.model_checkpoint_path, global_step))
  69. else:
  70. print('No checkpoint file found')
  71. return
  72. # Start the queue runners.
  73. coord = tf.train.Coordinator()
  74. try:
  75. threads = []
  76. for qr in tf.get_collection(tf.GraphKeys.QUEUE_RUNNERS):
  77. threads.extend(qr.create_threads(sess, coord=coord, daemon=True,
  78. start=True))
  79. num_iter = int(math.ceil(FLAGS.num_examples / FLAGS.batch_size))
  80. # Counts the number of correct predictions.
  81. count_top_1 = 0.0
  82. count_top_5 = 0.0
  83. total_sample_count = num_iter * FLAGS.batch_size
  84. step = 0
  85. print('%s: starting evaluation on (%s).' % (datetime.now(), FLAGS.subset))
  86. start_time = time.time()
  87. while step < num_iter and not coord.should_stop():
  88. top_1, top_5 = sess.run([top_1_op, top_5_op])
  89. count_top_1 += np.sum(top_1)
  90. count_top_5 += np.sum(top_5)
  91. step += 1
  92. if step % 20 == 0:
  93. duration = time.time() - start_time
  94. sec_per_batch = duration / 20.0
  95. examples_per_sec = FLAGS.batch_size / sec_per_batch
  96. print('%s: [%d batches out of %d] (%.1f examples/sec; %.3f'
  97. 'sec/batch)' % (datetime.now(), step, num_iter,
  98. examples_per_sec, sec_per_batch))
  99. start_time = time.time()
  100. # Compute precision @ 1.
  101. precision_at_1 = count_top_1 / total_sample_count
  102. recall_at_5 = count_top_5 / total_sample_count
  103. print('%s: precision @ 1 = %.4f recall @ 5 = %.4f [%d examples]' %
  104. (datetime.now(), precision_at_1, recall_at_5, total_sample_count))
  105. summary = tf.Summary()
  106. summary.ParseFromString(sess.run(summary_op))
  107. summary.value.add(tag='Precision @ 1', simple_value=precision_at_1)
  108. summary.value.add(tag='Recall @ 5', simple_value=recall_at_5)
  109. summary_writer.add_summary(summary, global_step)
  110. except Exception as e: # pylint: disable=broad-except
  111. coord.request_stop(e)
  112. coord.request_stop()
  113. coord.join(threads, stop_grace_period_secs=10)
  114. def evaluate(dataset):
  115. """Evaluate model on Dataset for a number of steps."""
  116. with tf.Graph().as_default():
  117. # Get images and labels from the dataset.
  118. images, labels = image_processing.inputs(dataset)
  119. # Number of classes in the Dataset label set plus 1.
  120. # Label 0 is reserved for an (unused) background class.
  121. num_classes = dataset.num_classes() + 1
  122. # Build a Graph that computes the logits predictions from the
  123. # inference model.
  124. logits, _ = inception.inference(images, num_classes)
  125. # Calculate predictions.
  126. top_1_op = tf.nn.in_top_k(logits, labels, 1)
  127. top_5_op = tf.nn.in_top_k(logits, labels, 5)
  128. # Restore the moving average version of the learned variables for eval.
  129. variable_averages = tf.train.ExponentialMovingAverage(
  130. inception.MOVING_AVERAGE_DECAY)
  131. variables_to_restore = variable_averages.variables_to_restore()
  132. saver = tf.train.Saver(variables_to_restore)
  133. # Build the summary operation based on the TF collection of Summaries.
  134. summary_op = tf.merge_all_summaries()
  135. graph_def = tf.get_default_graph().as_graph_def()
  136. summary_writer = tf.train.SummaryWriter(FLAGS.eval_dir,
  137. graph_def=graph_def)
  138. while True:
  139. _eval_once(saver, summary_writer, top_1_op, top_5_op, summary_op)
  140. if FLAGS.run_once:
  141. break
  142. time.sleep(FLAGS.eval_interval_secs)