# run_tf.py -- measure the GPU memory delta of running a frozen TensorFlow graph.
  1. import pycuda.gpuarray as gpuarray
  2. import pycuda.driver as cuda
  3. import pycuda.autoinit
  4. # get initial meomry before importing other libraries
  5. gpu_free, gpu_total = cuda.mem_get_info()
  6. gpu_used_0 = (gpu_total - gpu_free)
  7. import argparse
  8. import numpy as np
  9. import tensorflow as tf
  10. import time
  11. parser = argparse.ArgumentParser()
  12. parser.add_argument('frozen_graph', type=str)
  13. parser.add_argument('--batch_size', type=int, default=1)
  14. parser.add_argument('--num_runs', type=int, default=10)
  15. parser.add_argument('--allow_growth', action='store_true')
  16. args = parser.parse_args()
  17. # LOAD MODEL
  18. with open(args.frozen_graph, 'rb') as f:
  19. graph_def = tf.GraphDef()
  20. graph_def.ParseFromString(f.read())
  21. with tf.Graph().as_default() as graph:
  22. tf.import_graph_def(graph_def, name="")
  23. tf_config = tf.ConfigProto()
  24. tf_config.gpu_options.allow_growth = args.allow_growth # disable upfront memory allocation
  25. tf_config.allow_soft_placement = True
  26. if 'vgg_16' in args.frozen_graph:
  27. output_name = 'vgg_16/fc8/BiasAdd'
  28. elif 'vgg_19' in args.frozen_graph:
  29. output_name = 'vgg_19/fc8/BiasAdd'
  30. elif 'inception_v1' in args.frozen_graph:
  31. output_name = 'InceptionV1/Logits/SpatialSqueeze'
  32. elif 'inception_v2' in args.frozen_graph:
  33. output_name = 'InceptionV2/Logits/SpatialSqueeze'
  34. elif 'resnet_v1_50' in args.frozen_graph:
  35. output_name = 'resnet_v1_50/SpatialSqueeze'
  36. elif 'resnet_v1_101' in args.frozen_graph:
  37. output_name = 'resnet_v1_101/SpatialSqueeze'
  38. elif 'resnet_v1_152' in args.frozen_graph:
  39. output_name = 'resnet_v1_152/SpatialSqueeze'
  40. elif 'mobilenet_v1_1p0_224' in args.frozen_graph:
  41. output_name = 'MobilenetV1/Logits/SpatialSqueeze'
  42. else:
  43. raise RuntimeError('Could not find output name for model.')
  44. with tf.Session(config=tf_config, graph=graph) as tf_sess:
  45. tf_input = tf_sess.graph.get_tensor_by_name('input' + ':0')
  46. tf_output = tf_sess.graph.get_tensor_by_name(output_name + ':0')
  47. input = np.zeros((args.batch_size, 224, 224, 3)).astype(np.float32)
  48. for i in range(args.num_runs):
  49. output = tf_sess.run([tf_output], feed_dict={
  50. tf_input: input
  51. })[0]
  52. gpu_free, gpu_total = cuda.mem_get_info()
  53. gpu_used_1 = (gpu_total - gpu_free)
  54. print('%dMB GPU MEMORY DELTA' % ((gpu_used_1 - gpu_used_0) // 1e6))